git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.2-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 17 Apr 2023 08:06:28 +0000 (10:06 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 17 Apr 2023 08:06:28 +0000 (10:06 +0200)
added patches:
cgroup-cpuset-add-cpuset_can_fork-and-cpuset_cancel_fork-methods.patch
cgroup-cpuset-fix-partition-root-s-cpuset.cpus-update-bug.patch
cgroup-cpuset-make-cpuset_fork-handle-clone_into_cgroup-properly.patch
cgroup-cpuset-wake-up-cpuset_attach_wq-tasks-in-cpuset_cancel_attach.patch
cgroup-fix-display-of-forceidle-time-at-root.patch
drm-amd-pm-correct-smu13.0.7-max-shader-clock-reporting.patch
drm-amd-pm-correct-smu13.0.7-pstate-profiling-clock-settings.patch
drm-amd-pm-correct-the-pcie-link-state-check-for-smu13.patch
mptcp-fix-null-pointer-dereference-on-fastopen-early-fallback.patch
mptcp-stricter-state-check-in-mptcp_worker.patch
mptcp-use-mptcp_schedule_work-instead-of-open-coding-it.patch
pci-fix-use-after-free-in-pci_bus_release_domain_nr.patch
pci-msi-provide-missing-stub-for-pci_msix_can_alloc_dyn.patch
scsi-ses-handle-enclosure-with-just-a-primary-component-gracefully.patch
selftests-mptcp-userspace-pm-uniform-verify-events.patch
thermal-intel-avoid-updating-unsupported-therm_status_clear-mask-bits.patch
x86-pci-add-quirk-for-amd-xhci-controller-that-loses-msi-x-state-in-d3hot.patch

18 files changed:
queue-6.2/cgroup-cpuset-add-cpuset_can_fork-and-cpuset_cancel_fork-methods.patch [new file with mode: 0644]
queue-6.2/cgroup-cpuset-fix-partition-root-s-cpuset.cpus-update-bug.patch [new file with mode: 0644]
queue-6.2/cgroup-cpuset-make-cpuset_fork-handle-clone_into_cgroup-properly.patch [new file with mode: 0644]
queue-6.2/cgroup-cpuset-wake-up-cpuset_attach_wq-tasks-in-cpuset_cancel_attach.patch [new file with mode: 0644]
queue-6.2/cgroup-fix-display-of-forceidle-time-at-root.patch [new file with mode: 0644]
queue-6.2/drm-amd-pm-correct-smu13.0.7-max-shader-clock-reporting.patch [new file with mode: 0644]
queue-6.2/drm-amd-pm-correct-smu13.0.7-pstate-profiling-clock-settings.patch [new file with mode: 0644]
queue-6.2/drm-amd-pm-correct-the-pcie-link-state-check-for-smu13.patch [new file with mode: 0644]
queue-6.2/mptcp-fix-null-pointer-dereference-on-fastopen-early-fallback.patch [new file with mode: 0644]
queue-6.2/mptcp-stricter-state-check-in-mptcp_worker.patch [new file with mode: 0644]
queue-6.2/mptcp-use-mptcp_schedule_work-instead-of-open-coding-it.patch [new file with mode: 0644]
queue-6.2/pci-fix-use-after-free-in-pci_bus_release_domain_nr.patch [new file with mode: 0644]
queue-6.2/pci-msi-provide-missing-stub-for-pci_msix_can_alloc_dyn.patch [new file with mode: 0644]
queue-6.2/scsi-ses-handle-enclosure-with-just-a-primary-component-gracefully.patch [new file with mode: 0644]
queue-6.2/selftests-mptcp-userspace-pm-uniform-verify-events.patch [new file with mode: 0644]
queue-6.2/series
queue-6.2/thermal-intel-avoid-updating-unsupported-therm_status_clear-mask-bits.patch [new file with mode: 0644]
queue-6.2/x86-pci-add-quirk-for-amd-xhci-controller-that-loses-msi-x-state-in-d3hot.patch [new file with mode: 0644]

diff --git a/queue-6.2/cgroup-cpuset-add-cpuset_can_fork-and-cpuset_cancel_fork-methods.patch b/queue-6.2/cgroup-cpuset-add-cpuset_can_fork-and-cpuset_cancel_fork-methods.patch
new file mode 100644 (file)
index 0000000..b7596e3
--- /dev/null
@@ -0,0 +1,171 @@
+From eee87853794187f6adbe19533ed79c8b44b36a91 Mon Sep 17 00:00:00 2001
+From: Waiman Long <longman@redhat.com>
+Date: Tue, 11 Apr 2023 09:35:59 -0400
+Subject: cgroup/cpuset: Add cpuset_can_fork() and cpuset_cancel_fork() methods
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Waiman Long <longman@redhat.com>
+
+commit eee87853794187f6adbe19533ed79c8b44b36a91 upstream.
+
+In the case of CLONE_INTO_CGROUP, not all cpusets are ready to accept
+new tasks. It is too late to check that in cpuset_fork(). So we need
+to add the cpuset_can_fork() and cpuset_cancel_fork() methods to
+pre-check it before we can allow attachment to a different cpuset.
+
+We also need to set the attach_in_progress flag to alert other code
+that a new task is going to be added to the cpuset.
+
+Fixes: ef2c41cf38a7 ("clone3: allow spawning processes into cgroups")
+Suggested-by: Michal Koutný <mkoutny@suse.com>
+Signed-off-by: Waiman Long <longman@redhat.com>
+Cc: stable@vger.kernel.org # v5.7+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/cgroup/cpuset.c |   97 +++++++++++++++++++++++++++++++++++++++++++------
+ 1 file changed, 86 insertions(+), 11 deletions(-)
+
+--- a/kernel/cgroup/cpuset.c
++++ b/kernel/cgroup/cpuset.c
+@@ -2453,6 +2453,20 @@ static int fmeter_getrate(struct fmeter
+ static struct cpuset *cpuset_attach_old_cs;
++/*
++ * Check to see if a cpuset can accept a new task
++ * For v1, cpus_allowed and mems_allowed can't be empty.
++ * For v2, effective_cpus can't be empty.
++ * Note that in v1, effective_cpus = cpus_allowed.
++ */
++static int cpuset_can_attach_check(struct cpuset *cs)
++{
++      if (cpumask_empty(cs->effective_cpus) ||
++         (!is_in_v2_mode() && nodes_empty(cs->mems_allowed)))
++              return -ENOSPC;
++      return 0;
++}
++
+ /* Called by cgroups to determine if a cpuset is usable; cpuset_rwsem held */
+ static int cpuset_can_attach(struct cgroup_taskset *tset)
+ {
+@@ -2467,16 +2481,9 @@ static int cpuset_can_attach(struct cgro
+       percpu_down_write(&cpuset_rwsem);
+-      /* allow moving tasks into an empty cpuset if on default hierarchy */
+-      ret = -ENOSPC;
+-      if (!is_in_v2_mode() &&
+-          (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)))
+-              goto out_unlock;
+-
+-      /*
+-       * Task cannot be moved to a cpuset with empty effective cpus.
+-       */
+-      if (cpumask_empty(cs->effective_cpus))
++      /* Check to see if task is allowed in the cpuset */
++      ret = cpuset_can_attach_check(cs);
++      if (ret)
+               goto out_unlock;
+       cgroup_taskset_for_each(task, css, tset) {
+@@ -2493,7 +2500,6 @@ static int cpuset_can_attach(struct cgro
+        * changes which zero cpus/mems_allowed.
+        */
+       cs->attach_in_progress++;
+-      ret = 0;
+ out_unlock:
+       percpu_up_write(&cpuset_rwsem);
+       return ret;
+@@ -3265,6 +3271,68 @@ static void cpuset_bind(struct cgroup_su
+ }
+ /*
++ * In case the child is cloned into a cpuset different from its parent,
++ * additional checks are done to see if the move is allowed.
++ */
++static int cpuset_can_fork(struct task_struct *task, struct css_set *cset)
++{
++      struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]);
++      bool same_cs;
++      int ret;
++
++      rcu_read_lock();
++      same_cs = (cs == task_cs(current));
++      rcu_read_unlock();
++
++      if (same_cs)
++              return 0;
++
++      lockdep_assert_held(&cgroup_mutex);
++      percpu_down_write(&cpuset_rwsem);
++
++      /* Check to see if task is allowed in the cpuset */
++      ret = cpuset_can_attach_check(cs);
++      if (ret)
++              goto out_unlock;
++
++      ret = task_can_attach(task, cs->effective_cpus);
++      if (ret)
++              goto out_unlock;
++
++      ret = security_task_setscheduler(task);
++      if (ret)
++              goto out_unlock;
++
++      /*
++       * Mark attach is in progress.  This makes validate_change() fail
++       * changes which zero cpus/mems_allowed.
++       */
++      cs->attach_in_progress++;
++out_unlock:
++      percpu_up_write(&cpuset_rwsem);
++      return ret;
++}
++
++static void cpuset_cancel_fork(struct task_struct *task, struct css_set *cset)
++{
++      struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]);
++      bool same_cs;
++
++      rcu_read_lock();
++      same_cs = (cs == task_cs(current));
++      rcu_read_unlock();
++
++      if (same_cs)
++              return;
++
++      percpu_down_write(&cpuset_rwsem);
++      cs->attach_in_progress--;
++      if (!cs->attach_in_progress)
++              wake_up(&cpuset_attach_wq);
++      percpu_up_write(&cpuset_rwsem);
++}
++
++/*
+  * Make sure the new task conform to the current state of its parent,
+  * which could have been changed by cpuset just after it inherits the
+  * state from the parent and before it sits on the cgroup's task list.
+@@ -3292,6 +3360,11 @@ static void cpuset_fork(struct task_stru
+       percpu_down_write(&cpuset_rwsem);
+       guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
+       cpuset_attach_task(cs, task);
++
++      cs->attach_in_progress--;
++      if (!cs->attach_in_progress)
++              wake_up(&cpuset_attach_wq);
++
+       percpu_up_write(&cpuset_rwsem);
+ }
+@@ -3305,6 +3378,8 @@ struct cgroup_subsys cpuset_cgrp_subsys
+       .attach         = cpuset_attach,
+       .post_attach    = cpuset_post_attach,
+       .bind           = cpuset_bind,
++      .can_fork       = cpuset_can_fork,
++      .cancel_fork    = cpuset_cancel_fork,
+       .fork           = cpuset_fork,
+       .legacy_cftypes = legacy_files,
+       .dfl_cftypes    = dfl_files,
diff --git a/queue-6.2/cgroup-cpuset-fix-partition-root-s-cpuset.cpus-update-bug.patch b/queue-6.2/cgroup-cpuset-fix-partition-root-s-cpuset.cpus-update-bug.patch
new file mode 100644 (file)
index 0000000..ba5814d
--- /dev/null
@@ -0,0 +1,68 @@
+From 292fd843de26c551856e66faf134512c52dd78b4 Mon Sep 17 00:00:00 2001
+From: Waiman Long <longman@redhat.com>
+Date: Fri, 17 Mar 2023 11:15:05 -0400
+Subject: cgroup/cpuset: Fix partition root's cpuset.cpus update bug
+
+From: Waiman Long <longman@redhat.com>
+
+commit 292fd843de26c551856e66faf134512c52dd78b4 upstream.
+
+It was found that commit 7a2127e66a00 ("cpuset: Call
+set_cpus_allowed_ptr() with appropriate mask for task") introduced a bug
+that corrupted "cpuset.cpus" of a partition root when it was updated.
+
+It is because the tmp->new_cpus field of the passed tmp parameter
+of update_parent_subparts_cpumask() should not be used at all as
+it contains important cpumask data that should not be overwritten.
+Fix it by using tmp->addmask instead.
+
+Also update update_cpumask() to make sure that trialcs->cpus_allowed
+will not be corrupted until it is no longer needed.
+
+Fixes: 7a2127e66a00 ("cpuset: Call set_cpus_allowed_ptr() with appropriate mask for task")
+Signed-off-by: Waiman Long <longman@redhat.com>
+Cc: stable@vger.kernel.org # v6.2+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/cgroup/cpuset.c |   12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+--- a/kernel/cgroup/cpuset.c
++++ b/kernel/cgroup/cpuset.c
+@@ -1513,7 +1513,7 @@ static int update_parent_subparts_cpumas
+       spin_unlock_irq(&callback_lock);
+       if (adding || deleting)
+-              update_tasks_cpumask(parent, tmp->new_cpus);
++              update_tasks_cpumask(parent, tmp->addmask);
+       /*
+        * Set or clear CS_SCHED_LOAD_BALANCE when partcmd_update, if necessary.
+@@ -1770,10 +1770,13 @@ static int update_cpumask(struct cpuset
+       /*
+        * Use the cpumasks in trialcs for tmpmasks when they are pointers
+        * to allocated cpumasks.
++       *
++       * Note that update_parent_subparts_cpumask() uses only addmask &
++       * delmask, but not new_cpus.
+        */
+       tmp.addmask  = trialcs->subparts_cpus;
+       tmp.delmask  = trialcs->effective_cpus;
+-      tmp.new_cpus = trialcs->cpus_allowed;
++      tmp.new_cpus = NULL;
+ #endif
+       retval = validate_change(cs, trialcs);
+@@ -1838,6 +1841,11 @@ static int update_cpumask(struct cpuset
+       }
+       spin_unlock_irq(&callback_lock);
++#ifdef CONFIG_CPUMASK_OFFSTACK
++      /* Now trialcs->cpus_allowed is available */
++      tmp.new_cpus = trialcs->cpus_allowed;
++#endif
++
+       /* effective_cpus will be updated here */
+       update_cpumasks_hier(cs, &tmp, false);
diff --git a/queue-6.2/cgroup-cpuset-make-cpuset_fork-handle-clone_into_cgroup-properly.patch b/queue-6.2/cgroup-cpuset-make-cpuset_fork-handle-clone_into_cgroup-properly.patch
new file mode 100644 (file)
index 0000000..d895b87
--- /dev/null
@@ -0,0 +1,128 @@
+From 42a11bf5c5436e91b040aeb04063be1710bb9f9c Mon Sep 17 00:00:00 2001
+From: Waiman Long <longman@redhat.com>
+Date: Tue, 11 Apr 2023 09:35:58 -0400
+Subject: cgroup/cpuset: Make cpuset_fork() handle CLONE_INTO_CGROUP properly
+
+From: Waiman Long <longman@redhat.com>
+
+commit 42a11bf5c5436e91b040aeb04063be1710bb9f9c upstream.
+
+By default, the clone(2) syscall spawns a child process into the same
+cgroup as its parent. With the use of the CLONE_INTO_CGROUP flag
+introduced by commit ef2c41cf38a7 ("clone3: allow spawning processes
+into cgroups"), the child will be spawned into a different cgroup which
+is somewhat similar to writing the child's tid into "cgroup.threads".
+
+The current cpuset_fork() method does not properly handle the
+CLONE_INTO_CGROUP case where the cpuset of the child may be different
+from that of its parent.  Update the cpuset_fork() method to treat the
+CLONE_INTO_CGROUP case similar to cpuset_attach().
+
+Since the newly cloned task has not been running yet, its actual
+memory usage isn't known. So it is not necessary to make changes to mm
+in cpuset_fork().
+
+Fixes: ef2c41cf38a7 ("clone3: allow spawning processes into cgroups")
+Reported-by: Giuseppe Scrivano <gscrivan@redhat.com>
+Signed-off-by: Waiman Long <longman@redhat.com>
+Cc: stable@vger.kernel.org # v5.7+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/cgroup/cpuset.c |   62 +++++++++++++++++++++++++++++++++----------------
+ 1 file changed, 42 insertions(+), 20 deletions(-)
+
+--- a/kernel/cgroup/cpuset.c
++++ b/kernel/cgroup/cpuset.c
+@@ -2515,16 +2515,33 @@ static void cpuset_cancel_attach(struct
+ }
+ /*
+- * Protected by cpuset_rwsem.  cpus_attach is used only by cpuset_attach()
++ * Protected by cpuset_rwsem. cpus_attach is used only by cpuset_attach_task()
+  * but we can't allocate it dynamically there.  Define it global and
+  * allocate from cpuset_init().
+  */
+ static cpumask_var_t cpus_attach;
++static nodemask_t cpuset_attach_nodemask_to;
++
++static void cpuset_attach_task(struct cpuset *cs, struct task_struct *task)
++{
++      percpu_rwsem_assert_held(&cpuset_rwsem);
++
++      if (cs != &top_cpuset)
++              guarantee_online_cpus(task, cpus_attach);
++      else
++              cpumask_copy(cpus_attach, task_cpu_possible_mask(task));
++      /*
++       * can_attach beforehand should guarantee that this doesn't
++       * fail.  TODO: have a better way to handle failure here
++       */
++      WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
++
++      cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
++      cpuset_update_task_spread_flags(cs, task);
++}
+ static void cpuset_attach(struct cgroup_taskset *tset)
+ {
+-      /* static buf protected by cpuset_rwsem */
+-      static nodemask_t cpuset_attach_nodemask_to;
+       struct task_struct *task;
+       struct task_struct *leader;
+       struct cgroup_subsys_state *css;
+@@ -2555,20 +2572,8 @@ static void cpuset_attach(struct cgroup_
+       guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
+-      cgroup_taskset_for_each(task, css, tset) {
+-              if (cs != &top_cpuset)
+-                      guarantee_online_cpus(task, cpus_attach);
+-              else
+-                      cpumask_copy(cpus_attach, task_cpu_possible_mask(task));
+-              /*
+-               * can_attach beforehand should guarantee that this doesn't
+-               * fail.  TODO: have a better way to handle failure here
+-               */
+-              WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
+-
+-              cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
+-              cpuset_update_task_spread_flags(cs, task);
+-      }
++      cgroup_taskset_for_each(task, css, tset)
++              cpuset_attach_task(cs, task);
+       /*
+        * Change mm for all threadgroup leaders. This is expensive and may
+@@ -3266,11 +3271,28 @@ static void cpuset_bind(struct cgroup_su
+  */
+ static void cpuset_fork(struct task_struct *task)
+ {
+-      if (task_css_is_root(task, cpuset_cgrp_id))
++      struct cpuset *cs;
++      bool same_cs;
++
++      rcu_read_lock();
++      cs = task_cs(task);
++      same_cs = (cs == task_cs(current));
++      rcu_read_unlock();
++
++      if (same_cs) {
++              if (cs == &top_cpuset)
++                      return;
++
++              set_cpus_allowed_ptr(task, current->cpus_ptr);
++              task->mems_allowed = current->mems_allowed;
+               return;
++      }
+-      set_cpus_allowed_ptr(task, current->cpus_ptr);
+-      task->mems_allowed = current->mems_allowed;
++      /* CLONE_INTO_CGROUP */
++      percpu_down_write(&cpuset_rwsem);
++      guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
++      cpuset_attach_task(cs, task);
++      percpu_up_write(&cpuset_rwsem);
+ }
+ struct cgroup_subsys cpuset_cgrp_subsys = {
diff --git a/queue-6.2/cgroup-cpuset-wake-up-cpuset_attach_wq-tasks-in-cpuset_cancel_attach.patch b/queue-6.2/cgroup-cpuset-wake-up-cpuset_attach_wq-tasks-in-cpuset_cancel_attach.patch
new file mode 100644 (file)
index 0000000..797261e
--- /dev/null
@@ -0,0 +1,49 @@
+From ba9182a89626d5f83c2ee4594f55cb9c1e60f0e2 Mon Sep 17 00:00:00 2001
+From: Waiman Long <longman@redhat.com>
+Date: Tue, 11 Apr 2023 09:35:57 -0400
+Subject: cgroup/cpuset: Wake up cpuset_attach_wq tasks in cpuset_cancel_attach()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Waiman Long <longman@redhat.com>
+
+commit ba9182a89626d5f83c2ee4594f55cb9c1e60f0e2 upstream.
+
+After a successful cpuset_can_attach() call which increments the
+attach_in_progress flag, either cpuset_cancel_attach() or cpuset_attach()
+will be called later. In cpuset_attach(), tasks in cpuset_attach_wq,
+if present, will be woken up at the end. That is not the case in
+cpuset_cancel_attach(). So missed wakeup is possible if the attach
+operation is somehow cancelled. Fix that by doing the wakeup in
+cpuset_cancel_attach() as well.
+
+Fixes: e44193d39e8d ("cpuset: let hotplug propagation work wait for task attaching")
+Signed-off-by: Waiman Long <longman@redhat.com>
+Reviewed-by: Michal Koutný <mkoutny@suse.com>
+Cc: stable@vger.kernel.org # v3.11+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/cgroup/cpuset.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/kernel/cgroup/cpuset.c
++++ b/kernel/cgroup/cpuset.c
+@@ -2502,11 +2502,15 @@ out_unlock:
+ static void cpuset_cancel_attach(struct cgroup_taskset *tset)
+ {
+       struct cgroup_subsys_state *css;
++      struct cpuset *cs;
+       cgroup_taskset_first(tset, &css);
++      cs = css_cs(css);
+       percpu_down_write(&cpuset_rwsem);
+-      css_cs(css)->attach_in_progress--;
++      cs->attach_in_progress--;
++      if (!cs->attach_in_progress)
++              wake_up(&cpuset_attach_wq);
+       percpu_up_write(&cpuset_rwsem);
+ }
diff --git a/queue-6.2/cgroup-fix-display-of-forceidle-time-at-root.patch b/queue-6.2/cgroup-fix-display-of-forceidle-time-at-root.patch
new file mode 100644 (file)
index 0000000..af65ee0
--- /dev/null
@@ -0,0 +1,37 @@
+From fcdb1eda5302599045bb366e679cccb4216f3873 Mon Sep 17 00:00:00 2001
+From: Josh Don <joshdon@google.com>
+Date: Wed, 15 Mar 2023 14:40:29 -0700
+Subject: cgroup: fix display of forceidle time at root
+
+From: Josh Don <joshdon@google.com>
+
+commit fcdb1eda5302599045bb366e679cccb4216f3873 upstream.
+
+We need to reset forceidle_sum to 0 when reading from root, since the
+bstat we accumulate into is stack allocated.
+
+To make this more robust, just replace the existing cputime reset with a
+memset of the overall bstat.
+
+Signed-off-by: Josh Don <joshdon@google.com>
+Fixes: 1fcf54deb767 ("sched/core: add forced idle accounting for cgroups")
+Cc: stable@vger.kernel.org # v6.0+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/cgroup/rstat.c |    4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+--- a/kernel/cgroup/rstat.c
++++ b/kernel/cgroup/rstat.c
+@@ -457,9 +457,7 @@ static void root_cgroup_cputime(struct c
+       struct task_cputime *cputime = &bstat->cputime;
+       int i;
+-      cputime->stime = 0;
+-      cputime->utime = 0;
+-      cputime->sum_exec_runtime = 0;
++      memset(bstat, 0, sizeof(*bstat));
+       for_each_possible_cpu(i) {
+               struct kernel_cpustat kcpustat;
+               u64 *cpustat = kcpustat.cpustat;
diff --git a/queue-6.2/drm-amd-pm-correct-smu13.0.7-max-shader-clock-reporting.patch b/queue-6.2/drm-amd-pm-correct-smu13.0.7-max-shader-clock-reporting.patch
new file mode 100644 (file)
index 0000000..3b3e3ad
--- /dev/null
@@ -0,0 +1,105 @@
+From 85e0689eb6b10cd3b2fb455d1b3f4d4d0b13ff78 Mon Sep 17 00:00:00 2001
+From: Horatio Zhang <Hongkun.Zhang@amd.com>
+Date: Thu, 6 Apr 2023 13:32:14 +0800
+Subject: drm/amd/pm: correct SMU13.0.7 max shader clock reporting
+
+From: Horatio Zhang <Hongkun.Zhang@amd.com>
+
+commit 85e0689eb6b10cd3b2fb455d1b3f4d4d0b13ff78 upstream.
+
+Correct the max shader clock reporting on SMU
+13.0.7.
+
+Signed-off-by: Horatio Zhang <Hongkun.Zhang@amd.com>
+Reviewed-by: Kenneth Feng <kenneth.feng@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org # 6.1.x
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c |   61 ++++++++++++++++++-
+ 1 file changed, 60 insertions(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+@@ -575,6 +575,14 @@ static int smu_v13_0_7_set_default_dpm_t
+                                                    dpm_table);
+               if (ret)
+                       return ret;
++
++              if (skutable->DriverReportedClocks.GameClockAc &&
++                      (dpm_table->dpm_levels[dpm_table->count - 1].value >
++                      skutable->DriverReportedClocks.GameClockAc)) {
++                      dpm_table->dpm_levels[dpm_table->count - 1].value =
++                              skutable->DriverReportedClocks.GameClockAc;
++                      dpm_table->max = skutable->DriverReportedClocks.GameClockAc;
++              }
+       } else {
+               dpm_table->count = 1;
+               dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.gfxclk / 100;
+@@ -828,6 +836,57 @@ static int smu_v13_0_7_get_smu_metrics_d
+       return ret;
+ }
++static int smu_v13_0_7_get_dpm_ultimate_freq(struct smu_context *smu,
++                                           enum smu_clk_type clk_type,
++                                           uint32_t *min,
++                                           uint32_t *max)
++{
++      struct smu_13_0_dpm_context *dpm_context =
++              smu->smu_dpm.dpm_context;
++      struct smu_13_0_dpm_table *dpm_table;
++
++      switch (clk_type) {
++      case SMU_MCLK:
++      case SMU_UCLK:
++              /* uclk dpm table */
++              dpm_table = &dpm_context->dpm_tables.uclk_table;
++              break;
++      case SMU_GFXCLK:
++      case SMU_SCLK:
++              /* gfxclk dpm table */
++              dpm_table = &dpm_context->dpm_tables.gfx_table;
++              break;
++      case SMU_SOCCLK:
++              /* socclk dpm table */
++              dpm_table = &dpm_context->dpm_tables.soc_table;
++              break;
++      case SMU_FCLK:
++              /* fclk dpm table */
++              dpm_table = &dpm_context->dpm_tables.fclk_table;
++              break;
++      case SMU_VCLK:
++      case SMU_VCLK1:
++              /* vclk dpm table */
++              dpm_table = &dpm_context->dpm_tables.vclk_table;
++              break;
++      case SMU_DCLK:
++      case SMU_DCLK1:
++              /* dclk dpm table */
++              dpm_table = &dpm_context->dpm_tables.dclk_table;
++              break;
++      default:
++              dev_err(smu->adev->dev, "Unsupported clock type!\n");
++              return -EINVAL;
++      }
++
++      if (min)
++              *min = dpm_table->min;
++      if (max)
++              *max = dpm_table->max;
++
++      return 0;
++}
++
+ static int smu_v13_0_7_read_sensor(struct smu_context *smu,
+                                  enum amd_pp_sensors sensor,
+                                  void *data,
+@@ -1684,7 +1743,7 @@ static const struct pptable_funcs smu_v1
+       .dpm_set_jpeg_enable = smu_v13_0_set_jpeg_enable,
+       .init_pptable_microcode = smu_v13_0_init_pptable_microcode,
+       .populate_umd_state_clk = smu_v13_0_7_populate_umd_state_clk,
+-      .get_dpm_ultimate_freq = smu_v13_0_get_dpm_ultimate_freq,
++      .get_dpm_ultimate_freq = smu_v13_0_7_get_dpm_ultimate_freq,
+       .get_vbios_bootup_values = smu_v13_0_get_vbios_bootup_values,
+       .read_sensor = smu_v13_0_7_read_sensor,
+       .feature_is_enabled = smu_cmn_feature_is_enabled,
diff --git a/queue-6.2/drm-amd-pm-correct-smu13.0.7-pstate-profiling-clock-settings.patch b/queue-6.2/drm-amd-pm-correct-smu13.0.7-pstate-profiling-clock-settings.patch
new file mode 100644 (file)
index 0000000..79cb671
--- /dev/null
@@ -0,0 +1,61 @@
+From f06b8887e3ef4f50098d3a949aef392c529c831a Mon Sep 17 00:00:00 2001
+From: Horatio Zhang <Hongkun.Zhang@amd.com>
+Date: Thu, 6 Apr 2023 11:17:38 +0800
+Subject: drm/amd/pm: correct SMU13.0.7 pstate profiling clock settings
+
+From: Horatio Zhang <Hongkun.Zhang@amd.com>
+
+commit f06b8887e3ef4f50098d3a949aef392c529c831a upstream.
+
+Correct the pstate standard/peak profiling mode clock
+settings for SMU13.0.7.
+
+Signed-off-by: Horatio Zhang <Hongkun.Zhang@amd.com>
+Reviewed-by: Kenneth Feng <kenneth.feng@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org # 6.1.x
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c |   22 ++++++++++++-------
+ 1 file changed, 15 insertions(+), 7 deletions(-)
+
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+@@ -1329,9 +1329,17 @@ static int smu_v13_0_7_populate_umd_stat
+                               &dpm_context->dpm_tables.fclk_table;
+       struct smu_umd_pstate_table *pstate_table =
+                               &smu->pstate_table;
++      struct smu_table_context *table_context = &smu->smu_table;
++      PPTable_t *pptable = table_context->driver_pptable;
++      DriverReportedClocks_t driver_clocks =
++              pptable->SkuTable.DriverReportedClocks;
+       pstate_table->gfxclk_pstate.min = gfx_table->min;
+-      pstate_table->gfxclk_pstate.peak = gfx_table->max;
++      if (driver_clocks.GameClockAc &&
++              (driver_clocks.GameClockAc < gfx_table->max))
++              pstate_table->gfxclk_pstate.peak = driver_clocks.GameClockAc;
++      else
++              pstate_table->gfxclk_pstate.peak = gfx_table->max;
+       pstate_table->uclk_pstate.min = mem_table->min;
+       pstate_table->uclk_pstate.peak = mem_table->max;
+@@ -1348,12 +1356,12 @@ static int smu_v13_0_7_populate_umd_stat
+       pstate_table->fclk_pstate.min = fclk_table->min;
+       pstate_table->fclk_pstate.peak = fclk_table->max;
+-      /*
+-       * For now, just use the mininum clock frequency.
+-       * TODO: update them when the real pstate settings available
+-       */
+-      pstate_table->gfxclk_pstate.standard = gfx_table->min;
+-      pstate_table->uclk_pstate.standard = mem_table->min;
++      if (driver_clocks.BaseClockAc &&
++              driver_clocks.BaseClockAc < gfx_table->max)
++              pstate_table->gfxclk_pstate.standard = driver_clocks.BaseClockAc;
++      else
++              pstate_table->gfxclk_pstate.standard = gfx_table->max;
++      pstate_table->uclk_pstate.standard = mem_table->max;
+       pstate_table->socclk_pstate.standard = soc_table->min;
+       pstate_table->vclk_pstate.standard = vclk_table->min;
+       pstate_table->dclk_pstate.standard = dclk_table->min;
diff --git a/queue-6.2/drm-amd-pm-correct-the-pcie-link-state-check-for-smu13.patch b/queue-6.2/drm-amd-pm-correct-the-pcie-link-state-check-for-smu13.patch
new file mode 100644 (file)
index 0000000..6529664
--- /dev/null
@@ -0,0 +1,64 @@
+From b9a24d8bd51e2db425602fa82d7f4c06aa3db852 Mon Sep 17 00:00:00 2001
+From: Evan Quan <evan.quan@amd.com>
+Date: Fri, 7 Apr 2023 17:12:15 +0800
+Subject: drm/amd/pm: correct the pcie link state check for SMU13
+
+From: Evan Quan <evan.quan@amd.com>
+
+commit b9a24d8bd51e2db425602fa82d7f4c06aa3db852 upstream.
+
+Update the driver implementations to fit those data exposed
+by PMFW.
+
+Signed-off-by: Evan Quan <evan.quan@amd.com>
+Acked-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org # 6.1.x
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h         |    6 ++++++
+ drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c |    4 ++--
+ drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c |    4 ++--
+ 3 files changed, 10 insertions(+), 4 deletions(-)
+
+--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
++++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
+@@ -61,6 +61,12 @@
+ #define CTF_OFFSET_HOTSPOT            5
+ #define CTF_OFFSET_MEM                        5
++static const int pmfw_decoded_link_speed[5] = {1, 2, 3, 4, 5};
++static const int pmfw_decoded_link_width[7] = {0, 1, 2, 4, 8, 12, 16};
++
++#define DECODE_GEN_SPEED(gen_speed_idx)               (pmfw_decoded_link_speed[gen_speed_idx])
++#define DECODE_LANE_WIDTH(lane_width_idx)     (pmfw_decoded_link_width[lane_width_idx])
++
+ struct smu_13_0_max_sustainable_clocks {
+       uint32_t display_clock;
+       uint32_t phy_clock;
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+@@ -1125,8 +1125,8 @@ static int smu_v13_0_0_print_clk_levels(
+                                       (pcie_table->pcie_lane[i] == 5) ? "x12" :
+                                       (pcie_table->pcie_lane[i] == 6) ? "x16" : "",
+                                       pcie_table->clk_freq[i],
+-                                      ((gen_speed - 1) == pcie_table->pcie_gen[i]) &&
+-                                      (lane_width == link_width[pcie_table->pcie_lane[i]]) ?
++                                      (gen_speed == DECODE_GEN_SPEED(pcie_table->pcie_gen[i])) &&
++                                      (lane_width == DECODE_LANE_WIDTH(link_width[pcie_table->pcie_lane[i]])) ?
+                                       "*" : "");
+               break;
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+@@ -1074,8 +1074,8 @@ static int smu_v13_0_7_print_clk_levels(
+                                       (pcie_table->pcie_lane[i] == 5) ? "x12" :
+                                       (pcie_table->pcie_lane[i] == 6) ? "x16" : "",
+                                       pcie_table->clk_freq[i],
+-                                      (gen_speed == pcie_table->pcie_gen[i]) &&
+-                                      (lane_width == pcie_table->pcie_lane[i]) ?
++                                      (gen_speed == DECODE_GEN_SPEED(pcie_table->pcie_gen[i])) &&
++                                      (lane_width == DECODE_LANE_WIDTH(pcie_table->pcie_lane[i])) ?
+                                       "*" : "");
+               break;
diff --git a/queue-6.2/mptcp-fix-null-pointer-dereference-on-fastopen-early-fallback.patch b/queue-6.2/mptcp-fix-null-pointer-dereference-on-fastopen-early-fallback.patch
new file mode 100644 (file)
index 0000000..96ad9ab
--- /dev/null
@@ -0,0 +1,50 @@
+From c0ff6f6da66a7791a32c0234388b1bdc00244917 Mon Sep 17 00:00:00 2001
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Tue, 11 Apr 2023 22:42:11 +0200
+Subject: mptcp: fix NULL pointer dereference on fastopen early fallback
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+commit c0ff6f6da66a7791a32c0234388b1bdc00244917 upstream.
+
+In case of early fallback to TCP, subflow_syn_recv_sock() deletes
+the subflow context before returning the newly allocated sock to
+the caller.
+
+The fastopen path does not cope with the above, as it unconditionally
+dereferences the subflow context.
+
+Fixes: 36b122baf6a8 ("mptcp: add subflow_v(4,6)_send_synack()")
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
+Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/fastopen.c |   11 +++++++++--
+ 1 file changed, 9 insertions(+), 2 deletions(-)
+
+--- a/net/mptcp/fastopen.c
++++ b/net/mptcp/fastopen.c
+@@ -9,11 +9,18 @@
+ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow,
+                                             struct request_sock *req)
+ {
+-      struct sock *ssk = subflow->tcp_sock;
+-      struct sock *sk = subflow->conn;
++      struct sock *sk, *ssk;
+       struct sk_buff *skb;
+       struct tcp_sock *tp;
++      /* on early fallback the subflow context is deleted by
++       * subflow_syn_recv_sock()
++       */
++      if (!subflow)
++              return;
++
++      ssk = subflow->tcp_sock;
++      sk = subflow->conn;
+       tp = tcp_sk(ssk);
+       subflow->is_mptfo = 1;
diff --git a/queue-6.2/mptcp-stricter-state-check-in-mptcp_worker.patch b/queue-6.2/mptcp-stricter-state-check-in-mptcp_worker.patch
new file mode 100644 (file)
index 0000000..def374f
--- /dev/null
@@ -0,0 +1,80 @@
+From d6a0443733434408f2cbd4c53fea6910599bab9e Mon Sep 17 00:00:00 2001
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Tue, 11 Apr 2023 22:42:10 +0200
+Subject: mptcp: stricter state check in mptcp_worker
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+commit d6a0443733434408f2cbd4c53fea6910599bab9e upstream.
+
+As reported by Christoph, the mptcp protocol can run the
+worker when the relevant msk socket is in an unexpected state:
+
+connect()
+// incoming reset + fastclose
+// the mptcp worker is scheduled
+mptcp_disconnect()
+// msk is now CLOSED
+listen()
+mptcp_worker()
+
+Leading to the following splat:
+
+divide error: 0000 [#1] PREEMPT SMP
+CPU: 1 PID: 21 Comm: kworker/1:0 Not tainted 6.3.0-rc1-gde5e8fd0123c #11
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014
+Workqueue: events mptcp_worker
+RIP: 0010:__tcp_select_window+0x22c/0x4b0 net/ipv4/tcp_output.c:3018
+RSP: 0018:ffffc900000b3c98 EFLAGS: 00010293
+RAX: 000000000000ffd7 RBX: 000000000000ffd7 RCX: 0000000000000000
+RDX: 0000000000000000 RSI: ffffffff8214ce97 RDI: 0000000000000004
+RBP: 000000000000ffd7 R08: 0000000000000004 R09: 0000000000010000
+R10: 000000000000ffd7 R11: ffff888005afa148 R12: 000000000000ffd7
+R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
+FS:  0000000000000000(0000) GS:ffff88803ed00000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 0000000000405270 CR3: 000000003011e006 CR4: 0000000000370ee0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+ <TASK>
+ tcp_select_window net/ipv4/tcp_output.c:262 [inline]
+ __tcp_transmit_skb+0x356/0x1280 net/ipv4/tcp_output.c:1345
+ tcp_transmit_skb net/ipv4/tcp_output.c:1417 [inline]
+ tcp_send_active_reset+0x13e/0x320 net/ipv4/tcp_output.c:3459
+ mptcp_check_fastclose net/mptcp/protocol.c:2530 [inline]
+ mptcp_worker+0x6c7/0x800 net/mptcp/protocol.c:2705
+ process_one_work+0x3bd/0x950 kernel/workqueue.c:2390
+ worker_thread+0x5b/0x610 kernel/workqueue.c:2537
+ kthread+0x138/0x170 kernel/kthread.c:376
+ ret_from_fork+0x2c/0x50 arch/x86/entry/entry_64.S:308
+ </TASK>
+
+This change addresses the issue explicitly checking for bad states
+before running the mptcp worker.
+
+Fixes: e16163b6e2b7 ("mptcp: refactor shutdown and close")
+Cc: stable@vger.kernel.org
+Reported-by: Christoph Paasch <cpaasch@apple.com>
+Link: https://github.com/multipath-tcp/mptcp_net-next/issues/374
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
+Tested-by: Christoph Paasch <cpaasch@apple.com>
+Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/protocol.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -2627,7 +2627,7 @@ static void mptcp_worker(struct work_str
+       lock_sock(sk);
+       state = sk->sk_state;
+-      if (unlikely(state == TCP_CLOSE))
++      if (unlikely((1 << state) & (TCPF_CLOSE | TCPF_LISTEN)))
+               goto unlock;
+       mptcp_check_data_fin_ack(sk);
diff --git a/queue-6.2/mptcp-use-mptcp_schedule_work-instead-of-open-coding-it.patch b/queue-6.2/mptcp-use-mptcp_schedule_work-instead-of-open-coding-it.patch
new file mode 100644 (file)
index 0000000..ba0671b
--- /dev/null
@@ -0,0 +1,86 @@
+From a5cb752b125766524c921faab1a45cc96065b0a7 Mon Sep 17 00:00:00 2001
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Tue, 11 Apr 2023 22:42:09 +0200
+Subject: mptcp: use mptcp_schedule_work instead of open-coding it
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+commit a5cb752b125766524c921faab1a45cc96065b0a7 upstream.
+
+Beyond reducing code duplication this also avoids scheduling
+the mptcp_worker on a closed socket on some edge scenarios.
+
+The addressed issue is actually older than the blamed commit
+below, but this fix needs it as a pre-requisite.
+
+Fixes: ba8f48f7a4d7 ("mptcp: introduce mptcp_schedule_work")
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
+Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/options.c |    5 ++---
+ net/mptcp/subflow.c |   18 ++++++------------
+ 2 files changed, 8 insertions(+), 15 deletions(-)
+
+--- a/net/mptcp/options.c
++++ b/net/mptcp/options.c
+@@ -1192,9 +1192,8 @@ bool mptcp_incoming_options(struct sock
+        */
+       if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
+               if (mp_opt.data_fin && mp_opt.data_len == 1 &&
+-                  mptcp_update_rcv_data_fin(msk, mp_opt.data_seq, mp_opt.dsn64) &&
+-                  schedule_work(&msk->work))
+-                      sock_hold(subflow->conn);
++                  mptcp_update_rcv_data_fin(msk, mp_opt.data_seq, mp_opt.dsn64))
++                      mptcp_schedule_work((struct sock *)msk);
+               return true;
+       }
+--- a/net/mptcp/subflow.c
++++ b/net/mptcp/subflow.c
+@@ -407,9 +407,8 @@ void mptcp_subflow_reset(struct sock *ss
+       tcp_send_active_reset(ssk, GFP_ATOMIC);
+       tcp_done(ssk);
+-      if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags) &&
+-          schedule_work(&mptcp_sk(sk)->work))
+-              return; /* worker will put sk for us */
++      if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags))
++              mptcp_schedule_work(sk);
+       sock_put(sk);
+ }
+@@ -1117,8 +1116,8 @@ static enum mapping_status get_mapping_s
+                               skb_ext_del(skb, SKB_EXT_MPTCP);
+                               return MAPPING_OK;
+                       } else {
+-                              if (updated && schedule_work(&msk->work))
+-                                      sock_hold((struct sock *)msk);
++                              if (updated)
++                                      mptcp_schedule_work((struct sock *)msk);
+                               return MAPPING_DATA_FIN;
+                       }
+@@ -1221,17 +1220,12 @@ static void mptcp_subflow_discard_data(s
+ /* sched mptcp worker to remove the subflow if no more data is pending */
+ static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ssk)
+ {
+-      struct sock *sk = (struct sock *)msk;
+-
+       if (likely(ssk->sk_state != TCP_CLOSE))
+               return;
+       if (skb_queue_empty(&ssk->sk_receive_queue) &&
+-          !test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) {
+-              sock_hold(sk);
+-              if (!schedule_work(&msk->work))
+-                      sock_put(sk);
+-      }
++          !test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
++              mptcp_schedule_work((struct sock *)msk);
+ }
+ static bool subflow_can_fallback(struct mptcp_subflow_context *subflow)
diff --git a/queue-6.2/pci-fix-use-after-free-in-pci_bus_release_domain_nr.patch b/queue-6.2/pci-fix-use-after-free-in-pci_bus_release_domain_nr.patch
new file mode 100644 (file)
index 0000000..b4be729
--- /dev/null
@@ -0,0 +1,100 @@
+From 30ba2d09edb5ea857a1473ae3d820911347ada62 Mon Sep 17 00:00:00 2001
+From: Rob Herring <robh@kernel.org>
+Date: Wed, 29 Mar 2023 07:38:35 -0500
+Subject: PCI: Fix use-after-free in pci_bus_release_domain_nr()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Rob Herring <robh@kernel.org>
+
+commit 30ba2d09edb5ea857a1473ae3d820911347ada62 upstream.
+
+Commit c14f7ccc9f5d ("PCI: Assign PCI domain IDs by ida_alloc()")
+introduced a use-after-free bug in the bus removal cleanup. The issue was
+found with kfence:
+
+  [   19.293351] BUG: KFENCE: use-after-free read in pci_bus_release_domain_nr+0x10/0x70
+
+  [   19.302817] Use-after-free read at 0x000000007f3b80eb (in kfence-#115):
+  [   19.309677]  pci_bus_release_domain_nr+0x10/0x70
+  [   19.309691]  dw_pcie_host_deinit+0x28/0x78
+  [   19.309702]  tegra_pcie_deinit_controller+0x1c/0x38 [pcie_tegra194]
+  [   19.309734]  tegra_pcie_dw_probe+0x648/0xb28 [pcie_tegra194]
+  [   19.309752]  platform_probe+0x90/0xd8
+  ...
+
+  [   19.311457] kfence-#115: 0x00000000063a155a-0x00000000ba698da8, size=1072, cache=kmalloc-2k
+
+  [   19.311469] allocated by task 96 on cpu 10 at 19.279323s:
+  [   19.311562]  __kmem_cache_alloc_node+0x260/0x278
+  [   19.311571]  kmalloc_trace+0x24/0x30
+  [   19.311580]  pci_alloc_bus+0x24/0xa0
+  [   19.311590]  pci_register_host_bridge+0x48/0x4b8
+  [   19.311601]  pci_scan_root_bus_bridge+0xc0/0xe8
+  [   19.311613]  pci_host_probe+0x18/0xc0
+  [   19.311623]  dw_pcie_host_init+0x2c0/0x568
+  [   19.311630]  tegra_pcie_dw_probe+0x610/0xb28 [pcie_tegra194]
+  [   19.311647]  platform_probe+0x90/0xd8
+  ...
+
+  [   19.311782] freed by task 96 on cpu 10 at 19.285833s:
+  [   19.311799]  release_pcibus_dev+0x30/0x40
+  [   19.311808]  device_release+0x30/0x90
+  [   19.311814]  kobject_put+0xa8/0x120
+  [   19.311832]  device_unregister+0x20/0x30
+  [   19.311839]  pci_remove_bus+0x78/0x88
+  [   19.311850]  pci_remove_root_bus+0x5c/0x98
+  [   19.311860]  dw_pcie_host_deinit+0x28/0x78
+  [   19.311866]  tegra_pcie_deinit_controller+0x1c/0x38 [pcie_tegra194]
+  [   19.311883]  tegra_pcie_dw_probe+0x648/0xb28 [pcie_tegra194]
+  [   19.311900]  platform_probe+0x90/0xd8
+  ...
+
+  [   19.313579] CPU: 10 PID: 96 Comm: kworker/u24:2 Not tainted 6.2.0 #4
+  [   19.320171] Hardware name:  /, BIOS 1.0-d7fb19b 08/10/2022
+  [   19.325852] Workqueue: events_unbound deferred_probe_work_func
+
+The stack trace is a bit misleading as dw_pcie_host_deinit() doesn't
+directly call pci_bus_release_domain_nr(). The issue turns out to be in
+pci_remove_root_bus() which first calls pci_remove_bus() which frees the
+struct pci_bus when its struct device is released. Then
+pci_bus_release_domain_nr() is called and accesses the freed struct
+pci_bus. Reordering these fixes the issue.
+
+Fixes: c14f7ccc9f5d ("PCI: Assign PCI domain IDs by ida_alloc()")
+Link: https://lore.kernel.org/r/20230329123835.2724518-1-robh@kernel.org
+Link: https://lore.kernel.org/r/b529cb69-0602-9eed-fc02-2f068707a006@nvidia.com
+Reported-by: Jon Hunter <jonathanh@nvidia.com>
+Tested-by: Jon Hunter <jonathanh@nvidia.com>
+Signed-off-by: Rob Herring <robh@kernel.org>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
+Cc: stable@vger.kernel.org     # v6.2+
+Cc: Pali Rohár <pali@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/pci/remove.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/drivers/pci/remove.c
++++ b/drivers/pci/remove.c
+@@ -157,8 +157,6 @@ void pci_remove_root_bus(struct pci_bus
+       list_for_each_entry_safe(child, tmp,
+                                &bus->devices, bus_list)
+               pci_remove_bus_device(child);
+-      pci_remove_bus(bus);
+-      host_bridge->bus = NULL;
+ #ifdef CONFIG_PCI_DOMAINS_GENERIC
+       /* Release domain_nr if it was dynamically allocated */
+@@ -166,6 +164,9 @@ void pci_remove_root_bus(struct pci_bus
+               pci_bus_release_domain_nr(bus, host_bridge->dev.parent);
+ #endif
++      pci_remove_bus(bus);
++      host_bridge->bus = NULL;
++
+       /* remove the host bridge */
+       device_del(&host_bridge->dev);
+ }
diff --git a/queue-6.2/pci-msi-provide-missing-stub-for-pci_msix_can_alloc_dyn.patch b/queue-6.2/pci-msi-provide-missing-stub-for-pci_msix_can_alloc_dyn.patch
new file mode 100644 (file)
index 0000000..185c9dc
--- /dev/null
@@ -0,0 +1,48 @@
+From 195d8e5da3acb17c5357526494f818a21e97cd10 Mon Sep 17 00:00:00 2001
+From: Reinette Chatre <reinette.chatre@intel.com>
+Date: Wed, 29 Mar 2023 13:13:11 -0700
+Subject: PCI/MSI: Provide missing stub for pci_msix_can_alloc_dyn()
+
+From: Reinette Chatre <reinette.chatre@intel.com>
+
+commit 195d8e5da3acb17c5357526494f818a21e97cd10 upstream.
+
+pci_msix_can_alloc_dyn() is not declared when CONFIG_PCI_MSI is disabled.
+
+There is no existing user of pci_msix_can_alloc_dyn() but work is in
+progress to change this. This work encounters the following error when
+CONFIG_PCI_MSI is disabled:
+
+  drivers/vfio/pci/vfio_pci_intrs.c:427:21: error: implicit declaration of function 'pci_msix_can_alloc_dyn' [-Werror=implicit-function-declaration]
+
+Provide definition for pci_msix_can_alloc_dyn() in preparation for users
+that need to compile when CONFIG_PCI_MSI is disabled.
+
+[bhelgaas: Also reported by Arnd Bergmann <arnd@kernel.org> in
+drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c; added his Fixes: line]
+
+Fixes: fb0a6a268dcd ("net/mlx5: Provide external API for allocating vectors")
+Fixes: 34026364df8e ("PCI/MSI: Provide post-enable dynamic allocation interfaces for MSI-X")
+Link: https://lore.kernel.org/oe-kbuild-all/202303291000.PWFqGCxH-lkp@intel.com/
+Link: https://lore.kernel.org/r/310ecc4815dae4174031062f525245f0755c70e2.1680119924.git.reinette.chatre@intel.com
+Reported-by: kernel test robot <lkp@intel.com>
+Signed-off-by: Reinette Chatre <reinette.chatre@intel.com>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
+Cc: stable@vger.kernel.org     # v6.2+
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/pci.h |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/include/linux/pci.h
++++ b/include/linux/pci.h
+@@ -1623,6 +1623,8 @@ pci_alloc_irq_vectors(struct pci_dev *de
+                                             flags, NULL);
+ }
++static inline bool pci_msix_can_alloc_dyn(struct pci_dev *dev)
++{ return false; }
+ static inline struct msi_map pci_msix_alloc_irq_at(struct pci_dev *dev, unsigned int index,
+                                                  const struct irq_affinity_desc *affdesc)
+ {
diff --git a/queue-6.2/scsi-ses-handle-enclosure-with-just-a-primary-component-gracefully.patch b/queue-6.2/scsi-ses-handle-enclosure-with-just-a-primary-component-gracefully.patch
new file mode 100644 (file)
index 0000000..0e48224
--- /dev/null
@@ -0,0 +1,141 @@
+From c8e22b7a1694bb8d025ea636816472739d859145 Mon Sep 17 00:00:00 2001
+From: Jiri Kosina <jkosina@suse.cz>
+Date: Tue, 4 Apr 2023 21:23:42 +0200
+Subject: scsi: ses: Handle enclosure with just a primary component gracefully
+
+From: Jiri Kosina <jkosina@suse.cz>
+
+commit c8e22b7a1694bb8d025ea636816472739d859145 upstream.
+
+This reverts commit 3fe97ff3d949 ("scsi: ses: Don't attach if enclosure
+has no components") and introduces proper handling of case where there are
+no detected secondary components, but primary component (enumerated in
+num_enclosures) does exist. That fix was originally proposed by Ding Hui
+<dinghui@sangfor.com.cn>.
+
+Completely ignoring devices that have one primary enclosure and no
+secondary one results in ses_intf_add() bailing completely
+
+       scsi 2:0:0:254: enclosure has no enumerated components
+       scsi 2:0:0:254: Failed to bind enclosure -12
+
+even on valid configurations with 1 primary and 0 secondary enclosures as
+below:
+
+       # sg_ses /dev/sg0
+         3PARdata  SES               3321
+       Supported diagnostic pages:
+         Supported Diagnostic Pages [sdp] [0x0]
+         Configuration (SES) [cf] [0x1]
+         Short Enclosure Status (SES) [ses] [0x8]
+       # sg_ses -p cf /dev/sg0
+         3PARdata  SES               3321
+       Configuration diagnostic page:
+         number of secondary subenclosures: 0
+         generation code: 0x0
+         enclosure descriptor list
+           Subenclosure identifier: 0 [primary]
+             relative ES process id: 0, number of ES processes: 1
+             number of type descriptor headers: 1
+             enclosure logical identifier (hex): 20000002ac02068d
+             enclosure vendor: 3PARdata  product: VV                rev: 3321
+         type descriptor header and text list
+           Element type: Unspecified, subenclosure id: 0
+             number of possible elements: 1
+
+The changelog for the original fix follows
+
+=====
+We can get a crash when disconnecting the iSCSI session;
+the call trace looks like this:
+
+  [ffff00002a00fb70] kfree at ffff00000830e224
+  [ffff00002a00fba0] ses_intf_remove at ffff000001f200e4
+  [ffff00002a00fbd0] device_del at ffff0000086b6a98
+  [ffff00002a00fc50] device_unregister at ffff0000086b6d58
+  [ffff00002a00fc70] __scsi_remove_device at ffff00000870608c
+  [ffff00002a00fca0] scsi_remove_device at ffff000008706134
+  [ffff00002a00fcc0] __scsi_remove_target at ffff0000087062e4
+  [ffff00002a00fd10] scsi_remove_target at ffff0000087064c0
+  [ffff00002a00fd70] __iscsi_unbind_session at ffff000001c872c4
+  [ffff00002a00fdb0] process_one_work at ffff00000810f35c
+  [ffff00002a00fe00] worker_thread at ffff00000810f648
+  [ffff00002a00fe70] kthread at ffff000008116e98
+
+In ses_intf_add, components count could be 0, and kcalloc 0 size scomp,
+but not saved in edev->component[i].scratch
+
+In this situation, edev->component[0].scratch is an invalid pointer,
+when kfree it in ses_intf_remove_enclosure, a crash like above would happen
+The call trace also could be other random cases when kfree cannot catch
+the invalid pointer
+
+We should not use edev->component[] array when the components count is 0
+We also need check index when use edev->component[] array in
+ses_enclosure_data_process
+=====
+
+Reported-by: Michal Kolar <mich.k@seznam.cz>
+Originally-by: Ding Hui <dinghui@sangfor.com.cn>
+Cc: stable@vger.kernel.org
+Fixes: 3fe97ff3d949 ("scsi: ses: Don't attach if enclosure has no components")
+Signed-off-by: Jiri Kosina <jkosina@suse.cz>
+Link: https://lore.kernel.org/r/nycvar.YFH.7.76.2304042122270.29760@cbobk.fhfr.pm
+Tested-by: Michal Kolar <mich.k@seznam.cz>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/scsi/ses.c |   20 ++++++++------------
+ 1 file changed, 8 insertions(+), 12 deletions(-)
+
+--- a/drivers/scsi/ses.c
++++ b/drivers/scsi/ses.c
+@@ -503,9 +503,6 @@ static int ses_enclosure_find_by_addr(st
+       int i;
+       struct ses_component *scomp;
+-      if (!edev->component[0].scratch)
+-              return 0;
+-
+       for (i = 0; i < edev->components; i++) {
+               scomp = edev->component[i].scratch;
+               if (scomp->addr != efd->addr)
+@@ -596,8 +593,10 @@ static void ses_enclosure_data_process(s
+                                               components++,
+                                               type_ptr[0],
+                                               name);
+-                              else
++                              else if (components < edev->components)
+                                       ecomp = &edev->component[components++];
++                              else
++                                      ecomp = ERR_PTR(-EINVAL);
+                               if (!IS_ERR(ecomp)) {
+                                       if (addl_desc_ptr) {
+@@ -728,11 +727,6 @@ static int ses_intf_add(struct device *c
+                       components += type_ptr[1];
+       }
+-      if (components == 0) {
+-              sdev_printk(KERN_WARNING, sdev, "enclosure has no enumerated components\n");
+-              goto err_free;
+-      }
+-
+       ses_dev->page1 = buf;
+       ses_dev->page1_len = len;
+       buf = NULL;
+@@ -774,9 +768,11 @@ static int ses_intf_add(struct device *c
+               buf = NULL;
+       }
+ page2_not_supported:
+-      scomp = kcalloc(components, sizeof(struct ses_component), GFP_KERNEL);
+-      if (!scomp)
+-              goto err_free;
++      if (components > 0) {
++              scomp = kcalloc(components, sizeof(struct ses_component), GFP_KERNEL);
++              if (!scomp)
++                      goto err_free;
++      }
+       edev = enclosure_register(cdev->parent, dev_name(&sdev->sdev_gendev),
+                                 components, &ses_enclosure_callbacks);
diff --git a/queue-6.2/selftests-mptcp-userspace-pm-uniform-verify-events.patch b/queue-6.2/selftests-mptcp-userspace-pm-uniform-verify-events.patch
new file mode 100644 (file)
index 0000000..bf418fc
--- /dev/null
@@ -0,0 +1,52 @@
+From 711ae788cbbb82818531b55e32b09518ee09a11a Mon Sep 17 00:00:00 2001
+From: Matthieu Baerts <matthieu.baerts@tessares.net>
+Date: Tue, 11 Apr 2023 22:42:12 +0200
+Subject: selftests: mptcp: userspace pm: uniform verify events
+
+From: Matthieu Baerts <matthieu.baerts@tessares.net>
+
+commit 711ae788cbbb82818531b55e32b09518ee09a11a upstream.
+
+Simply adding a "sleep" before checking something is usually not a good
+idea because the time that has been picked may turn out to be too short
+or too long. It is better to wait for events with a timeout.
+
+In this selftest, 'sleep 0.5' is used more than 40 times. It is always
+used before calling a 'verify_*' function except for this
+verify_listener_events which has been added later.
+
+At the end, using all these 'sleep 0.5' seems to work: the slow CIs
+don't complain so far. Also because it doesn't take too much time, we
+can just add two more 'sleep 0.5' to uniform what is done before calling
+a 'verify_*' function. For the same reasons, we can also delay a bigger
+refactoring to replace all these 'sleep 0.5' by functions waiting for
+events instead of waiting for a fixed time and hoping for the best.
+
+Fixes: 6c73008aa301 ("selftests: mptcp: listener test for userspace PM")
+Cc: stable@vger.kernel.org
+Suggested-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/net/mptcp/userspace_pm.sh |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/tools/testing/selftests/net/mptcp/userspace_pm.sh
++++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh
+@@ -884,6 +884,7 @@ test_listener()
+               $client4_port > /dev/null 2>&1 &
+       local listener_pid=$!
++      sleep 0.5
+       verify_listener_events $client_evts $LISTENER_CREATED $AF_INET 10.0.2.2 $client4_port
+       # ADD_ADDR from client to server machine reusing the subflow port
+@@ -899,6 +900,7 @@ test_listener()
+       # Delete the listener from the client ns, if one was created
+       kill_wait $listener_pid
++      sleep 0.5
+       verify_listener_events $client_evts $LISTENER_CLOSED $AF_INET 10.0.2.2 $client4_port
+ }
index c70b1a2e5ecbe866812f9a39e67efaddc510bf3d..aac3054668e6471a13d95f2dfd6f3907341879e9 100644 (file)
@@ -109,3 +109,20 @@ hid-intel-ish-hid-fix-kernel-panic-during-warm-reset.patch
 net-sfp-initialize-sfp-i2c_block_size-at-sfp-allocation.patch
 net-phy-nxp-c45-tja11xx-add-remove-callback.patch
 net-phy-nxp-c45-tja11xx-fix-unsigned-long-multiplication-overflow.patch
+scsi-ses-handle-enclosure-with-just-a-primary-component-gracefully.patch
+thermal-intel-avoid-updating-unsupported-therm_status_clear-mask-bits.patch
+drm-amd-pm-correct-the-pcie-link-state-check-for-smu13.patch
+pci-fix-use-after-free-in-pci_bus_release_domain_nr.patch
+pci-msi-provide-missing-stub-for-pci_msix_can_alloc_dyn.patch
+x86-pci-add-quirk-for-amd-xhci-controller-that-loses-msi-x-state-in-d3hot.patch
+cgroup-fix-display-of-forceidle-time-at-root.patch
+cgroup-cpuset-fix-partition-root-s-cpuset.cpus-update-bug.patch
+cgroup-cpuset-wake-up-cpuset_attach_wq-tasks-in-cpuset_cancel_attach.patch
+cgroup-cpuset-make-cpuset_fork-handle-clone_into_cgroup-properly.patch
+cgroup-cpuset-add-cpuset_can_fork-and-cpuset_cancel_fork-methods.patch
+drm-amd-pm-correct-smu13.0.7-pstate-profiling-clock-settings.patch
+drm-amd-pm-correct-smu13.0.7-max-shader-clock-reporting.patch
+mptcp-use-mptcp_schedule_work-instead-of-open-coding-it.patch
+mptcp-stricter-state-check-in-mptcp_worker.patch
+mptcp-fix-null-pointer-dereference-on-fastopen-early-fallback.patch
+selftests-mptcp-userspace-pm-uniform-verify-events.patch
diff --git a/queue-6.2/thermal-intel-avoid-updating-unsupported-therm_status_clear-mask-bits.patch b/queue-6.2/thermal-intel-avoid-updating-unsupported-therm_status_clear-mask-bits.patch
new file mode 100644 (file)
index 0000000..e4ace66
--- /dev/null
@@ -0,0 +1,137 @@
+From 117e4e5bd9d47b89777dbf6b37a709dcfe59520f Mon Sep 17 00:00:00 2001
+From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Date: Mon, 10 Apr 2023 10:35:01 -0700
+Subject: thermal: intel: Avoid updating unsupported THERM_STATUS_CLEAR mask bits
+
+From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+
+commit 117e4e5bd9d47b89777dbf6b37a709dcfe59520f upstream.
+
+Some older processors don't allow BIT(13) and BIT(15) in the current
+mask set by "THERM_STATUS_CLEAR_CORE_MASK". This results in:
+
+unchecked MSR access error: WRMSR to 0x19c (tried to
+write 0x000000000000aaa8) at rIP: 0xffffffff816f66a6
+(throttle_active_work+0xa6/0x1d0)
+
+To avoid unchecked MSR issues, check CPUID for each relevant feature and
+use that information to set the supported feature bits only in the
+"clear" mask for cores. Do the same for the analogous package mask set
+by "THERM_STATUS_CLEAR_PKG_MASK".
+
+Introduce functions thermal_intr_init_core_clear_mask() and
+thermal_intr_init_pkg_clear_mask() to set core and package mask bits,
+respectively. These functions are called during initialization.
+
+Fixes: 6fe1e64b6026 ("thermal: intel: Prevent accidental clearing of HFI status")
+Reported-by: Rui Salvaterra <rsalvaterra@gmail.com>
+Link: https://lore.kernel.org/lkml/cdf43fb423368ee3994124a9e8c9b4f8d00712c6.camel@linux.intel.com/T/
+Tested-by: Rui Salvaterra <rsalvaterra@gmail.com>
+Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Cc: 6.2+ <stable@kernel.org> # 6.2+
+[ rjw: Renamed 2 functions and 2 static variables, edited subject and
+  changelog ]
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/thermal/intel/therm_throt.c |   73 ++++++++++++++++++++++++++++++++----
+ 1 file changed, 66 insertions(+), 7 deletions(-)
+
+--- a/drivers/thermal/intel/therm_throt.c
++++ b/drivers/thermal/intel/therm_throt.c
+@@ -193,8 +193,67 @@ static const struct attribute_group ther
+ #define THERM_THROT_POLL_INTERVAL     HZ
+ #define THERM_STATUS_PROCHOT_LOG      BIT(1)
+-#define THERM_STATUS_CLEAR_CORE_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11) | BIT(13) | BIT(15))
+-#define THERM_STATUS_CLEAR_PKG_MASK  (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11))
++static u64 therm_intr_core_clear_mask;
++static u64 therm_intr_pkg_clear_mask;
++
++static void thermal_intr_init_core_clear_mask(void)
++{
++      if (therm_intr_core_clear_mask)
++              return;
++
++      /*
++       * Reference: Intel SDM  Volume 4
++       * "Table 2-2. IA-32 Architectural MSRs", MSR 0x19C
++       * IA32_THERM_STATUS.
++       */
++
++      /*
++       * Bit 1, 3, 5: CPUID.01H:EDX[22] = 1. This driver will not
++       * enable interrupts, when 0 as it checks for X86_FEATURE_ACPI.
++       */
++      therm_intr_core_clear_mask = (BIT(1) | BIT(3) | BIT(5));
++
++      /*
++       * Bit 7 and 9: Thermal Threshold #1 and #2 log
++       * If CPUID.01H:ECX[8] = 1
++       */
++      if (boot_cpu_has(X86_FEATURE_TM2))
++              therm_intr_core_clear_mask |= (BIT(7) | BIT(9));
++
++      /* Bit 11: Power Limitation log (R/WC0) If CPUID.06H:EAX[4] = 1 */
++      if (boot_cpu_has(X86_FEATURE_PLN))
++              therm_intr_core_clear_mask |= BIT(11);
++
++      /*
++       * Bit 13: Current Limit log (R/WC0) If CPUID.06H:EAX[7] = 1
++       * Bit 15: Cross Domain Limit log (R/WC0) If CPUID.06H:EAX[7] = 1
++       */
++      if (boot_cpu_has(X86_FEATURE_HWP))
++              therm_intr_core_clear_mask |= (BIT(13) | BIT(15));
++}
++
++static void thermal_intr_init_pkg_clear_mask(void)
++{
++      if (therm_intr_pkg_clear_mask)
++              return;
++
++      /*
++       * Reference: Intel SDM  Volume 4
++       * "Table 2-2. IA-32 Architectural MSRs", MSR 0x1B1
++       * IA32_PACKAGE_THERM_STATUS.
++       */
++
++      /* All bits except BIT 26 depend on CPUID.06H: EAX[6] = 1 */
++      if (boot_cpu_has(X86_FEATURE_PTS))
++              therm_intr_pkg_clear_mask = (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11));
++
++      /*
++       * Intel SDM Volume 2A: Thermal and Power Management Leaf
++       * Bit 26: CPUID.06H: EAX[19] = 1
++       */
++      if (boot_cpu_has(X86_FEATURE_HFI))
++              therm_intr_pkg_clear_mask |= BIT(26);
++}
+ /*
+  * Clear the bits in package thermal status register for bit = 1
+@@ -207,13 +266,10 @@ void thermal_clear_package_intr_status(i
+       if (level == CORE_LEVEL) {
+               msr  = MSR_IA32_THERM_STATUS;
+-              msr_val = THERM_STATUS_CLEAR_CORE_MASK;
++              msr_val = therm_intr_core_clear_mask;
+       } else {
+               msr  = MSR_IA32_PACKAGE_THERM_STATUS;
+-              msr_val = THERM_STATUS_CLEAR_PKG_MASK;
+-              if (boot_cpu_has(X86_FEATURE_HFI))
+-                      msr_val |= BIT(26);
+-
++              msr_val = therm_intr_pkg_clear_mask;
+       }
+       msr_val &= ~bit_mask;
+@@ -708,6 +764,9 @@ void intel_init_thermal(struct cpuinfo_x
+       h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
+       apic_write(APIC_LVTTHMR, h);
++      thermal_intr_init_core_clear_mask();
++      thermal_intr_init_pkg_clear_mask();
++
+       rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
+       if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable)
+               wrmsr(MSR_IA32_THERM_INTERRUPT,
diff --git a/queue-6.2/x86-pci-add-quirk-for-amd-xhci-controller-that-loses-msi-x-state-in-d3hot.patch b/queue-6.2/x86-pci-add-quirk-for-amd-xhci-controller-that-loses-msi-x-state-in-d3hot.patch
new file mode 100644 (file)
index 0000000..0b979eb
--- /dev/null
@@ -0,0 +1,65 @@
+From f195fc1e9715ba826c3b62d58038f760f66a4fe9 Mon Sep 17 00:00:00 2001
+From: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
+Date: Wed, 29 Mar 2023 22:58:59 +0530
+Subject: x86/PCI: Add quirk for AMD XHCI controller that loses MSI-X state in D3hot
+
+From: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
+
+commit f195fc1e9715ba826c3b62d58038f760f66a4fe9 upstream.
+
+The AMD [1022:15b8] USB controller loses some internal functional MSI-X
+context when transitioning from D0 to D3hot. BIOS normally traps D0->D3hot
+and D3hot->D0 transitions so it can save and restore that internal context,
+but some firmware in the field can't do this because it fails to clear the
+AMD_15B8_RCC_DEV2_EPF0_STRAP2 NO_SOFT_RESET bit.
+
+Clear AMD_15B8_RCC_DEV2_EPF0_STRAP2 NO_SOFT_RESET bit before USB controller
+initialization during boot.
+
+Link: https://lore.kernel.org/linux-usb/Y%2Fz9GdHjPyF2rNG3@glanzmann.de/T/#u
+Link: https://lore.kernel.org/r/20230329172859.699743-1-Basavaraj.Natikar@amd.com
+Reported-by: Thomas Glanzmann <thomas@glanzmann.de>
+Tested-by: Thomas Glanzmann <thomas@glanzmann.de>
+Signed-off-by: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/pci/fixup.c |   21 +++++++++++++++++++++
+ 1 file changed, 21 insertions(+)
+
+--- a/arch/x86/pci/fixup.c
++++ b/arch/x86/pci/fixup.c
+@@ -7,6 +7,7 @@
+ #include <linux/dmi.h>
+ #include <linux/pci.h>
+ #include <linux/vgaarb.h>
++#include <asm/amd_nb.h>
+ #include <asm/hpet.h>
+ #include <asm/pci_x86.h>
+@@ -824,3 +825,23 @@ static void rs690_fix_64bit_dma(struct p
+ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7910, rs690_fix_64bit_dma);
+ #endif
++
++#ifdef CONFIG_AMD_NB
++
++#define AMD_15B8_RCC_DEV2_EPF0_STRAP2                                  0x10136008
++#define AMD_15B8_RCC_DEV2_EPF0_STRAP2_NO_SOFT_RESET_DEV2_F0_MASK       0x00000080L
++
++static void quirk_clear_strap_no_soft_reset_dev2_f0(struct pci_dev *dev)
++{
++      u32 data;       /* STRAP2 value, filled in by amd_smn_read() */
++
++      if (!amd_smn_read(0, AMD_15B8_RCC_DEV2_EPF0_STRAP2, &data)) {
++              data &= ~AMD_15B8_RCC_DEV2_EPF0_STRAP2_NO_SOFT_RESET_DEV2_F0_MASK;
++              if (amd_smn_write(0, AMD_15B8_RCC_DEV2_EPF0_STRAP2, data))
++                      pci_err(dev, "Failed to write data 0x%x\n", data);
++      } else {        /* read failed: nothing is written back */
++              pci_err(dev, "Failed to read data\n");
++      }
++}
++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x15b8, quirk_clear_strap_no_soft_reset_dev2_f0);
++#endif