6.6-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 3 Apr 2024 17:47:30 +0000 (19:47 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 3 Apr 2024 17:47:30 +0000 (19:47 +0200)
diff --git a/queue-6.6/revert-workqueue-don-t-call-cpumask_test_cpu-with-1-cpu-in-wq_update_node_max_active.patch b/queue-6.6/revert-workqueue-don-t-call-cpumask_test_cpu-with-1-cpu-in-wq_update_node_max_active.patch

new file mode 100644 (file)

index 0000000..a81d0fa
--- /dev/null
+++ b/queue-6.6/revert-workqueue-don-t-call-cpumask_test_cpu-with-1-cpu-in-wq_update_node_max_active.patch
@@ -0,0 +1,36 @@
+From 408bc969ba46368c0c170f2ba822644c43ffe966 Mon Sep 17 00:00:00 2001
+From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Date: Wed, 3 Apr 2024 16:35:37 +0200
+Subject: Revert "workqueue: Don't call cpumask_test_cpu() with -1 CPU in wq_update_node_max_active()"
+
+From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+This reverts commit 7df62b8cca38aa452b508b477b16544cba615084 which is
+commit 15930da42f8981dc42c19038042947b475b19f47 upstream.
+
+The workqueue patches backported to 6.6.y caused some reported
+regressions, so revert them for now.
+
+Reported-by: Thorsten Leemhuis <regressions@leemhuis.info>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Marek Szyprowski <m.szyprowski@samsung.com>
+Cc: Nathan Chancellor <nathan@kernel.org>
+Cc: Sasha Levin <sashal@kernel.org>
+Cc: Audra Mitchell <audra@redhat.com>
+Link: https://lore.kernel.org/all/ce4c2f67-c298-48a0-87a3-f933d646c73b@leemhuis.info/
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/workqueue.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/workqueue.c
++++ b/kernel/workqueue.c
+@@ -1500,7 +1500,7 @@ static void wq_update_node_max_active(st
+ 
+       lockdep_assert_held(&wq->mutex);
+ 
+-      if (off_cpu >= 0 && !cpumask_test_cpu(off_cpu, effective))
++      if (!cpumask_test_cpu(off_cpu, effective))
+               off_cpu = -1;
+ 
+       total_cpus = cpumask_weight_and(effective, cpu_online_mask);
diff --git a/queue-6.6/revert-workqueue-factor-out-pwq_is_empty.patch b/queue-6.6/revert-workqueue-factor-out-pwq_is_empty.patch

new file mode 100644 (file)

index 0000000..ad4988b
--- /dev/null
+++ b/queue-6.6/revert-workqueue-factor-out-pwq_is_empty.patch
@@ -0,0 +1,75 @@
+From 5657d3e1d91861a39638e620d7aeaadc530a5a1e Mon Sep 17 00:00:00 2001
+From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Date: Wed, 3 Apr 2024 16:36:48 +0200
+Subject: Revert "workqueue: Factor out pwq_is_empty()"
+
+From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+This reverts commit bad184d26a4f68aa00ad75502f9669950a790f71 which is
+commit afa87ce85379e2d93863fce595afdb5771a84004 upstream.
+
+The workqueue patches backported to 6.6.y caused some reported
+regressions, so revert them for now.
+
+Reported-by: Thorsten Leemhuis <regressions@leemhuis.info>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Marek Szyprowski <m.szyprowski@samsung.com>
+Cc: Nathan Chancellor <nathan@kernel.org>
+Cc: Sasha Levin <sashal@kernel.org>
+Cc: Audra Mitchell <audra@redhat.com>
+Link: https://lore.kernel.org/all/ce4c2f67-c298-48a0-87a3-f933d646c73b@leemhuis.info/
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/workqueue.c |   13 ++++---------
+ 1 file changed, 4 insertions(+), 9 deletions(-)
+
+--- a/kernel/workqueue.c
++++ b/kernel/workqueue.c
+@@ -1450,11 +1450,6 @@ static void put_pwq_unlocked(struct pool
+       }
+ }
+ 
+-static bool pwq_is_empty(struct pool_workqueue *pwq)
+-{
+-      return !pwq->nr_active && list_empty(&pwq->inactive_works);
+-}
+-
+ static void pwq_activate_inactive_work(struct work_struct *work)
+ {
+       struct pool_workqueue *pwq = get_work_pwq(work);
+@@ -3324,7 +3319,7 @@ reflush:
+               bool drained;
+ 
+               raw_spin_lock_irq(&pwq->pool->lock);
+-              drained = pwq_is_empty(pwq);
++              drained = !pwq->nr_active && list_empty(&pwq->inactive_works);
+               raw_spin_unlock_irq(&pwq->pool->lock);
+ 
+               if (drained)
+@@ -4784,7 +4779,7 @@ static bool pwq_busy(struct pool_workque
+ 
+       if ((pwq != pwq->wq->dfl_pwq) && (pwq->refcnt > 1))
+               return true;
+-      if (!pwq_is_empty(pwq))
++      if (pwq->nr_active || !list_empty(&pwq->inactive_works))
+               return true;
+ 
+       return false;
+@@ -5222,7 +5217,7 @@ void show_one_workqueue(struct workqueue
+       unsigned long flags;
+ 
+       for_each_pwq(pwq, wq) {
+-              if (!pwq_is_empty(pwq)) {
++              if (pwq->nr_active || !list_empty(&pwq->inactive_works)) {
+                       idle = false;
+                       break;
+               }
+@@ -5234,7 +5229,7 @@ void show_one_workqueue(struct workqueue
+ 
+       for_each_pwq(pwq, wq) {
+               raw_spin_lock_irqsave(&pwq->pool->lock, flags);
+-              if (!pwq_is_empty(pwq)) {
++              if (pwq->nr_active || !list_empty(&pwq->inactive_works)) {
+                       /*
+                        * Defer printing to avoid deadlocks in console
+                        * drivers that queue work while holding locks
diff --git a/queue-6.6/revert-workqueue-implement-system-wide-nr_active-enforcement-for-unbound-workqueues.patch b/queue-6.6/revert-workqueue-implement-system-wide-nr_active-enforcement-for-unbound-workqueues.patch

new file mode 100644 (file)

index 0000000..96413fb
--- /dev/null
+++ b/queue-6.6/revert-workqueue-implement-system-wide-nr_active-enforcement-for-unbound-workqueues.patch
@@ -0,0 +1,645 @@
+From a46197fa531d3f2cf00b43a84babd3bc6f14d656 Mon Sep 17 00:00:00 2001
+From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Date: Wed, 3 Apr 2024 16:36:17 +0200
+Subject: Revert "workqueue: Implement system-wide nr_active enforcement for unbound workqueues"
+
+From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+This reverts commit 5a70baec2294e8a7d0fcc4558741c23e752dad5c which is
+commit 5797b1c18919cd9c289ded7954383e499f729ce0 upstream.
+
+The workqueue patches backported to 6.6.y caused some reported
+regressions, so revert them for now.
+
+Reported-by: Thorsten Leemhuis <regressions@leemhuis.info>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Marek Szyprowski <m.szyprowski@samsung.com>
+Cc: Nathan Chancellor <nathan@kernel.org>
+Cc: Sasha Levin <sashal@kernel.org>
+Cc: Audra Mitchell <audra@redhat.com>
+Link: https://lore.kernel.org/all/ce4c2f67-c298-48a0-87a3-f933d646c73b@leemhuis.info/
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/workqueue.h |   35 ----
+ kernel/workqueue.c        |  341 ++++------------------------------------------
+ 2 files changed, 35 insertions(+), 341 deletions(-)
+
+--- a/include/linux/workqueue.h
++++ b/include/linux/workqueue.h
+@@ -405,13 +405,6 @@ enum {
+       WQ_MAX_ACTIVE           = 512,    /* I like 512, better ideas? */
+       WQ_UNBOUND_MAX_ACTIVE   = WQ_MAX_ACTIVE,
+       WQ_DFL_ACTIVE           = WQ_MAX_ACTIVE / 2,
+-
+-      /*
+-       * Per-node default cap on min_active. Unless explicitly set, min_active
+-       * is set to min(max_active, WQ_DFL_MIN_ACTIVE). For more details, see
+-       * workqueue_struct->min_active definition.
+-       */
+-      WQ_DFL_MIN_ACTIVE       = 8,
+ };
+ 
+ /*
+@@ -454,33 +447,11 @@ extern struct workqueue_struct *system_f
+  * alloc_workqueue - allocate a workqueue
+  * @fmt: printf format for the name of the workqueue
+  * @flags: WQ_* flags
+- * @max_active: max in-flight work items, 0 for default
++ * @max_active: max in-flight work items per CPU, 0 for default
+  * remaining args: args for @fmt
+  *
+- * For a per-cpu workqueue, @max_active limits the number of in-flight work
+- * items for each CPU. e.g. @max_active of 1 indicates that each CPU can be
+- * executing at most one work item for the workqueue.
+- *
+- * For unbound workqueues, @max_active limits the number of in-flight work items
+- * for the whole system. e.g. @max_active of 16 indicates that that there can be
+- * at most 16 work items executing for the workqueue in the whole system.
+- *
+- * As sharing the same active counter for an unbound workqueue across multiple
+- * NUMA nodes can be expensive, @max_active is distributed to each NUMA node
+- * according to the proportion of the number of online CPUs and enforced
+- * independently.
+- *
+- * Depending on online CPU distribution, a node may end up with per-node
+- * max_active which is significantly lower than @max_active, which can lead to
+- * deadlocks if the per-node concurrency limit is lower than the maximum number
+- * of interdependent work items for the workqueue.
+- *
+- * To guarantee forward progress regardless of online CPU distribution, the
+- * concurrency limit on every node is guaranteed to be equal to or greater than
+- * min_active which is set to min(@max_active, %WQ_DFL_MIN_ACTIVE). This means
+- * that the sum of per-node max_active's may be larger than @max_active.
+- *
+- * For detailed information on %WQ_* flags, please refer to
++ * Allocate a workqueue with the specified parameters.  For detailed
++ * information on WQ_* flags, please refer to
+  * Documentation/core-api/workqueue.rst.
+  *
+  * RETURNS:
+--- a/kernel/workqueue.c
++++ b/kernel/workqueue.c
+@@ -122,9 +122,6 @@ enum {
+  *
+  * L: pool->lock protected.  Access with pool->lock held.
+  *
+- * LN: pool->lock and wq_node_nr_active->lock protected for writes. Either for
+- *     reads.
+- *
+  * K: Only modified by worker while holding pool->lock. Can be safely read by
+  *    self, while holding pool->lock or from IRQ context if %current is the
+  *    kworker.
+@@ -246,18 +243,17 @@ struct pool_workqueue {
+        * pwq->inactive_works instead of pool->worklist and marked with
+        * WORK_STRUCT_INACTIVE.
+        *
+-       * All work items marked with WORK_STRUCT_INACTIVE do not participate in
+-       * nr_active and all work items in pwq->inactive_works are marked with
+-       * WORK_STRUCT_INACTIVE. But not all WORK_STRUCT_INACTIVE work items are
+-       * in pwq->inactive_works. Some of them are ready to run in
+-       * pool->worklist or worker->scheduled. Those work itmes are only struct
+-       * wq_barrier which is used for flush_work() and should not participate
+-       * in nr_active. For non-barrier work item, it is marked with
+-       * WORK_STRUCT_INACTIVE iff it is in pwq->inactive_works.
++       * All work items marked with WORK_STRUCT_INACTIVE do not participate
++       * in pwq->nr_active and all work items in pwq->inactive_works are
++       * marked with WORK_STRUCT_INACTIVE.  But not all WORK_STRUCT_INACTIVE
++       * work items are in pwq->inactive_works.  Some of them are ready to
++       * run in pool->worklist or worker->scheduled.  Those work itmes are
++       * only struct wq_barrier which is used for flush_work() and should
++       * not participate in pwq->nr_active.  For non-barrier work item, it
++       * is marked with WORK_STRUCT_INACTIVE iff it is in pwq->inactive_works.
+        */
+       int                     nr_active;      /* L: nr of active works */
+       struct list_head        inactive_works; /* L: inactive works */
+-      struct list_head        pending_node;   /* LN: node on wq_node_nr_active->pending_pwqs */
+       struct list_head        pwqs_node;      /* WR: node on wq->pwqs */
+       struct list_head        mayday_node;    /* MD: node on wq->maydays */
+ 
+@@ -289,19 +285,9 @@ struct wq_device;
+  * on each CPU, in an unbound workqueue, max_active applies to the whole system.
+  * As sharing a single nr_active across multiple sockets can be very expensive,
+  * the counting and enforcement is per NUMA node.
+- *
+- * The following struct is used to enforce per-node max_active. When a pwq wants
+- * to start executing a work item, it should increment ->nr using
+- * tryinc_node_nr_active(). If acquisition fails due to ->nr already being over
+- * ->max, the pwq is queued on ->pending_pwqs. As in-flight work items finish
+- * and decrement ->nr, node_activate_pending_pwq() activates the pending pwqs in
+- * round-robin order.
+  */
+ struct wq_node_nr_active {
+-      int                     max;            /* per-node max_active */
+-      atomic_t                nr;             /* per-node nr_active */
+-      raw_spinlock_t          lock;           /* nests inside pool locks */
+-      struct list_head        pending_pwqs;   /* LN: pwqs with inactive works */
++      atomic_t                nr;             /* per-node nr_active count */
+ };
+ 
+ /*
+@@ -324,12 +310,8 @@ struct workqueue_struct {
+       struct worker           *rescuer;       /* MD: rescue worker */
+ 
+       int                     nr_drainers;    /* WQ: drain in progress */
+-
+-      /* See alloc_workqueue() function comment for info on min/max_active */
+       int                     max_active;     /* WO: max active works */
+-      int                     min_active;     /* WO: min active works */
+       int                     saved_max_active; /* WQ: saved max_active */
+-      int                     saved_min_active; /* WQ: saved min_active */
+ 
+       struct workqueue_attrs  *unbound_attrs; /* PW: only for unbound wqs */
+       struct pool_workqueue __rcu *dfl_pwq;   /* PW: only for unbound wqs */
+@@ -675,19 +657,6 @@ static struct pool_workqueue *unbound_pw
+                                    lockdep_is_held(&wq->mutex));
+ }
+ 
+-/**
+- * unbound_effective_cpumask - effective cpumask of an unbound workqueue
+- * @wq: workqueue of interest
+- *
+- * @wq->unbound_attrs->cpumask contains the cpumask requested by the user which
+- * is masked with wq_unbound_cpumask to determine the effective cpumask. The
+- * default pwq is always mapped to the pool with the current effective cpumask.
+- */
+-static struct cpumask *unbound_effective_cpumask(struct workqueue_struct *wq)
+-{
+-      return unbound_pwq(wq, -1)->pool->attrs->__pod_cpumask;
+-}
+-
+ static unsigned int work_color_to_flags(int color)
+ {
+       return color << WORK_STRUCT_COLOR_SHIFT;
+@@ -1483,46 +1452,6 @@ static struct wq_node_nr_active *wq_node
+ }
+ 
+ /**
+- * wq_update_node_max_active - Update per-node max_actives to use
+- * @wq: workqueue to update
+- * @off_cpu: CPU that's going down, -1 if a CPU is not going down
+- *
+- * Update @wq->node_nr_active[]->max. @wq must be unbound. max_active is
+- * distributed among nodes according to the proportions of numbers of online
+- * cpus. The result is always between @wq->min_active and max_active.
+- */
+-static void wq_update_node_max_active(struct workqueue_struct *wq, int off_cpu)
+-{
+-      struct cpumask *effective = unbound_effective_cpumask(wq);
+-      int min_active = READ_ONCE(wq->min_active);
+-      int max_active = READ_ONCE(wq->max_active);
+-      int total_cpus, node;
+-
+-      lockdep_assert_held(&wq->mutex);
+-
+-      if (!cpumask_test_cpu(off_cpu, effective))
+-              off_cpu = -1;
+-
+-      total_cpus = cpumask_weight_and(effective, cpu_online_mask);
+-      if (off_cpu >= 0)
+-              total_cpus--;
+-
+-      for_each_node(node) {
+-              int node_cpus;
+-
+-              node_cpus = cpumask_weight_and(effective, cpumask_of_node(node));
+-              if (off_cpu >= 0 && cpu_to_node(off_cpu) == node)
+-                      node_cpus--;
+-
+-              wq_node_nr_active(wq, node)->max =
+-                      clamp(DIV_ROUND_UP(max_active * node_cpus, total_cpus),
+-                            min_active, max_active);
+-      }
+-
+-      wq_node_nr_active(wq, NUMA_NO_NODE)->max = min_active;
+-}
+-
+-/**
+  * get_pwq - get an extra reference on the specified pool_workqueue
+  * @pwq: pool_workqueue to get
+  *
+@@ -1619,98 +1548,35 @@ static bool pwq_activate_work(struct poo
+       return true;
+ }
+ 
+-static bool tryinc_node_nr_active(struct wq_node_nr_active *nna)
+-{
+-      int max = READ_ONCE(nna->max);
+-
+-      while (true) {
+-              int old, tmp;
+-
+-              old = atomic_read(&nna->nr);
+-              if (old >= max)
+-                      return false;
+-              tmp = atomic_cmpxchg_relaxed(&nna->nr, old, old + 1);
+-              if (tmp == old)
+-                      return true;
+-      }
+-}
+-
+ /**
+  * pwq_tryinc_nr_active - Try to increment nr_active for a pwq
+  * @pwq: pool_workqueue of interest
+- * @fill: max_active may have increased, try to increase concurrency level
+  *
+  * Try to increment nr_active for @pwq. Returns %true if an nr_active count is
+  * successfully obtained. %false otherwise.
+  */
+-static bool pwq_tryinc_nr_active(struct pool_workqueue *pwq, bool fill)
++static bool pwq_tryinc_nr_active(struct pool_workqueue *pwq)
+ {
+       struct workqueue_struct *wq = pwq->wq;
+       struct worker_pool *pool = pwq->pool;
+       struct wq_node_nr_active *nna = wq_node_nr_active(wq, pool->node);
+-      bool obtained = false;
++      bool obtained;
+ 
+       lockdep_assert_held(&pool->lock);
+ 
+-      if (!nna) {
+-              /* per-cpu workqueue, pwq->nr_active is sufficient */
+-              obtained = pwq->nr_active < READ_ONCE(wq->max_active);
+-              goto out;
+-      }
+-
+-      /*
+-       * Unbound workqueue uses per-node shared nr_active $nna. If @pwq is
+-       * already waiting on $nna, pwq_dec_nr_active() will maintain the
+-       * concurrency level. Don't jump the line.
+-       *
+-       * We need to ignore the pending test after max_active has increased as
+-       * pwq_dec_nr_active() can only maintain the concurrency level but not
+-       * increase it. This is indicated by @fill.
+-       */
+-      if (!list_empty(&pwq->pending_node) && likely(!fill))
+-              goto out;
+-
+-      obtained = tryinc_node_nr_active(nna);
+-      if (obtained)
+-              goto out;
+-
+-      /*
+-       * Lockless acquisition failed. Lock, add ourself to $nna->pending_pwqs
+-       * and try again. The smp_mb() is paired with the implied memory barrier
+-       * of atomic_dec_return() in pwq_dec_nr_active() to ensure that either
+-       * we see the decremented $nna->nr or they see non-empty
+-       * $nna->pending_pwqs.
+-       */
+-      raw_spin_lock(&nna->lock);
+-
+-      if (list_empty(&pwq->pending_node))
+-              list_add_tail(&pwq->pending_node, &nna->pending_pwqs);
+-      else if (likely(!fill))
+-              goto out_unlock;
+-
+-      smp_mb();
+-
+-      obtained = tryinc_node_nr_active(nna);
+-
+-      /*
+-       * If @fill, @pwq might have already been pending. Being spuriously
+-       * pending in cold paths doesn't affect anything. Let's leave it be.
+-       */
+-      if (obtained && likely(!fill))
+-              list_del_init(&pwq->pending_node);
++      obtained = pwq->nr_active < READ_ONCE(wq->max_active);
+ 
+-out_unlock:
+-      raw_spin_unlock(&nna->lock);
+-out:
+-      if (obtained)
++      if (obtained) {
+               pwq->nr_active++;
++              if (nna)
++                      atomic_inc(&nna->nr);
++      }
+       return obtained;
+ }
+ 
+ /**
+  * pwq_activate_first_inactive - Activate the first inactive work item on a pwq
+  * @pwq: pool_workqueue of interest
+- * @fill: max_active may have increased, try to increase concurrency level
+  *
+  * Activate the first inactive work item of @pwq if available and allowed by
+  * max_active limit.
+@@ -1718,13 +1584,13 @@ out:
+  * Returns %true if an inactive work item has been activated. %false if no
+  * inactive work item is found or max_active limit is reached.
+  */
+-static bool pwq_activate_first_inactive(struct pool_workqueue *pwq, bool fill)
++static bool pwq_activate_first_inactive(struct pool_workqueue *pwq)
+ {
+       struct work_struct *work =
+               list_first_entry_or_null(&pwq->inactive_works,
+                                        struct work_struct, entry);
+ 
+-      if (work && pwq_tryinc_nr_active(pwq, fill)) {
++      if (work && pwq_tryinc_nr_active(pwq)) {
+               __pwq_activate_work(pwq, work);
+               return true;
+       } else {
+@@ -1733,92 +1599,10 @@ static bool pwq_activate_first_inactive(
+ }
+ 
+ /**
+- * node_activate_pending_pwq - Activate a pending pwq on a wq_node_nr_active
+- * @nna: wq_node_nr_active to activate a pending pwq for
+- * @caller_pool: worker_pool the caller is locking
+- *
+- * Activate a pwq in @nna->pending_pwqs. Called with @caller_pool locked.
+- * @caller_pool may be unlocked and relocked to lock other worker_pools.
+- */
+-static void node_activate_pending_pwq(struct wq_node_nr_active *nna,
+-                                    struct worker_pool *caller_pool)
+-{
+-      struct worker_pool *locked_pool = caller_pool;
+-      struct pool_workqueue *pwq;
+-      struct work_struct *work;
+-
+-      lockdep_assert_held(&caller_pool->lock);
+-
+-      raw_spin_lock(&nna->lock);
+-retry:
+-      pwq = list_first_entry_or_null(&nna->pending_pwqs,
+-                                     struct pool_workqueue, pending_node);
+-      if (!pwq)
+-              goto out_unlock;
+-
+-      /*
+-       * If @pwq is for a different pool than @locked_pool, we need to lock
+-       * @pwq->pool->lock. Let's trylock first. If unsuccessful, do the unlock
+-       * / lock dance. For that, we also need to release @nna->lock as it's
+-       * nested inside pool locks.
+-       */
+-      if (pwq->pool != locked_pool) {
+-              raw_spin_unlock(&locked_pool->lock);
+-              locked_pool = pwq->pool;
+-              if (!raw_spin_trylock(&locked_pool->lock)) {
+-                      raw_spin_unlock(&nna->lock);
+-                      raw_spin_lock(&locked_pool->lock);
+-                      raw_spin_lock(&nna->lock);
+-                      goto retry;
+-              }
+-      }
+-
+-      /*
+-       * $pwq may not have any inactive work items due to e.g. cancellations.
+-       * Drop it from pending_pwqs and see if there's another one.
+-       */
+-      work = list_first_entry_or_null(&pwq->inactive_works,
+-                                      struct work_struct, entry);
+-      if (!work) {
+-              list_del_init(&pwq->pending_node);
+-              goto retry;
+-      }
+-
+-      /*
+-       * Acquire an nr_active count and activate the inactive work item. If
+-       * $pwq still has inactive work items, rotate it to the end of the
+-       * pending_pwqs so that we round-robin through them. This means that
+-       * inactive work items are not activated in queueing order which is fine
+-       * given that there has never been any ordering across different pwqs.
+-       */
+-      if (likely(tryinc_node_nr_active(nna))) {
+-              pwq->nr_active++;
+-              __pwq_activate_work(pwq, work);
+-
+-              if (list_empty(&pwq->inactive_works))
+-                      list_del_init(&pwq->pending_node);
+-              else
+-                      list_move_tail(&pwq->pending_node, &nna->pending_pwqs);
+-
+-              /* if activating a foreign pool, make sure it's running */
+-              if (pwq->pool != caller_pool)
+-                      kick_pool(pwq->pool);
+-      }
+-
+-out_unlock:
+-      raw_spin_unlock(&nna->lock);
+-      if (locked_pool != caller_pool) {
+-              raw_spin_unlock(&locked_pool->lock);
+-              raw_spin_lock(&caller_pool->lock);
+-      }
+-}
+-
+-/**
+  * pwq_dec_nr_active - Retire an active count
+  * @pwq: pool_workqueue of interest
+  *
+  * Decrement @pwq's nr_active and try to activate the first inactive work item.
+- * For unbound workqueues, this function may temporarily drop @pwq->pool->lock.
+  */
+ static void pwq_dec_nr_active(struct pool_workqueue *pwq)
+ {
+@@ -1838,29 +1622,12 @@ static void pwq_dec_nr_active(struct poo
+        * inactive work item on @pwq itself.
+        */
+       if (!nna) {
+-              pwq_activate_first_inactive(pwq, false);
++              pwq_activate_first_inactive(pwq);
+               return;
+       }
+ 
+-      /*
+-       * If @pwq is for an unbound workqueue, it's more complicated because
+-       * multiple pwqs and pools may be sharing the nr_active count. When a
+-       * pwq needs to wait for an nr_active count, it puts itself on
+-       * $nna->pending_pwqs. The following atomic_dec_return()'s implied
+-       * memory barrier is paired with smp_mb() in pwq_tryinc_nr_active() to
+-       * guarantee that either we see non-empty pending_pwqs or they see
+-       * decremented $nna->nr.
+-       *
+-       * $nna->max may change as CPUs come online/offline and @pwq->wq's
+-       * max_active gets updated. However, it is guaranteed to be equal to or
+-       * larger than @pwq->wq->min_active which is above zero unless freezing.
+-       * This maintains the forward progress guarantee.
+-       */
+-      if (atomic_dec_return(&nna->nr) >= READ_ONCE(nna->max))
+-              return;
+-
+-      if (!list_empty(&nna->pending_pwqs))
+-              node_activate_pending_pwq(nna, pool);
++      atomic_dec(&nna->nr);
++      pwq_activate_first_inactive(pwq);
+ }
+ 
+ /**
+@@ -2181,7 +1948,7 @@ retry:
+        * @work must also queue behind existing inactive work items to maintain
+        * ordering when max_active changes. See wq_adjust_max_active().
+        */
+-      if (list_empty(&pwq->inactive_works) && pwq_tryinc_nr_active(pwq, false)) {
++      if (list_empty(&pwq->inactive_works) && pwq_tryinc_nr_active(pwq)) {
+               if (list_empty(&pool->worklist))
+                       pool->watchdog_ts = jiffies;
+ 
+@@ -3414,7 +3181,7 @@ static void insert_wq_barrier(struct poo
+ 
+       barr->task = current;
+ 
+-      /* The barrier work item does not participate in nr_active. */
++      /* The barrier work item does not participate in pwq->nr_active. */
+       work_flags |= WORK_STRUCT_INACTIVE;
+ 
+       /*
+@@ -4330,8 +4097,6 @@ static void free_node_nr_active(struct w
+ static void init_node_nr_active(struct wq_node_nr_active *nna)
+ {
+       atomic_set(&nna->nr, 0);
+-      raw_spin_lock_init(&nna->lock);
+-      INIT_LIST_HEAD(&nna->pending_pwqs);
+ }
+ 
+ /*
+@@ -4571,15 +4336,6 @@ static void pwq_release_workfn(struct kt
+               mutex_unlock(&wq_pool_mutex);
+       }
+ 
+-      if (!list_empty(&pwq->pending_node)) {
+-              struct wq_node_nr_active *nna =
+-                      wq_node_nr_active(pwq->wq, pwq->pool->node);
+-
+-              raw_spin_lock_irq(&nna->lock);
+-              list_del_init(&pwq->pending_node);
+-              raw_spin_unlock_irq(&nna->lock);
+-      }
+-
+       call_rcu(&pwq->rcu, rcu_free_pwq);
+ 
+       /*
+@@ -4605,7 +4361,6 @@ static void init_pwq(struct pool_workque
+       pwq->flush_color = -1;
+       pwq->refcnt = 1;
+       INIT_LIST_HEAD(&pwq->inactive_works);
+-      INIT_LIST_HEAD(&pwq->pending_node);
+       INIT_LIST_HEAD(&pwq->pwqs_node);
+       INIT_LIST_HEAD(&pwq->mayday_node);
+       kthread_init_work(&pwq->release_work, pwq_release_workfn);
+@@ -4813,9 +4568,6 @@ static void apply_wqattrs_commit(struct
+                                                       ctx->pwq_tbl[cpu]);
+       ctx->dfl_pwq = install_unbound_pwq(ctx->wq, -1, ctx->dfl_pwq);
+ 
+-      /* update node_nr_active->max */
+-      wq_update_node_max_active(ctx->wq, -1);
+-
+       mutex_unlock(&ctx->wq->mutex);
+ }
+ 
+@@ -5089,35 +4841,24 @@ static int init_rescuer(struct workqueue
+ static void wq_adjust_max_active(struct workqueue_struct *wq)
+ {
+       bool activated;
+-      int new_max, new_min;
+ 
+       lockdep_assert_held(&wq->mutex);
+ 
+       if ((wq->flags & WQ_FREEZABLE) && workqueue_freezing) {
+-              new_max = 0;
+-              new_min = 0;
+-      } else {
+-              new_max = wq->saved_max_active;
+-              new_min = wq->saved_min_active;
++              WRITE_ONCE(wq->max_active, 0);
++              return;
+       }
+ 
+-      if (wq->max_active == new_max && wq->min_active == new_min)
++      if (wq->max_active == wq->saved_max_active)
+               return;
+ 
+       /*
+-       * Update @wq->max/min_active and then kick inactive work items if more
++       * Update @wq->max_active and then kick inactive work items if more
+        * active work items are allowed. This doesn't break work item ordering
+        * because new work items are always queued behind existing inactive
+        * work items if there are any.
+        */
+-      WRITE_ONCE(wq->max_active, new_max);
+-      WRITE_ONCE(wq->min_active, new_min);
+-
+-      if (wq->flags & WQ_UNBOUND)
+-              wq_update_node_max_active(wq, -1);
+-
+-      if (new_max == 0)
+-              return;
++      WRITE_ONCE(wq->max_active, wq->saved_max_active);
+ 
+       /*
+        * Round-robin through pwq's activating the first inactive work item
+@@ -5132,7 +4873,7 @@ static void wq_adjust_max_active(struct
+ 
+                       /* can be called during early boot w/ irq disabled */
+                       raw_spin_lock_irqsave(&pwq->pool->lock, flags);
+-                      if (pwq_activate_first_inactive(pwq, true)) {
++                      if (pwq_activate_first_inactive(pwq)) {
+                               activated = true;
+                               kick_pool(pwq->pool);
+                       }
+@@ -5194,9 +4935,7 @@ struct workqueue_struct *alloc_workqueue
+       /* init wq */
+       wq->flags = flags;
+       wq->max_active = max_active;
+-      wq->min_active = min(max_active, WQ_DFL_MIN_ACTIVE);
+-      wq->saved_max_active = wq->max_active;
+-      wq->saved_min_active = wq->min_active;
++      wq->saved_max_active = max_active;
+       mutex_init(&wq->mutex);
+       atomic_set(&wq->nr_pwqs_to_flush, 0);
+       INIT_LIST_HEAD(&wq->pwqs);
+@@ -5362,8 +5101,7 @@ EXPORT_SYMBOL_GPL(destroy_workqueue);
+  * @wq: target workqueue
+  * @max_active: new max_active value.
+  *
+- * Set max_active of @wq to @max_active. See the alloc_workqueue() function
+- * comment.
++ * Set max_active of @wq to @max_active.
+  *
+  * CONTEXT:
+  * Don't call from IRQ context.
+@@ -5380,9 +5118,6 @@ void workqueue_set_max_active(struct wor
+ 
+       wq->flags &= ~__WQ_ORDERED;
+       wq->saved_max_active = max_active;
+-      if (wq->flags & WQ_UNBOUND)
+-              wq->saved_min_active = min(wq->saved_min_active, max_active);
+-
+       wq_adjust_max_active(wq);
+ 
+       mutex_unlock(&wq->mutex);
+@@ -6064,10 +5799,6 @@ int workqueue_online_cpu(unsigned int cp
+ 
+                       for_each_cpu(tcpu, pt->pod_cpus[pt->cpu_pod[cpu]])
+                               wq_update_pod(wq, tcpu, cpu, true);
+-
+-                      mutex_lock(&wq->mutex);
+-                      wq_update_node_max_active(wq, -1);
+-                      mutex_unlock(&wq->mutex);
+               }
+       }
+ 
+@@ -6096,10 +5827,6 @@ int workqueue_offline_cpu(unsigned int c
+ 
+                       for_each_cpu(tcpu, pt->pod_cpus[pt->cpu_pod[cpu]])
+                               wq_update_pod(wq, tcpu, cpu, false);
+-
+-                      mutex_lock(&wq->mutex);
+-                      wq_update_node_max_active(wq, cpu);
+-                      mutex_unlock(&wq->mutex);
+               }
+       }
+       mutex_unlock(&wq_pool_mutex);
+@@ -7296,12 +7023,8 @@ void __init workqueue_init_topology(void
+        * combinations to apply per-pod sharing.
+        */
+       list_for_each_entry(wq, &workqueues, list) {
+-              for_each_online_cpu(cpu)
++              for_each_online_cpu(cpu) {
+                       wq_update_pod(wq, cpu, cpu, true);
+-              if (wq->flags & WQ_UNBOUND) {
+-                      mutex_lock(&wq->mutex);
+-                      wq_update_node_max_active(wq, -1);
+-                      mutex_unlock(&wq->mutex);
+               }
+       }
+ 
diff --git a/queue-6.6/revert-workqueue-introduce-struct-wq_node_nr_active.patch b/queue-6.6/revert-workqueue-introduce-struct-wq_node_nr_active.patch

new file mode 100644 (file)

index 0000000..8128e59
--- /dev/null
+++ b/queue-6.6/revert-workqueue-introduce-struct-wq_node_nr_active.patch
@@ -0,0 +1,278 @@
+From d5b52e9eca39289ad7194d20e3236a54166ee5a1 Mon Sep 17 00:00:00 2001
+From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Date: Wed, 3 Apr 2024 16:36:30 +0200
+Subject: Revert "workqueue: Introduce struct wq_node_nr_active"
+
+From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+This reverts commit b522229a56941adac1ea1da6593b2b5c734b5359 which is
+commit 91ccc6e7233bb10a9c176aa4cc70d6f432a441a5 upstream.
+
+The workqueue patches backported to 6.6.y caused some reported
+regressions, so revert them for now.
+
+Reported-by: Thorsten Leemhuis <regressions@leemhuis.info>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Marek Szyprowski <m.szyprowski@samsung.com>
+Cc: Nathan Chancellor <nathan@kernel.org>
+Cc: Sasha Levin <sashal@kernel.org>
+Cc: Audra Mitchell <audra@redhat.com>
+Link: https://lore.kernel.org/all/ce4c2f67-c298-48a0-87a3-f933d646c73b@leemhuis.info/
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/workqueue.c |  142 ++---------------------------------------------------
+ 1 file changed, 7 insertions(+), 135 deletions(-)
+
+--- a/kernel/workqueue.c
++++ b/kernel/workqueue.c
+@@ -281,16 +281,6 @@ struct wq_flusher {
+ struct wq_device;
+ 
+ /*
+- * Unlike in a per-cpu workqueue where max_active limits its concurrency level
+- * on each CPU, in an unbound workqueue, max_active applies to the whole system.
+- * As sharing a single nr_active across multiple sockets can be very expensive,
+- * the counting and enforcement is per NUMA node.
+- */
+-struct wq_node_nr_active {
+-      atomic_t                nr;             /* per-node nr_active count */
+-};
+-
+-/*
+  * The externally visible workqueue.  It relays the issued work items to
+  * the appropriate worker_pool through its pool_workqueues.
+  */
+@@ -336,7 +326,6 @@ struct workqueue_struct {
+       /* hot fields used during command issue, aligned to cacheline */
+       unsigned int            flags ____cacheline_aligned; /* WQ: WQ_* flags */
+       struct pool_workqueue __percpu __rcu **cpu_pwq; /* I: per-cpu pwqs */
+-      struct wq_node_nr_active *node_nr_active[]; /* I: per-node nr_active */
+ };
+ 
+ static struct kmem_cache *pwq_cache;
+@@ -1427,31 +1416,6 @@ work_func_t wq_worker_last_func(struct t
+ }
+ 
+ /**
+- * wq_node_nr_active - Determine wq_node_nr_active to use
+- * @wq: workqueue of interest
+- * @node: NUMA node, can be %NUMA_NO_NODE
+- *
+- * Determine wq_node_nr_active to use for @wq on @node. Returns:
+- *
+- * - %NULL for per-cpu workqueues as they don't need to use shared nr_active.
+- *
+- * - node_nr_active[nr_node_ids] if @node is %NUMA_NO_NODE.
+- *
+- * - Otherwise, node_nr_active[@node].
+- */
+-static struct wq_node_nr_active *wq_node_nr_active(struct workqueue_struct *wq,
+-                                                 int node)
+-{
+-      if (!(wq->flags & WQ_UNBOUND))
+-              return NULL;
+-
+-      if (node == NUMA_NO_NODE)
+-              node = nr_node_ids;
+-
+-      return wq->node_nr_active[node];
+-}
+-
+-/**
+  * get_pwq - get an extra reference on the specified pool_workqueue
+  * @pwq: pool_workqueue to get
+  *
+@@ -1532,17 +1496,12 @@ static bool pwq_activate_work(struct poo
+                             struct work_struct *work)
+ {
+       struct worker_pool *pool = pwq->pool;
+-      struct wq_node_nr_active *nna;
+ 
+       lockdep_assert_held(&pool->lock);
+ 
+       if (!(*work_data_bits(work) & WORK_STRUCT_INACTIVE))
+               return false;
+ 
+-      nna = wq_node_nr_active(pwq->wq, pool->node);
+-      if (nna)
+-              atomic_inc(&nna->nr);
+-
+       pwq->nr_active++;
+       __pwq_activate_work(pwq, work);
+       return true;
+@@ -1559,18 +1518,14 @@ static bool pwq_tryinc_nr_active(struct
+ {
+       struct workqueue_struct *wq = pwq->wq;
+       struct worker_pool *pool = pwq->pool;
+-      struct wq_node_nr_active *nna = wq_node_nr_active(wq, pool->node);
+       bool obtained;
+ 
+       lockdep_assert_held(&pool->lock);
+ 
+       obtained = pwq->nr_active < READ_ONCE(wq->max_active);
+ 
+-      if (obtained) {
++      if (obtained)
+               pwq->nr_active++;
+-              if (nna)
+-                      atomic_inc(&nna->nr);
+-      }
+       return obtained;
+ }
+ 
+@@ -1607,26 +1562,10 @@ static bool pwq_activate_first_inactive(
+ static void pwq_dec_nr_active(struct pool_workqueue *pwq)
+ {
+       struct worker_pool *pool = pwq->pool;
+-      struct wq_node_nr_active *nna = wq_node_nr_active(pwq->wq, pool->node);
+ 
+       lockdep_assert_held(&pool->lock);
+ 
+-      /*
+-       * @pwq->nr_active should be decremented for both percpu and unbound
+-       * workqueues.
+-       */
+       pwq->nr_active--;
+-
+-      /*
+-       * For a percpu workqueue, it's simple. Just need to kick the first
+-       * inactive work item on @pwq itself.
+-       */
+-      if (!nna) {
+-              pwq_activate_first_inactive(pwq);
+-              return;
+-      }
+-
+-      atomic_dec(&nna->nr);
+       pwq_activate_first_inactive(pwq);
+ }
+ 
+@@ -4081,63 +4020,11 @@ static void wq_free_lockdep(struct workq
+ }
+ #endif
+ 
+-static void free_node_nr_active(struct wq_node_nr_active **nna_ar)
+-{
+-      int node;
+-
+-      for_each_node(node) {
+-              kfree(nna_ar[node]);
+-              nna_ar[node] = NULL;
+-      }
+-
+-      kfree(nna_ar[nr_node_ids]);
+-      nna_ar[nr_node_ids] = NULL;
+-}
+-
+-static void init_node_nr_active(struct wq_node_nr_active *nna)
+-{
+-      atomic_set(&nna->nr, 0);
+-}
+-
+-/*
+- * Each node's nr_active counter will be accessed mostly from its own node and
+- * should be allocated in the node.
+- */
+-static int alloc_node_nr_active(struct wq_node_nr_active **nna_ar)
+-{
+-      struct wq_node_nr_active *nna;
+-      int node;
+-
+-      for_each_node(node) {
+-              nna = kzalloc_node(sizeof(*nna), GFP_KERNEL, node);
+-              if (!nna)
+-                      goto err_free;
+-              init_node_nr_active(nna);
+-              nna_ar[node] = nna;
+-      }
+-
+-      /* [nr_node_ids] is used as the fallback */
+-      nna = kzalloc_node(sizeof(*nna), GFP_KERNEL, NUMA_NO_NODE);
+-      if (!nna)
+-              goto err_free;
+-      init_node_nr_active(nna);
+-      nna_ar[nr_node_ids] = nna;
+-
+-      return 0;
+-
+-err_free:
+-      free_node_nr_active(nna_ar);
+-      return -ENOMEM;
+-}
+-
+ static void rcu_free_wq(struct rcu_head *rcu)
+ {
+       struct workqueue_struct *wq =
+               container_of(rcu, struct workqueue_struct, rcu);
+ 
+-      if (wq->flags & WQ_UNBOUND)
+-              free_node_nr_active(wq->node_nr_active);
+-
+       wq_free_lockdep(wq);
+       free_percpu(wq->cpu_pwq);
+       free_workqueue_attrs(wq->unbound_attrs);
+@@ -4889,8 +4776,7 @@ struct workqueue_struct *alloc_workqueue
+ {
+       va_list args;
+       struct workqueue_struct *wq;
+-      size_t wq_size;
+-      int name_len;
++      int len;
+ 
+       /*
+        * Unbound && max_active == 1 used to imply ordered, which is no longer
+@@ -4906,12 +4792,7 @@ struct workqueue_struct *alloc_workqueue
+               flags |= WQ_UNBOUND;
+ 
+       /* allocate wq and format name */
+-      if (flags & WQ_UNBOUND)
+-              wq_size = struct_size(wq, node_nr_active, nr_node_ids + 1);
+-      else
+-              wq_size = sizeof(*wq);
+-
+-      wq = kzalloc(wq_size, GFP_KERNEL);
++      wq = kzalloc(sizeof(*wq), GFP_KERNEL);
+       if (!wq)
+               return NULL;
+ 
+@@ -4922,12 +4803,11 @@ struct workqueue_struct *alloc_workqueue
+       }
+ 
+       va_start(args, max_active);
+-      name_len = vsnprintf(wq->name, sizeof(wq->name), fmt, args);
++      len = vsnprintf(wq->name, sizeof(wq->name), fmt, args);
+       va_end(args);
+ 
+-      if (name_len >= WQ_NAME_LEN)
+-              pr_warn_once("workqueue: name exceeds WQ_NAME_LEN. Truncating to: %s\n",
+-                           wq->name);
++      if (len >= WQ_NAME_LEN)
++              pr_warn_once("workqueue: name exceeds WQ_NAME_LEN. Truncating to: %s\n", wq->name);
+ 
+       max_active = max_active ?: WQ_DFL_ACTIVE;
+       max_active = wq_clamp_max_active(max_active, flags, wq->name);
+@@ -4946,13 +4826,8 @@ struct workqueue_struct *alloc_workqueue
+       wq_init_lockdep(wq);
+       INIT_LIST_HEAD(&wq->list);
+ 
+-      if (flags & WQ_UNBOUND) {
+-              if (alloc_node_nr_active(wq->node_nr_active) < 0)
+-                      goto err_unreg_lockdep;
+-      }
+-
+       if (alloc_and_link_pwqs(wq) < 0)
+-              goto err_free_node_nr_active;
++              goto err_unreg_lockdep;
+ 
+       if (wq_online && init_rescuer(wq) < 0)
+               goto err_destroy;
+@@ -4977,9 +4852,6 @@ struct workqueue_struct *alloc_workqueue
+ 
+       return wq;
+ 
+-err_free_node_nr_active:
+-      if (wq->flags & WQ_UNBOUND)
+-              free_node_nr_active(wq->node_nr_active);
+ err_unreg_lockdep:
+       wq_unregister_lockdep(wq);
+       wq_free_lockdep(wq);
diff --git a/queue-6.6/revert-workqueue-make-wq_adjust_max_active-round-robin-pwqs-while-activating.patch b/queue-6.6/revert-workqueue-make-wq_adjust_max_active-round-robin-pwqs-while-activating.patch

new file mode 100644 (file)

index 0000000..7d41d34
--- /dev/null
+++ b/queue-6.6/revert-workqueue-make-wq_adjust_max_active-round-robin-pwqs-while-activating.patch
@@ -0,0 +1,75 @@
+From f8b60d9d4e00f518c2674d537b5ec5c9b0a9a336 Mon Sep 17 00:00:00 2001
+From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Date: Wed, 3 Apr 2024 16:36:44 +0200
+Subject: Revert "workqueue: Make wq_adjust_max_active() round-robin pwqs while activating"
+
+From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+This reverts commit 5f99fee6f2dea1228980c3e785ab1a2c69b4da3c which is
+commit c5404d4e6df6faba1007544b5f4e62c7c14416dd upstream.
+
+The workqueue patches backported to 6.6.y caused some reported
+regressions, so revert them for now.
+
+Reported-by: Thorsten Leemhuis <regressions@leemhuis.info>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Marek Szyprowski <m.szyprowski@samsung.com>
+Cc: Nathan Chancellor <nathan@kernel.org>
+Cc: Sasha Levin <sashal@kernel.org>
+Cc: Audra Mitchell <audra@redhat.com>
+Link: https://lore.kernel.org/all/ce4c2f67-c298-48a0-87a3-f933d646c73b@leemhuis.info/
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/workqueue.c |   33 +++++++++++++--------------------
+ 1 file changed, 13 insertions(+), 20 deletions(-)
+
+--- a/kernel/workqueue.c
++++ b/kernel/workqueue.c
+@@ -4710,7 +4710,7 @@ static int init_rescuer(struct workqueue
+  */
+ static void wq_adjust_max_active(struct workqueue_struct *wq)
+ {
+-      bool activated;
++      struct pool_workqueue *pwq;
+ 
+       lockdep_assert_held(&wq->mutex);
+ 
+@@ -4730,26 +4730,19 @@ static void wq_adjust_max_active(struct
+        */
+       WRITE_ONCE(wq->max_active, wq->saved_max_active);
+ 
+-      /*
+-       * Round-robin through pwq's activating the first inactive work item
+-       * until max_active is filled.
+-       */
+-      do {
+-              struct pool_workqueue *pwq;
++      for_each_pwq(pwq, wq) {
++              unsigned long flags;
++
++              /* this function can be called during early boot w/ irq disabled */
++              raw_spin_lock_irqsave(&pwq->pool->lock, flags);
+ 
+-              activated = false;
+-              for_each_pwq(pwq, wq) {
+-                      unsigned long flags;
+-
+-                      /* can be called during early boot w/ irq disabled */
+-                      raw_spin_lock_irqsave(&pwq->pool->lock, flags);
+-                      if (pwq_activate_first_inactive(pwq)) {
+-                              activated = true;
+-                              kick_pool(pwq->pool);
+-                      }
+-                      raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
+-              }
+-      } while (activated);
++              while (pwq_activate_first_inactive(pwq))
++                      ;
++
++              kick_pool(pwq->pool);
++
++              raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
++      }
+ }
+ 
+ __printf(1, 4)
diff --git a/queue-6.6/revert-workqueue-move-nr_active-handling-into-helpers.patch b/queue-6.6/revert-workqueue-move-nr_active-handling-into-helpers.patch

new file mode 100644 (file)

index 0000000..c6c34ef
--- /dev/null
+++ b/queue-6.6/revert-workqueue-move-nr_active-handling-into-helpers.patch
@@ -0,0 +1,158 @@
+From 940359967b7a88147cc8f97cc25b0980fd414a9b Mon Sep 17 00:00:00 2001
+From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Date: Wed, 3 Apr 2024 16:36:45 +0200
+Subject: Revert "workqueue: Move nr_active handling into helpers"
+
+From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+This reverts commit 4023a2d95076918abe2757d60810642a8115b586 which is
+commit 1c270b79ce0b8290f146255ea9057243f6dd3c17 upstream.
+
+The workqueue patches backported to 6.6.y caused some reported
+regressions, so revert them for now.
+
+Reported-by: Thorsten Leemhuis <regressions@leemhuis.info>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Marek Szyprowski <m.szyprowski@samsung.com>
+Cc: Nathan Chancellor <nathan@kernel.org>
+Cc: Sasha Levin <sashal@kernel.org>
+Cc: Audra Mitchell <audra@redhat.com>
+Link: https://lore.kernel.org/all/ce4c2f67-c298-48a0-87a3-f933d646c73b@leemhuis.info/
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/workqueue.c |   86 +++++++++++------------------------------------------
+ 1 file changed, 19 insertions(+), 67 deletions(-)
+
+--- a/kernel/workqueue.c
++++ b/kernel/workqueue.c
+@@ -1458,14 +1458,11 @@ static bool pwq_is_empty(struct pool_wor
+ static void __pwq_activate_work(struct pool_workqueue *pwq,
+                               struct work_struct *work)
+ {
+-      unsigned long *wdb = work_data_bits(work);
+-
+-      WARN_ON_ONCE(!(*wdb & WORK_STRUCT_INACTIVE));
+       trace_workqueue_activate_work(work);
+       if (list_empty(&pwq->pool->worklist))
+               pwq->pool->watchdog_ts = jiffies;
+       move_linked_works(work, &pwq->pool->worklist, NULL);
+-      __clear_bit(WORK_STRUCT_INACTIVE_BIT, wdb);
++      __clear_bit(WORK_STRUCT_INACTIVE_BIT, work_data_bits(work));
+ }
+ 
+ /**
+@@ -1490,66 +1487,12 @@ static bool pwq_activate_work(struct poo
+       return true;
+ }
+ 
+-/**
+- * pwq_tryinc_nr_active - Try to increment nr_active for a pwq
+- * @pwq: pool_workqueue of interest
+- *
+- * Try to increment nr_active for @pwq. Returns %true if an nr_active count is
+- * successfully obtained. %false otherwise.
+- */
+-static bool pwq_tryinc_nr_active(struct pool_workqueue *pwq)
+-{
+-      struct workqueue_struct *wq = pwq->wq;
+-      struct worker_pool *pool = pwq->pool;
+-      bool obtained;
+-
+-      lockdep_assert_held(&pool->lock);
+-
+-      obtained = pwq->nr_active < READ_ONCE(wq->max_active);
+-
+-      if (obtained)
+-              pwq->nr_active++;
+-      return obtained;
+-}
+-
+-/**
+- * pwq_activate_first_inactive - Activate the first inactive work item on a pwq
+- * @pwq: pool_workqueue of interest
+- *
+- * Activate the first inactive work item of @pwq if available and allowed by
+- * max_active limit.
+- *
+- * Returns %true if an inactive work item has been activated. %false if no
+- * inactive work item is found or max_active limit is reached.
+- */
+-static bool pwq_activate_first_inactive(struct pool_workqueue *pwq)
+-{
+-      struct work_struct *work =
+-              list_first_entry_or_null(&pwq->inactive_works,
+-                                       struct work_struct, entry);
+-
+-      if (work && pwq_tryinc_nr_active(pwq)) {
+-              __pwq_activate_work(pwq, work);
+-              return true;
+-      } else {
+-              return false;
+-      }
+-}
+-
+-/**
+- * pwq_dec_nr_active - Retire an active count
+- * @pwq: pool_workqueue of interest
+- *
+- * Decrement @pwq's nr_active and try to activate the first inactive work item.
+- */
+-static void pwq_dec_nr_active(struct pool_workqueue *pwq)
++static void pwq_activate_first_inactive(struct pool_workqueue *pwq)
+ {
+-      struct worker_pool *pool = pwq->pool;
++      struct work_struct *work = list_first_entry(&pwq->inactive_works,
++                                                  struct work_struct, entry);
+ 
+-      lockdep_assert_held(&pool->lock);
+-
+-      pwq->nr_active--;
+-      pwq_activate_first_inactive(pwq);
++      pwq_activate_work(pwq, work);
+ }
+ 
+ /**
+@@ -1567,8 +1510,14 @@ static void pwq_dec_nr_in_flight(struct
+ {
+       int color = get_work_color(work_data);
+ 
+-      if (!(work_data & WORK_STRUCT_INACTIVE))
+-              pwq_dec_nr_active(pwq);
++      if (!(work_data & WORK_STRUCT_INACTIVE)) {
++              pwq->nr_active--;
++              if (!list_empty(&pwq->inactive_works)) {
++                      /* one down, submit an inactive one */
++                      if (pwq->nr_active < READ_ONCE(pwq->wq->max_active))
++                              pwq_activate_first_inactive(pwq);
++              }
++      }
+ 
+       pwq->nr_in_flight[color]--;
+ 
+@@ -1870,11 +1819,13 @@ retry:
+        * @work must also queue behind existing inactive work items to maintain
+        * ordering when max_active changes. See wq_adjust_max_active().
+        */
+-      if (list_empty(&pwq->inactive_works) && pwq_tryinc_nr_active(pwq)) {
++      if (list_empty(&pwq->inactive_works) &&
++          pwq->nr_active < READ_ONCE(pwq->wq->max_active)) {
+               if (list_empty(&pool->worklist))
+                       pool->watchdog_ts = jiffies;
+ 
+               trace_workqueue_activate_work(work);
++              pwq->nr_active++;
+               insert_work(pwq, work, &pool->worklist, work_flags);
+               kick_pool(pool);
+       } else {
+@@ -4736,8 +4687,9 @@ static void wq_adjust_max_active(struct
+               /* this function can be called during early boot w/ irq disabled */
+               raw_spin_lock_irqsave(&pwq->pool->lock, flags);
+ 
+-              while (pwq_activate_first_inactive(pwq))
+-                      ;
++              while (!list_empty(&pwq->inactive_works) &&
++                     pwq->nr_active < wq->max_active)
++                      pwq_activate_first_inactive(pwq);
+ 
+               kick_pool(pwq->pool);
+ 
diff --git a/queue-6.6/revert-workqueue-move-pwq-max_active-to-wq-max_active.patch b/queue-6.6/revert-workqueue-move-pwq-max_active-to-wq-max_active.patch

new file mode 100644 (file)

index 0000000..9fc7956
--- /dev/null
+++ b/queue-6.6/revert-workqueue-move-pwq-max_active-to-wq-max_active.patch
@@ -0,0 +1,286 @@
+From 93debe501e2a602b08ef66e1f7161a3d6d98db0a Mon Sep 17 00:00:00 2001
+From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Date: Wed, 3 Apr 2024 16:36:50 +0200
+Subject: Revert "workqueue: Move pwq->max_active to wq->max_active"
+
+From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+This reverts commit 82e098f5bed1ff167332d26f8551662098747ec4 which is
+commit a045a272d887575da17ad86d6573e82871b50c27 upstream.
+
+The workqueue patches backported to 6.6.y caused some reported
+regressions, so revert them for now.
+
+Reported-by: Thorsten Leemhuis <regressions@leemhuis.info>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Marek Szyprowski <m.szyprowski@samsung.com>
+Cc: Nathan Chancellor <nathan@kernel.org>
+Cc: Sasha Levin <sashal@kernel.org>
+Cc: Audra Mitchell <audra@redhat.com>
+Link: https://lore.kernel.org/all/ce4c2f67-c298-48a0-87a3-f933d646c73b@leemhuis.info/
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/workqueue.c |  133 ++++++++++++++++++++++++++---------------------------
+ 1 file changed, 67 insertions(+), 66 deletions(-)
+
+--- a/kernel/workqueue.c
++++ b/kernel/workqueue.c
+@@ -143,9 +143,6 @@ enum {
+  *
+  * WR: wq->mutex protected for writes.  RCU protected for reads.
+  *
+- * WO: wq->mutex protected for writes. Updated with WRITE_ONCE() and can be read
+- *     with READ_ONCE() without locking.
+- *
+  * MD: wq_mayday_lock protected.
+  *
+  * WD: Used internally by the watchdog.
+@@ -253,6 +250,7 @@ struct pool_workqueue {
+        * is marked with WORK_STRUCT_INACTIVE iff it is in pwq->inactive_works.
+        */
+       int                     nr_active;      /* L: nr of active works */
++      int                     max_active;     /* L: max active works */
+       struct list_head        inactive_works; /* L: inactive works */
+       struct list_head        pwqs_node;      /* WR: node on wq->pwqs */
+       struct list_head        mayday_node;    /* MD: node on wq->maydays */
+@@ -300,8 +298,7 @@ struct workqueue_struct {
+       struct worker           *rescuer;       /* MD: rescue worker */
+ 
+       int                     nr_drainers;    /* WQ: drain in progress */
+-      int                     max_active;     /* WO: max active works */
+-      int                     saved_max_active; /* WQ: saved max_active */
++      int                     saved_max_active; /* WQ: saved pwq max_active */
+ 
+       struct workqueue_attrs  *unbound_attrs; /* PW: only for unbound wqs */
+       struct pool_workqueue   *dfl_pwq;       /* PW: only for unbound wqs */
+@@ -1489,7 +1486,7 @@ static void pwq_dec_nr_in_flight(struct
+               pwq->nr_active--;
+               if (!list_empty(&pwq->inactive_works)) {
+                       /* one down, submit an inactive one */
+-                      if (pwq->nr_active < READ_ONCE(pwq->wq->max_active))
++                      if (pwq->nr_active < pwq->max_active)
+                               pwq_activate_first_inactive(pwq);
+               }
+       }
+@@ -1790,13 +1787,7 @@ retry:
+       pwq->nr_in_flight[pwq->work_color]++;
+       work_flags = work_color_to_flags(pwq->work_color);
+ 
+-      /*
+-       * Limit the number of concurrently active work items to max_active.
+-       * @work must also queue behind existing inactive work items to maintain
+-       * ordering when max_active changes. See wq_adjust_max_active().
+-       */
+-      if (list_empty(&pwq->inactive_works) &&
+-          pwq->nr_active < READ_ONCE(pwq->wq->max_active)) {
++      if (likely(pwq->nr_active < pwq->max_active)) {
+               if (list_empty(&pool->worklist))
+                       pool->watchdog_ts = jiffies;
+ 
+@@ -4145,6 +4136,50 @@ static void pwq_release_workfn(struct kt
+       }
+ }
+ 
++/**
++ * pwq_adjust_max_active - update a pwq's max_active to the current setting
++ * @pwq: target pool_workqueue
++ *
++ * If @pwq isn't freezing, set @pwq->max_active to the associated
++ * workqueue's saved_max_active and activate inactive work items
++ * accordingly.  If @pwq is freezing, clear @pwq->max_active to zero.
++ */
++static void pwq_adjust_max_active(struct pool_workqueue *pwq)
++{
++      struct workqueue_struct *wq = pwq->wq;
++      bool freezable = wq->flags & WQ_FREEZABLE;
++      unsigned long flags;
++
++      /* for @wq->saved_max_active */
++      lockdep_assert_held(&wq->mutex);
++
++      /* fast exit for non-freezable wqs */
++      if (!freezable && pwq->max_active == wq->saved_max_active)
++              return;
++
++      /* this function can be called during early boot w/ irq disabled */
++      raw_spin_lock_irqsave(&pwq->pool->lock, flags);
++
++      /*
++       * During [un]freezing, the caller is responsible for ensuring that
++       * this function is called at least once after @workqueue_freezing
++       * is updated and visible.
++       */
++      if (!freezable || !workqueue_freezing) {
++              pwq->max_active = wq->saved_max_active;
++
++              while (!list_empty(&pwq->inactive_works) &&
++                     pwq->nr_active < pwq->max_active)
++                      pwq_activate_first_inactive(pwq);
++
++              kick_pool(pwq->pool);
++      } else {
++              pwq->max_active = 0;
++      }
++
++      raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
++}
++
+ /* initialize newly allocated @pwq which is associated with @wq and @pool */
+ static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq,
+                    struct worker_pool *pool)
+@@ -4177,6 +4212,9 @@ static void link_pwq(struct pool_workque
+       /* set the matching work_color */
+       pwq->work_color = wq->work_color;
+ 
++      /* sync max_active to the current setting */
++      pwq_adjust_max_active(pwq);
++
+       /* link in @pwq */
+       list_add_rcu(&pwq->pwqs_node, &wq->pwqs);
+ }
+@@ -4627,52 +4665,6 @@ static int init_rescuer(struct workqueue
+       return 0;
+ }
+ 
+-/**
+- * wq_adjust_max_active - update a wq's max_active to the current setting
+- * @wq: target workqueue
+- *
+- * If @wq isn't freezing, set @wq->max_active to the saved_max_active and
+- * activate inactive work items accordingly. If @wq is freezing, clear
+- * @wq->max_active to zero.
+- */
+-static void wq_adjust_max_active(struct workqueue_struct *wq)
+-{
+-      struct pool_workqueue *pwq;
+-
+-      lockdep_assert_held(&wq->mutex);
+-
+-      if ((wq->flags & WQ_FREEZABLE) && workqueue_freezing) {
+-              WRITE_ONCE(wq->max_active, 0);
+-              return;
+-      }
+-
+-      if (wq->max_active == wq->saved_max_active)
+-              return;
+-
+-      /*
+-       * Update @wq->max_active and then kick inactive work items if more
+-       * active work items are allowed. This doesn't break work item ordering
+-       * because new work items are always queued behind existing inactive
+-       * work items if there are any.
+-       */
+-      WRITE_ONCE(wq->max_active, wq->saved_max_active);
+-
+-      for_each_pwq(pwq, wq) {
+-              unsigned long flags;
+-
+-              /* this function can be called during early boot w/ irq disabled */
+-              raw_spin_lock_irqsave(&pwq->pool->lock, flags);
+-
+-              while (!list_empty(&pwq->inactive_works) &&
+-                     pwq->nr_active < wq->max_active)
+-                      pwq_activate_first_inactive(pwq);
+-
+-              kick_pool(pwq->pool);
+-
+-              raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
+-      }
+-}
+-
+ __printf(1, 4)
+ struct workqueue_struct *alloc_workqueue(const char *fmt,
+                                        unsigned int flags,
+@@ -4680,6 +4672,7 @@ struct workqueue_struct *alloc_workqueue
+ {
+       va_list args;
+       struct workqueue_struct *wq;
++      struct pool_workqueue *pwq;
+       int len;
+ 
+       /*
+@@ -4718,7 +4711,6 @@ struct workqueue_struct *alloc_workqueue
+ 
+       /* init wq */
+       wq->flags = flags;
+-      wq->max_active = max_active;
+       wq->saved_max_active = max_active;
+       mutex_init(&wq->mutex);
+       atomic_set(&wq->nr_pwqs_to_flush, 0);
+@@ -4747,7 +4739,8 @@ struct workqueue_struct *alloc_workqueue
+       mutex_lock(&wq_pool_mutex);
+ 
+       mutex_lock(&wq->mutex);
+-      wq_adjust_max_active(wq);
++      for_each_pwq(pwq, wq)
++              pwq_adjust_max_active(pwq);
+       mutex_unlock(&wq->mutex);
+ 
+       list_add_tail_rcu(&wq->list, &workqueues);
+@@ -4885,6 +4878,8 @@ EXPORT_SYMBOL_GPL(destroy_workqueue);
+  */
+ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active)
+ {
++      struct pool_workqueue *pwq;
++
+       /* disallow meddling with max_active for ordered workqueues */
+       if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
+               return;
+@@ -4895,7 +4890,9 @@ void workqueue_set_max_active(struct wor
+ 
+       wq->flags &= ~__WQ_ORDERED;
+       wq->saved_max_active = max_active;
+-      wq_adjust_max_active(wq);
++
++      for_each_pwq(pwq, wq)
++              pwq_adjust_max_active(pwq);
+ 
+       mutex_unlock(&wq->mutex);
+ }
+@@ -5142,8 +5139,8 @@ static void show_pwq(struct pool_workque
+       pr_info("  pwq %d:", pool->id);
+       pr_cont_pool_info(pool);
+ 
+-      pr_cont(" active=%d refcnt=%d%s\n",
+-              pwq->nr_active, pwq->refcnt,
++      pr_cont(" active=%d/%d refcnt=%d%s\n",
++              pwq->nr_active, pwq->max_active, pwq->refcnt,
+               !list_empty(&pwq->mayday_node) ? " MAYDAY" : "");
+ 
+       hash_for_each(pool->busy_hash, bkt, worker, hentry) {
+@@ -5691,6 +5688,7 @@ EXPORT_SYMBOL_GPL(work_on_cpu_safe_key);
+ void freeze_workqueues_begin(void)
+ {
+       struct workqueue_struct *wq;
++      struct pool_workqueue *pwq;
+ 
+       mutex_lock(&wq_pool_mutex);
+ 
+@@ -5699,7 +5697,8 @@ void freeze_workqueues_begin(void)
+ 
+       list_for_each_entry(wq, &workqueues, list) {
+               mutex_lock(&wq->mutex);
+-              wq_adjust_max_active(wq);
++              for_each_pwq(pwq, wq)
++                      pwq_adjust_max_active(pwq);
+               mutex_unlock(&wq->mutex);
+       }
+ 
+@@ -5764,6 +5763,7 @@ out_unlock:
+ void thaw_workqueues(void)
+ {
+       struct workqueue_struct *wq;
++      struct pool_workqueue *pwq;
+ 
+       mutex_lock(&wq_pool_mutex);
+ 
+@@ -5775,7 +5775,8 @@ void thaw_workqueues(void)
+       /* restore max_active and repopulate worklist */
+       list_for_each_entry(wq, &workqueues, list) {
+               mutex_lock(&wq->mutex);
+-              wq_adjust_max_active(wq);
++              for_each_pwq(pwq, wq)
++                      pwq_adjust_max_active(pwq);
+               mutex_unlock(&wq->mutex);
+       }
+ 
diff --git a/queue-6.6/revert-workqueue-rcu-protect-wq-dfl_pwq-and-implement-accessors-for-it.patch b/queue-6.6/revert-workqueue-rcu-protect-wq-dfl_pwq-and-implement-accessors-for-it.patch

new file mode 100644 (file)

index 0000000..9f555a0
--- /dev/null
+++ b/queue-6.6/revert-workqueue-rcu-protect-wq-dfl_pwq-and-implement-accessors-for-it.patch
@@ -0,0 +1,171 @@
+From 23771ebd17c800a345cd147ffaca383e52d0daa8 Mon Sep 17 00:00:00 2001
+From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Date: Wed, 3 Apr 2024 16:36:42 +0200
+Subject: Revert "workqueue: RCU protect wq->dfl_pwq and implement accessors for it"
+
+From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+This reverts commit bd31fb926dfa02d2ccfb4b79389168b1d16f36b1 which is
+commit 9f66cff212bb3c1cd25996aaa0dfd0c9e9d8baab upstream.
+
+The workqueue patches backported to 6.6.y caused some reported
+regressions, so revert them for now.
+
+Reported-by: Thorsten Leemhuis <regressions@leemhuis.info>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Marek Szyprowski <m.szyprowski@samsung.com>
+Cc: Nathan Chancellor <nathan@kernel.org>
+Cc: Sasha Levin <sashal@kernel.org>
+Cc: Audra Mitchell <audra@redhat.com>
+Link: https://lore.kernel.org/all/ce4c2f67-c298-48a0-87a3-f933d646c73b@leemhuis.info/
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/workqueue.c |   64 +++++++++++++++++++----------------------------------
+ 1 file changed, 24 insertions(+), 40 deletions(-)
+
+--- a/kernel/workqueue.c
++++ b/kernel/workqueue.c
+@@ -304,7 +304,7 @@ struct workqueue_struct {
+       int                     saved_max_active; /* WQ: saved max_active */
+ 
+       struct workqueue_attrs  *unbound_attrs; /* PW: only for unbound wqs */
+-      struct pool_workqueue __rcu *dfl_pwq;   /* PW: only for unbound wqs */
++      struct pool_workqueue   *dfl_pwq;       /* PW: only for unbound wqs */
+ 
+ #ifdef CONFIG_SYSFS
+       struct wq_device        *wq_dev;        /* I: for sysfs interface */
+@@ -629,23 +629,6 @@ static int worker_pool_assign_id(struct
+       return ret;
+ }
+ 
+-static struct pool_workqueue __rcu **
+-unbound_pwq_slot(struct workqueue_struct *wq, int cpu)
+-{
+-       if (cpu >= 0)
+-               return per_cpu_ptr(wq->cpu_pwq, cpu);
+-       else
+-               return &wq->dfl_pwq;
+-}
+-
+-/* @cpu < 0 for dfl_pwq */
+-static struct pool_workqueue *unbound_pwq(struct workqueue_struct *wq, int cpu)
+-{
+-      return rcu_dereference_check(*unbound_pwq_slot(wq, cpu),
+-                                   lockdep_is_held(&wq_pool_mutex) ||
+-                                   lockdep_is_held(&wq->mutex));
+-}
+-
+ static unsigned int work_color_to_flags(int color)
+ {
+       return color << WORK_STRUCT_COLOR_SHIFT;
+@@ -4335,11 +4318,10 @@ static void wq_calc_pod_cpumask(struct w
+                               "possible intersect\n");
+ }
+ 
+-/* install @pwq into @wq and return the old pwq, @cpu < 0 for dfl_pwq */
++/* install @pwq into @wq's cpu_pwq and return the old pwq */
+ static struct pool_workqueue *install_unbound_pwq(struct workqueue_struct *wq,
+                                       int cpu, struct pool_workqueue *pwq)
+ {
+-      struct pool_workqueue __rcu **slot = unbound_pwq_slot(wq, cpu);
+       struct pool_workqueue *old_pwq;
+ 
+       lockdep_assert_held(&wq_pool_mutex);
+@@ -4348,8 +4330,8 @@ static struct pool_workqueue *install_un
+       /* link_pwq() can handle duplicate calls */
+       link_pwq(pwq);
+ 
+-      old_pwq = rcu_access_pointer(*slot);
+-      rcu_assign_pointer(*slot, pwq);
++      old_pwq = rcu_access_pointer(*per_cpu_ptr(wq->cpu_pwq, cpu));
++      rcu_assign_pointer(*per_cpu_ptr(wq->cpu_pwq, cpu), pwq);
+       return old_pwq;
+ }
+ 
+@@ -4449,11 +4431,14 @@ static void apply_wqattrs_commit(struct
+ 
+       copy_workqueue_attrs(ctx->wq->unbound_attrs, ctx->attrs);
+ 
+-      /* save the previous pwqs and install the new ones */
++      /* save the previous pwq and install the new one */
+       for_each_possible_cpu(cpu)
+               ctx->pwq_tbl[cpu] = install_unbound_pwq(ctx->wq, cpu,
+                                                       ctx->pwq_tbl[cpu]);
+-      ctx->dfl_pwq = install_unbound_pwq(ctx->wq, -1, ctx->dfl_pwq);
++
++      /* @dfl_pwq might not have been used, ensure it's linked */
++      link_pwq(ctx->dfl_pwq);
++      swap(ctx->wq->dfl_pwq, ctx->dfl_pwq);
+ 
+       mutex_unlock(&ctx->wq->mutex);
+ }
+@@ -4576,7 +4561,9 @@ static void wq_update_pod(struct workque
+ 
+       /* nothing to do if the target cpumask matches the current pwq */
+       wq_calc_pod_cpumask(target_attrs, cpu, off_cpu);
+-      if (wqattrs_equal(target_attrs, unbound_pwq(wq, cpu)->pool->attrs))
++      pwq = rcu_dereference_protected(*per_cpu_ptr(wq->cpu_pwq, cpu),
++                                      lockdep_is_held(&wq_pool_mutex));
++      if (wqattrs_equal(target_attrs, pwq->pool->attrs))
+               return;
+ 
+       /* create a new pwq */
+@@ -4594,11 +4581,10 @@ static void wq_update_pod(struct workque
+ 
+ use_dfl_pwq:
+       mutex_lock(&wq->mutex);
+-      pwq = unbound_pwq(wq, -1);
+-      raw_spin_lock_irq(&pwq->pool->lock);
+-      get_pwq(pwq);
+-      raw_spin_unlock_irq(&pwq->pool->lock);
+-      old_pwq = install_unbound_pwq(wq, cpu, pwq);
++      raw_spin_lock_irq(&wq->dfl_pwq->pool->lock);
++      get_pwq(wq->dfl_pwq);
++      raw_spin_unlock_irq(&wq->dfl_pwq->pool->lock);
++      old_pwq = install_unbound_pwq(wq, cpu, wq->dfl_pwq);
+ out_unlock:
+       mutex_unlock(&wq->mutex);
+       put_pwq_unlocked(old_pwq);
+@@ -4636,13 +4622,10 @@ static int alloc_and_link_pwqs(struct wo
+ 
+       cpus_read_lock();
+       if (wq->flags & __WQ_ORDERED) {
+-              struct pool_workqueue *dfl_pwq;
+-
+               ret = apply_workqueue_attrs(wq, ordered_wq_attrs[highpri]);
+               /* there should only be single pwq for ordering guarantee */
+-              dfl_pwq = rcu_access_pointer(wq->dfl_pwq);
+-              WARN(!ret && (wq->pwqs.next != &dfl_pwq->pwqs_node ||
+-                            wq->pwqs.prev != &dfl_pwq->pwqs_node),
++              WARN(!ret && (wq->pwqs.next != &wq->dfl_pwq->pwqs_node ||
++                            wq->pwqs.prev != &wq->dfl_pwq->pwqs_node),
+                    "ordering guarantee broken for workqueue %s\n", wq->name);
+       } else {
+               ret = apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]);
+@@ -4873,7 +4856,7 @@ static bool pwq_busy(struct pool_workque
+               if (pwq->nr_in_flight[i])
+                       return true;
+ 
+-      if ((pwq != rcu_access_pointer(pwq->wq->dfl_pwq)) && (pwq->refcnt > 1))
++      if ((pwq != pwq->wq->dfl_pwq) && (pwq->refcnt > 1))
+               return true;
+       if (!pwq_is_empty(pwq))
+               return true;
+@@ -4957,12 +4940,13 @@ void destroy_workqueue(struct workqueue_
+       rcu_read_lock();
+ 
+       for_each_possible_cpu(cpu) {
+-              put_pwq_unlocked(unbound_pwq(wq, cpu));
+-              RCU_INIT_POINTER(*unbound_pwq_slot(wq, cpu), NULL);
++              pwq = rcu_access_pointer(*per_cpu_ptr(wq->cpu_pwq, cpu));
++              RCU_INIT_POINTER(*per_cpu_ptr(wq->cpu_pwq, cpu), NULL);
++              put_pwq_unlocked(pwq);
+       }
+ 
+-      put_pwq_unlocked(unbound_pwq(wq, -1));
+-      RCU_INIT_POINTER(*unbound_pwq_slot(wq, -1), NULL);
++      put_pwq_unlocked(wq->dfl_pwq);
++      wq->dfl_pwq = NULL;
+ 
+       rcu_read_unlock();
+ }
diff --git a/queue-6.6/revert-workqueue-replace-pwq_activate_inactive_work-with-pwq_activate_work.patch b/queue-6.6/revert-workqueue-replace-pwq_activate_inactive_work-with-pwq_activate_work.patch

new file mode 100644 (file)

index 0000000..d1c8840
--- /dev/null
+++ b/queue-6.6/revert-workqueue-replace-pwq_activate_inactive_work-with-pwq_activate_work.patch
@@ -0,0 +1,86 @@
+From 41dfebeb2e334d3b6a7da6325b46292f585f1cf1 Mon Sep 17 00:00:00 2001
+From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Date: Wed, 3 Apr 2024 16:36:47 +0200
+Subject: Revert "workqueue: Replace pwq_activate_inactive_work() with [__]pwq_activate_work()"
+
+From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+This reverts commit 6c592f0bb96815117538491e5ba12e0a8a8c4493 which is
+commit 4c6380305d21e36581b451f7337a36c93b64e050 upstream.
+
+The workqueue patches backported to 6.6.y caused some reported
+regressions, so revert them for now.
+
+Reported-by: Thorsten Leemhuis <regressions@leemhuis.info>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Marek Szyprowski <m.szyprowski@samsung.com>
+Cc: Nathan Chancellor <nathan@kernel.org>
+Cc: Sasha Levin <sashal@kernel.org>
+Cc: Audra Mitchell <audra@redhat.com>
+Link: https://lore.kernel.org/all/ce4c2f67-c298-48a0-87a3-f933d646c73b@leemhuis.info/
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/workqueue.c |   31 ++++++-------------------------
+ 1 file changed, 6 insertions(+), 25 deletions(-)
+
+--- a/kernel/workqueue.c
++++ b/kernel/workqueue.c
+@@ -1455,36 +1455,16 @@ static bool pwq_is_empty(struct pool_wor
+       return !pwq->nr_active && list_empty(&pwq->inactive_works);
+ }
+ 
+-static void __pwq_activate_work(struct pool_workqueue *pwq,
+-                              struct work_struct *work)
++static void pwq_activate_inactive_work(struct work_struct *work)
+ {
++      struct pool_workqueue *pwq = get_work_pwq(work);
++
+       trace_workqueue_activate_work(work);
+       if (list_empty(&pwq->pool->worklist))
+               pwq->pool->watchdog_ts = jiffies;
+       move_linked_works(work, &pwq->pool->worklist, NULL);
+       __clear_bit(WORK_STRUCT_INACTIVE_BIT, work_data_bits(work));
+-}
+-
+-/**
+- * pwq_activate_work - Activate a work item if inactive
+- * @pwq: pool_workqueue @work belongs to
+- * @work: work item to activate
+- *
+- * Returns %true if activated. %false if already active.
+- */
+-static bool pwq_activate_work(struct pool_workqueue *pwq,
+-                            struct work_struct *work)
+-{
+-      struct worker_pool *pool = pwq->pool;
+-
+-      lockdep_assert_held(&pool->lock);
+-
+-      if (!(*work_data_bits(work) & WORK_STRUCT_INACTIVE))
+-              return false;
+-
+       pwq->nr_active++;
+-      __pwq_activate_work(pwq, work);
+-      return true;
+ }
+ 
+ static void pwq_activate_first_inactive(struct pool_workqueue *pwq)
+@@ -1492,7 +1472,7 @@ static void pwq_activate_first_inactive(
+       struct work_struct *work = list_first_entry(&pwq->inactive_works,
+                                                   struct work_struct, entry);
+ 
+-      pwq_activate_work(pwq, work);
++      pwq_activate_inactive_work(work);
+ }
+ 
+ /**
+@@ -1630,7 +1610,8 @@ static int try_to_grab_pending(struct wo
+                * management later on and cause stall.  Make sure the work
+                * item is activated before grabbing.
+                */
+-              pwq_activate_work(pwq, work);
++              if (*work_data_bits(work) & WORK_STRUCT_INACTIVE)
++                      pwq_activate_inactive_work(work);
+ 
+               list_del_init(&work->entry);
+               pwq_dec_nr_in_flight(pwq, *work_data_bits(work));
diff --git a/queue-6.6/revert-workqueue-shorten-events_freezable_power_efficient-name.patch b/queue-6.6/revert-workqueue-shorten-events_freezable_power_efficient-name.patch

new file mode 100644 (file)

index 0000000..d78b659
--- /dev/null
+++ b/queue-6.6/revert-workqueue-shorten-events_freezable_power_efficient-name.patch
@@ -0,0 +1,36 @@
+From 8961cef2c8974269d8a27bf4ac0485c1a9b1cb35 Mon Sep 17 00:00:00 2001
+From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Date: Wed, 3 Apr 2024 16:35:17 +0200
+Subject: Revert "workqueue: Shorten events_freezable_power_efficient name"
+
+From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+This reverts commit 8b934390272d50ae0e7e320617437a03e5712baa which is
+commit 8318d6a6362f5903edb4c904a8dd447e59be4ad1 upstream.
+
+The workqueue patches backported to 6.6.y caused some reported
+regressions, so revert them for now.
+
+Reported-by: Thorsten Leemhuis <regressions@leemhuis.info>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Marek Szyprowski <m.szyprowski@samsung.com>
+Cc: Nathan Chancellor <nathan@kernel.org>
+Cc: Sasha Levin <sashal@kernel.org>
+Cc: Audra Mitchell <audra@redhat.com>
+Link: https://lore.kernel.org/all/ce4c2f67-c298-48a0-87a3-f933d646c73b@leemhuis.info/
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/workqueue.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/workqueue.c
++++ b/kernel/workqueue.c
+@@ -7109,7 +7109,7 @@ void __init workqueue_init_early(void)
+                                             WQ_FREEZABLE, 0);
+       system_power_efficient_wq = alloc_workqueue("events_power_efficient",
+                                             WQ_POWER_EFFICIENT, 0);
+-      system_freezable_power_efficient_wq = alloc_workqueue("events_freezable_pwr_efficient",
++      system_freezable_power_efficient_wq = alloc_workqueue("events_freezable_power_efficient",
+                                             WQ_FREEZABLE | WQ_POWER_EFFICIENT,
+                                             0);
+       BUG_ON(!system_wq || !system_highpri_wq || !system_long_wq ||
diff --git a/queue-6.6/revert-workqueue.c-increase-workqueue-name-length.patch b/queue-6.6/revert-workqueue.c-increase-workqueue-name-length.patch

new file mode 100644 (file)

index 0000000..dd2744d
--- /dev/null
+++ b/queue-6.6/revert-workqueue.c-increase-workqueue-name-length.patch
@@ -0,0 +1,58 @@
+From 7686571826f3e5b96d309b28aaaffa26a7325b48 Mon Sep 17 00:00:00 2001
+From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Date: Wed, 3 Apr 2024 16:36:51 +0200
+Subject: Revert "workqueue.c: Increase workqueue name length"
+
+From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+This reverts commit 43a181f8f41aca27e7454cf44a6dfbccc8b14e92 which is
+commit 31c89007285d365aa36f71d8fb0701581c770a27 upstream.
+
+The workqueue patches backported to 6.6.y caused some reported
+regressions, so revert them for now.
+
+Reported-by: Thorsten Leemhuis <regressions@leemhuis.info>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Marek Szyprowski <m.szyprowski@samsung.com>
+Cc: Nathan Chancellor <nathan@kernel.org>
+Cc: Sasha Levin <sashal@kernel.org>
+Cc: Audra Mitchell <audra@redhat.com>
+Link: https://lore.kernel.org/all/ce4c2f67-c298-48a0-87a3-f933d646c73b@leemhuis.info/
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/workqueue.c |    8 ++------
+ 1 file changed, 2 insertions(+), 6 deletions(-)
+
+--- a/kernel/workqueue.c
++++ b/kernel/workqueue.c
+@@ -108,7 +108,7 @@ enum {
+       RESCUER_NICE_LEVEL      = MIN_NICE,
+       HIGHPRI_NICE_LEVEL      = MIN_NICE,
+ 
+-      WQ_NAME_LEN             = 32,
++      WQ_NAME_LEN             = 24,
+ };
+ 
+ /*
+@@ -4673,7 +4673,6 @@ struct workqueue_struct *alloc_workqueue
+       va_list args;
+       struct workqueue_struct *wq;
+       struct pool_workqueue *pwq;
+-      int len;
+ 
+       /*
+        * Unbound && max_active == 1 used to imply ordered, which is no longer
+@@ -4700,12 +4699,9 @@ struct workqueue_struct *alloc_workqueue
+       }
+ 
+       va_start(args, max_active);
+-      len = vsnprintf(wq->name, sizeof(wq->name), fmt, args);
++      vsnprintf(wq->name, sizeof(wq->name), fmt, args);
+       va_end(args);
+ 
+-      if (len >= WQ_NAME_LEN)
+-              pr_warn_once("workqueue: name exceeds WQ_NAME_LEN. Truncating to: %s\n", wq->name);
+-
+       max_active = max_active ?: WQ_DFL_ACTIVE;
+       max_active = wq_clamp_max_active(max_active, flags, wq->name);
+ 
diff --git a/queue-6.6/series b/queue-6.6/series

new file mode 100644 (file)

index 0000000..739caff
--- /dev/null
+++ b/queue-6.6/series
@@ -0,0 +1,11 @@
+revert-workqueue-shorten-events_freezable_power_efficient-name.patch
+revert-workqueue-don-t-call-cpumask_test_cpu-with-1-cpu-in-wq_update_node_max_active.patch
+revert-workqueue-implement-system-wide-nr_active-enforcement-for-unbound-workqueues.patch
+revert-workqueue-introduce-struct-wq_node_nr_active.patch
+revert-workqueue-rcu-protect-wq-dfl_pwq-and-implement-accessors-for-it.patch
+revert-workqueue-make-wq_adjust_max_active-round-robin-pwqs-while-activating.patch
+revert-workqueue-move-nr_active-handling-into-helpers.patch
+revert-workqueue-replace-pwq_activate_inactive_work-with-pwq_activate_work.patch
+revert-workqueue-factor-out-pwq_is_empty.patch
+revert-workqueue-move-pwq-max_active-to-wq-max_active.patch
+revert-workqueue.c-increase-workqueue-name-length.patch
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Wed, 3 Apr 2024 17:47:30 +0000 (19:47 +0200)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Wed, 3 Apr 2024 17:47:30 +0000 (19:47 +0200)
queue-6.6/revert-workqueue-don-t-call-cpumask_test_cpu-with-1-cpu-in-wq_update_node_max_active.patch	[new file with mode: 0644]	patch | blob
queue-6.6/revert-workqueue-factor-out-pwq_is_empty.patch	[new file with mode: 0644]	patch | blob
queue-6.6/revert-workqueue-implement-system-wide-nr_active-enforcement-for-unbound-workqueues.patch	[new file with mode: 0644]	patch | blob
queue-6.6/revert-workqueue-introduce-struct-wq_node_nr_active.patch	[new file with mode: 0644]	patch | blob
queue-6.6/revert-workqueue-make-wq_adjust_max_active-round-robin-pwqs-while-activating.patch	[new file with mode: 0644]	patch | blob
queue-6.6/revert-workqueue-move-nr_active-handling-into-helpers.patch	[new file with mode: 0644]	patch | blob
queue-6.6/revert-workqueue-move-pwq-max_active-to-wq-max_active.patch	[new file with mode: 0644]	patch | blob
queue-6.6/revert-workqueue-rcu-protect-wq-dfl_pwq-and-implement-accessors-for-it.patch	[new file with mode: 0644]	patch | blob
queue-6.6/revert-workqueue-replace-pwq_activate_inactive_work-with-pwq_activate_work.patch	[new file with mode: 0644]	patch | blob
queue-6.6/revert-workqueue-shorten-events_freezable_power_efficient-name.patch	[new file with mode: 0644]	patch | blob
queue-6.6/revert-workqueue.c-increase-workqueue-name-length.patch	[new file with mode: 0644]	patch | blob
queue-6.6/series	[new file with mode: 0644]	patch | blob