From: Sasha Levin <sashal@kernel.org>
Date: Fri, 22 May 2020 00:42:54 +0000 (-0400)
Subject: Fixes for 4.19
X-Git-Tag: v4.4.225~64
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=205e4eb4f0ee38d472c9eae659f8bfbb939e0211;p=thirdparty%2Fkernel%2Fstable-queue.git

Fixes for 4.19

Signed-off-by: Sasha Levin <sashal@kernel.org>
---

diff --git a/queue-4.19/padata-initialize-pd-cpu-with-effective-cpumask.patch b/queue-4.19/padata-initialize-pd-cpu-with-effective-cpumask.patch
new file mode 100644
index 00000000000..e787b5eb33f
--- /dev/null
+++ b/queue-4.19/padata-initialize-pd-cpu-with-effective-cpumask.patch
@@ -0,0 +1,76 @@
+From cf618cb5d9bb7da5a740eb6793a1468e101253f8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 21 May 2020 16:40:50 -0400
+Subject: padata: initialize pd->cpu with effective cpumask
+
+From: Daniel Jordan <daniel.m.jordan@oracle.com>
+
+[ Upstream commit ec9c7d19336ee98ecba8de80128aa405c45feebb ]
+
+Exercising CPU hotplug on a 5.2 kernel with recent padata fixes from
+cryptodev-2.6.git in an 8-CPU kvm guest...
+
+    # modprobe tcrypt alg="pcrypt(rfc4106(gcm(aes)))" type=3
+    # echo 0 > /sys/devices/system/cpu/cpu1/online
+    # echo c > /sys/kernel/pcrypt/pencrypt/parallel_cpumask
+    # modprobe tcrypt mode=215
+
+...caused the following crash:
+
+    BUG: kernel NULL pointer dereference, address: 0000000000000000
+    #PF: supervisor read access in kernel mode
+    #PF: error_code(0x0000) - not-present page
+    PGD 0 P4D 0
+    Oops: 0000 [#1] SMP PTI
+    CPU: 2 PID: 134 Comm: kworker/2:2 Not tainted 5.2.0-padata-base+ #7
+    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-<snip>
+    Workqueue: pencrypt padata_parallel_worker
+    RIP: 0010:padata_reorder+0xcb/0x180
+    ...
+    Call Trace:
+     padata_do_serial+0x57/0x60
+     pcrypt_aead_enc+0x3a/0x50 [pcrypt]
+     padata_parallel_worker+0x9b/0xe0
+     process_one_work+0x1b5/0x3f0
+     worker_thread+0x4a/0x3c0
+     ...
+
+In padata_alloc_pd, pd->cpu is set using the user-supplied cpumask
+instead of the effective cpumask, and in this case cpumask_first picked
+an offline CPU.
+
+The offline CPU's reorder->list.next is NULL in padata_reorder because
+the list wasn't initialized in padata_init_pqueues, which only operates
+on CPUs in the effective mask.
+
+Fix by using the effective mask in padata_alloc_pd.
+
+Fixes: 6fc4dbcf0276 ("padata: Replace delayed timer with immediate workqueue in padata_reorder")
+Signed-off-by: Daniel Jordan <daniel.m.jordan@oracle.com>
+Cc: Herbert Xu <herbert@gondor.apana.org.au>
+Cc: Steffen Klassert <steffen.klassert@secunet.com>
+Cc: linux-crypto@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Daniel Jordan <daniel.m.jordan@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/padata.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/padata.c b/kernel/padata.c
+index 47dc31ce15ac..e9b8d517fd4b 100644
+--- a/kernel/padata.c
++++ b/kernel/padata.c
+@@ -451,7 +451,7 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
+ 	atomic_set(&pd->refcnt, 1);
+ 	pd->pinst = pinst;
+ 	spin_lock_init(&pd->lock);
+-	pd->cpu = cpumask_first(pcpumask);
++	pd->cpu = cpumask_first(pd->cpumask.pcpu);
+ 	INIT_WORK(&pd->reorder_work, invoke_padata_reorder);
+ 
+ 	return pd;
+-- 
+2.25.1
+
diff --git a/queue-4.19/padata-purge-get_cpu-and-reorder_via_wq-from-padata_.patch b/queue-4.19/padata-purge-get_cpu-and-reorder_via_wq-from-padata_.patch
new file mode 100644
index 00000000000..52e5b5d0919
--- /dev/null
+++ b/queue-4.19/padata-purge-get_cpu-and-reorder_via_wq-from-padata_.patch
@@ -0,0 +1,68 @@
+From 6e53b5dea7786576767c4aaf906eeff6cfff536d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 21 May 2020 16:40:51 -0400
+Subject: padata: purge get_cpu and reorder_via_wq from padata_do_serial
+
+From: Daniel Jordan <daniel.m.jordan@oracle.com>
+
+[ Upstream commit 065cf577135a4977931c7a1e1edf442bfd9773dd ]
+
+With the removal of the padata timer, padata_do_serial no longer
+needs special CPU handling, so remove it.
+
+Signed-off-by: Daniel Jordan <daniel.m.jordan@oracle.com>
+Cc: Herbert Xu <herbert@gondor.apana.org.au>
+Cc: Steffen Klassert <steffen.klassert@secunet.com>
+Cc: linux-crypto@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Daniel Jordan <daniel.m.jordan@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/padata.c | 23 +++--------------------
+ 1 file changed, 3 insertions(+), 20 deletions(-)
+
+diff --git a/kernel/padata.c b/kernel/padata.c
+index e9b8d517fd4b..93e4fb2d9f2e 100644
+--- a/kernel/padata.c
++++ b/kernel/padata.c
+@@ -324,24 +324,9 @@ static void padata_serial_worker(struct work_struct *serial_work)
+  */
+ void padata_do_serial(struct padata_priv *padata)
+ {
+-	int cpu;
+-	struct padata_parallel_queue *pqueue;
+-	struct parallel_data *pd;
+-	int reorder_via_wq = 0;
+-
+-	pd = padata->pd;
+-
+-	cpu = get_cpu();
+-
+-	/* We need to enqueue the padata object into the correct
+-	 * per-cpu queue.
+-	 */
+-	if (cpu != padata->cpu) {
+-		reorder_via_wq = 1;
+-		cpu = padata->cpu;
+-	}
+-
+-	pqueue = per_cpu_ptr(pd->pqueue, cpu);
++	struct parallel_data *pd = padata->pd;
++	struct padata_parallel_queue *pqueue = per_cpu_ptr(pd->pqueue,
++							   padata->cpu);
+ 
+ 	spin_lock(&pqueue->reorder.lock);
+ 	list_add_tail(&padata->list, &pqueue->reorder.list);
+@@ -355,8 +340,6 @@ void padata_do_serial(struct padata_priv *padata)
+ 	 */
+ 	smp_mb__after_atomic();
+ 
+-	put_cpu();
+-
+ 	padata_reorder(pd);
+ }
+ EXPORT_SYMBOL(padata_do_serial);
+-- 
+2.25.1
+
diff --git a/queue-4.19/padata-replace-delayed-timer-with-immediate-workqueu.patch b/queue-4.19/padata-replace-delayed-timer-with-immediate-workqueu.patch
new file mode 100644
index 00000000000..e29ee1cc238
--- /dev/null
+++ b/queue-4.19/padata-replace-delayed-timer-with-immediate-workqueu.patch
@@ -0,0 +1,308 @@
+From 764a06d22a28ef1f4f43cda3a9718f4dfb29c184 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 21 May 2020 16:40:49 -0400
+Subject: padata: Replace delayed timer with immediate workqueue in
+ padata_reorder
+
+From: Herbert Xu <herbert@gondor.apana.org.au>
+
+[ Upstream commit 6fc4dbcf0276279d488c5fbbfabe94734134f4fa ]
+
+The function padata_reorder will use a timer when it cannot progress
+while completed jobs are outstanding (pd->reorder_objects > 0).  This
+is suboptimal as if we do end up using the timer then it would have
+introduced a gratuitous delay of one second.
+
+In fact we can easily distinguish between whether completed jobs
+are outstanding and whether we can make progress.  All we have to
+do is look at the next pqueue list.
+
+This patch does that by replacing pd->processed with pd->cpu so
+that the next pqueue is more accessible.
+
+A work queue is used instead of the original try_again to avoid
+hogging the CPU.
+
+Note that we don't bother removing the work queue in
+padata_flush_queues because the whole premise is broken.  You
+cannot flush async crypto requests so it makes no sense to even
+try.  A subsequent patch will fix it by replacing it with a ref
+counting scheme.
+
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+[dj: - adjust context
+     - corrected setup_timer -> timer_setup to delete hunk
+     - skip padata_flush_queues() hunk, function already removed
+       in 4.19]
+Signed-off-by: Daniel Jordan <daniel.m.jordan@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/padata.h | 13 ++----
+ kernel/padata.c        | 95 ++++++++----------------------------------
+ 2 files changed, 22 insertions(+), 86 deletions(-)
+
+diff --git a/include/linux/padata.h b/include/linux/padata.h
+index 5d13d25da2c8..d803397a28f7 100644
+--- a/include/linux/padata.h
++++ b/include/linux/padata.h
+@@ -24,7 +24,6 @@
+ #include <linux/workqueue.h>
+ #include <linux/spinlock.h>
+ #include <linux/list.h>
+-#include <linux/timer.h>
+ #include <linux/notifier.h>
+ #include <linux/kobject.h>
+ 
+@@ -85,18 +84,14 @@ struct padata_serial_queue {
+  * @serial: List to wait for serialization after reordering.
+  * @pwork: work struct for parallelization.
+  * @swork: work struct for serialization.
+- * @pd: Backpointer to the internal control structure.
+  * @work: work struct for parallelization.
+- * @reorder_work: work struct for reordering.
+  * @num_obj: Number of objects that are processed by this cpu.
+  * @cpu_index: Index of the cpu.
+  */
+ struct padata_parallel_queue {
+        struct padata_list    parallel;
+        struct padata_list    reorder;
+-       struct parallel_data *pd;
+        struct work_struct    work;
+-       struct work_struct    reorder_work;
+        atomic_t              num_obj;
+        int                   cpu_index;
+ };
+@@ -122,10 +117,10 @@ struct padata_cpumask {
+  * @reorder_objects: Number of objects waiting in the reorder queues.
+  * @refcnt: Number of objects holding a reference on this parallel_data.
+  * @max_seq_nr:  Maximal used sequence number.
++ * @cpu: Next CPU to be processed.
+  * @cpumask: The cpumasks in use for parallel and serial workers.
++ * @reorder_work: work struct for reordering.
+  * @lock: Reorder lock.
+- * @processed: Number of already processed objects.
+- * @timer: Reorder timer.
+  */
+ struct parallel_data {
+ 	struct padata_instance		*pinst;
+@@ -134,10 +129,10 @@ struct parallel_data {
+ 	atomic_t			reorder_objects;
+ 	atomic_t			refcnt;
+ 	atomic_t			seq_nr;
++	int				cpu;
+ 	struct padata_cpumask		cpumask;
++	struct work_struct		reorder_work;
+ 	spinlock_t                      lock ____cacheline_aligned;
+-	unsigned int			processed;
+-	struct timer_list		timer;
+ };
+ 
+ /**
+diff --git a/kernel/padata.c b/kernel/padata.c
+index c280cb153915..47dc31ce15ac 100644
+--- a/kernel/padata.c
++++ b/kernel/padata.c
+@@ -167,23 +167,12 @@ EXPORT_SYMBOL(padata_do_parallel);
+  */
+ static struct padata_priv *padata_get_next(struct parallel_data *pd)
+ {
+-	int cpu, num_cpus;
+-	unsigned int next_nr, next_index;
+ 	struct padata_parallel_queue *next_queue;
+ 	struct padata_priv *padata;
+ 	struct padata_list *reorder;
++	int cpu = pd->cpu;
+ 
+-	num_cpus = cpumask_weight(pd->cpumask.pcpu);
+-
+-	/*
+-	 * Calculate the percpu reorder queue and the sequence
+-	 * number of the next object.
+-	 */
+-	next_nr = pd->processed;
+-	next_index = next_nr % num_cpus;
+-	cpu = padata_index_to_cpu(pd, next_index);
+ 	next_queue = per_cpu_ptr(pd->pqueue, cpu);
+-
+ 	reorder = &next_queue->reorder;
+ 
+ 	spin_lock(&reorder->lock);
+@@ -194,7 +183,8 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd)
+ 		list_del_init(&padata->list);
+ 		atomic_dec(&pd->reorder_objects);
+ 
+-		pd->processed++;
++		pd->cpu = cpumask_next_wrap(cpu, pd->cpumask.pcpu, -1,
++					    false);
+ 
+ 		spin_unlock(&reorder->lock);
+ 		goto out;
+@@ -217,6 +207,7 @@ static void padata_reorder(struct parallel_data *pd)
+ 	struct padata_priv *padata;
+ 	struct padata_serial_queue *squeue;
+ 	struct padata_instance *pinst = pd->pinst;
++	struct padata_parallel_queue *next_queue;
+ 
+ 	/*
+ 	 * We need to ensure that only one cpu can work on dequeueing of
+@@ -248,7 +239,6 @@ static void padata_reorder(struct parallel_data *pd)
+ 		 * so exit immediately.
+ 		 */
+ 		if (PTR_ERR(padata) == -ENODATA) {
+-			del_timer(&pd->timer);
+ 			spin_unlock_bh(&pd->lock);
+ 			return;
+ 		}
+@@ -267,70 +257,29 @@ static void padata_reorder(struct parallel_data *pd)
+ 
+ 	/*
+ 	 * The next object that needs serialization might have arrived to
+-	 * the reorder queues in the meantime, we will be called again
+-	 * from the timer function if no one else cares for it.
++	 * the reorder queues in the meantime.
+ 	 *
+-	 * Ensure reorder_objects is read after pd->lock is dropped so we see
+-	 * an increment from another task in padata_do_serial.  Pairs with
++	 * Ensure reorder queue is read after pd->lock is dropped so we see
++	 * new objects from another task in padata_do_serial.  Pairs with
+ 	 * smp_mb__after_atomic in padata_do_serial.
+ 	 */
+ 	smp_mb();
+-	if (atomic_read(&pd->reorder_objects)
+-			&& !(pinst->flags & PADATA_RESET))
+-		mod_timer(&pd->timer, jiffies + HZ);
+-	else
+-		del_timer(&pd->timer);
+ 
+-	return;
++	next_queue = per_cpu_ptr(pd->pqueue, pd->cpu);
++	if (!list_empty(&next_queue->reorder.list))
++		queue_work(pinst->wq, &pd->reorder_work);
+ }
+ 
+ static void invoke_padata_reorder(struct work_struct *work)
+ {
+-	struct padata_parallel_queue *pqueue;
+ 	struct parallel_data *pd;
+ 
+ 	local_bh_disable();
+-	pqueue = container_of(work, struct padata_parallel_queue, reorder_work);
+-	pd = pqueue->pd;
++	pd = container_of(work, struct parallel_data, reorder_work);
+ 	padata_reorder(pd);
+ 	local_bh_enable();
+ }
+ 
+-static void padata_reorder_timer(struct timer_list *t)
+-{
+-	struct parallel_data *pd = from_timer(pd, t, timer);
+-	unsigned int weight;
+-	int target_cpu, cpu;
+-
+-	cpu = get_cpu();
+-
+-	/* We don't lock pd here to not interfere with parallel processing
+-	 * padata_reorder() calls on other CPUs. We just need any CPU out of
+-	 * the cpumask.pcpu set. It would be nice if it's the right one but
+-	 * it doesn't matter if we're off to the next one by using an outdated
+-	 * pd->processed value.
+-	 */
+-	weight = cpumask_weight(pd->cpumask.pcpu);
+-	target_cpu = padata_index_to_cpu(pd, pd->processed % weight);
+-
+-	/* ensure to call the reorder callback on the correct CPU */
+-	if (cpu != target_cpu) {
+-		struct padata_parallel_queue *pqueue;
+-		struct padata_instance *pinst;
+-
+-		/* The timer function is serialized wrt itself -- no locking
+-		 * needed.
+-		 */
+-		pinst = pd->pinst;
+-		pqueue = per_cpu_ptr(pd->pqueue, target_cpu);
+-		queue_work_on(target_cpu, pinst->wq, &pqueue->reorder_work);
+-	} else {
+-		padata_reorder(pd);
+-	}
+-
+-	put_cpu();
+-}
+-
+ static void padata_serial_worker(struct work_struct *serial_work)
+ {
+ 	struct padata_serial_queue *squeue;
+@@ -384,9 +333,8 @@ void padata_do_serial(struct padata_priv *padata)
+ 
+ 	cpu = get_cpu();
+ 
+-	/* We need to run on the same CPU padata_do_parallel(.., padata, ..)
+-	 * was called on -- or, at least, enqueue the padata object into the
+-	 * correct per-cpu queue.
++	/* We need to enqueue the padata object into the correct
++	 * per-cpu queue.
+ 	 */
+ 	if (cpu != padata->cpu) {
+ 		reorder_via_wq = 1;
+@@ -396,12 +344,12 @@ void padata_do_serial(struct padata_priv *padata)
+ 	pqueue = per_cpu_ptr(pd->pqueue, cpu);
+ 
+ 	spin_lock(&pqueue->reorder.lock);
+-	atomic_inc(&pd->reorder_objects);
+ 	list_add_tail(&padata->list, &pqueue->reorder.list);
++	atomic_inc(&pd->reorder_objects);
+ 	spin_unlock(&pqueue->reorder.lock);
+ 
+ 	/*
+-	 * Ensure the atomic_inc of reorder_objects above is ordered correctly
++	 * Ensure the addition to the reorder list is ordered correctly
+ 	 * with the trylock of pd->lock in padata_reorder.  Pairs with smp_mb
+ 	 * in padata_reorder.
+ 	 */
+@@ -409,13 +357,7 @@ void padata_do_serial(struct padata_priv *padata)
+ 
+ 	put_cpu();
+ 
+-	/* If we're running on the wrong CPU, call padata_reorder() via a
+-	 * kernel worker.
+-	 */
+-	if (reorder_via_wq)
+-		queue_work_on(cpu, pd->pinst->wq, &pqueue->reorder_work);
+-	else
+-		padata_reorder(pd);
++	padata_reorder(pd);
+ }
+ EXPORT_SYMBOL(padata_do_serial);
+ 
+@@ -471,14 +413,12 @@ static void padata_init_pqueues(struct parallel_data *pd)
+ 			continue;
+ 		}
+ 
+-		pqueue->pd = pd;
+ 		pqueue->cpu_index = cpu_index;
+ 		cpu_index++;
+ 
+ 		__padata_list_init(&pqueue->reorder);
+ 		__padata_list_init(&pqueue->parallel);
+ 		INIT_WORK(&pqueue->work, padata_parallel_worker);
+-		INIT_WORK(&pqueue->reorder_work, invoke_padata_reorder);
+ 		atomic_set(&pqueue->num_obj, 0);
+ 	}
+ }
+@@ -506,12 +446,13 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
+ 
+ 	padata_init_pqueues(pd);
+ 	padata_init_squeues(pd);
+-	timer_setup(&pd->timer, padata_reorder_timer, 0);
+ 	atomic_set(&pd->seq_nr, -1);
+ 	atomic_set(&pd->reorder_objects, 0);
+ 	atomic_set(&pd->refcnt, 1);
+ 	pd->pinst = pinst;
+ 	spin_lock_init(&pd->lock);
++	pd->cpu = cpumask_first(pcpumask);
++	INIT_WORK(&pd->reorder_work, invoke_padata_reorder);
+ 
+ 	return pd;
+ 
+-- 
+2.25.1
+
diff --git a/queue-4.19/series b/queue-4.19/series
index c1b002dd3a1..00b5e0d375b 100644
--- a/queue-4.19/series
+++ b/queue-4.19/series
@@ -30,3 +30,6 @@ stmmac-fix-pointer-check-after-utilization-in-stmmac.patch
 usb-core-fix-misleading-driver-bug-report.patch
 platform-x86-asus-nb-wmi-do-not-load-on-asus-t100ta-.patch
 arm-futex-address-build-warning.patch
+padata-replace-delayed-timer-with-immediate-workqueu.patch
+padata-initialize-pd-cpu-with-effective-cpumask.patch
+padata-purge-get_cpu-and-reorder_via_wq-from-padata_.patch