--- /dev/null
+From 350ef88e7e922354f82a931897ad4a4ce6c686ff Mon Sep 17 00:00:00 2001
+From: Mathias Krause <minipli@googlemail.com>
+Date: Fri, 8 Sep 2017 20:57:11 +0200
+Subject: padata: ensure padata_do_serial() runs on the correct CPU
+
+From: Mathias Krause <minipli@googlemail.com>
+
+commit 350ef88e7e922354f82a931897ad4a4ce6c686ff upstream.
+
+If the algorithm we're parallelizing is asynchronous we might change
+CPUs between padata_do_parallel() and padata_do_serial(). However, we
+don't expect this to happen as we need to enqueue the padata object into
+the per-cpu reorder queue we took it from, i.e. the same-cpu's parallel
+queue.
+
+Ensure we're not switching CPUs for a given padata object by tracking
+the CPU within the padata object. If the serial callback gets called on
+the wrong CPU, defer invoking padata_reorder() via a kernel worker on
+the CPU we're expected to run on.
+
+Signed-off-by: Mathias Krause <minipli@googlemail.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Cc: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/padata.h | 2 ++
+ kernel/padata.c | 20 +++++++++++++++++++-
+ 2 files changed, 21 insertions(+), 1 deletion(-)
+
+--- a/include/linux/padata.h
++++ b/include/linux/padata.h
+@@ -37,6 +37,7 @@
+ * @list: List entry, to attach to the padata lists.
+ * @pd: Pointer to the internal control structure.
+ * @cb_cpu: Callback cpu for serializatioon.
++ * @cpu: Cpu for parallelization.
+ * @seq_nr: Sequence number of the parallelized data object.
+ * @info: Used to pass information from the parallel to the serial function.
+ * @parallel: Parallel execution function.
+@@ -46,6 +47,7 @@ struct padata_priv {
+ struct list_head list;
+ struct parallel_data *pd;
+ int cb_cpu;
++ int cpu;
+ int info;
+ void (*parallel)(struct padata_priv *padata);
+ void (*serial)(struct padata_priv *padata);
+--- a/kernel/padata.c
++++ b/kernel/padata.c
+@@ -133,6 +133,7 @@ int padata_do_parallel(struct padata_ins
+ padata->cb_cpu = cb_cpu;
+
+ target_cpu = padata_cpu_hash(pd);
++ padata->cpu = target_cpu;
+ queue = per_cpu_ptr(pd->pqueue, target_cpu);
+
+ spin_lock(&queue->parallel.lock);
+@@ -376,10 +377,21 @@ void padata_do_serial(struct padata_priv
+ int cpu;
+ struct padata_parallel_queue *pqueue;
+ struct parallel_data *pd;
++ int reorder_via_wq = 0;
+
+ pd = padata->pd;
+
+ cpu = get_cpu();
++
++ /* We need to run on the same CPU padata_do_parallel(.., padata, ..)
++ * was called on -- or, at least, enqueue the padata object into the
++ * correct per-cpu queue.
++ */
++ if (cpu != padata->cpu) {
++ reorder_via_wq = 1;
++ cpu = padata->cpu;
++ }
++
+ pqueue = per_cpu_ptr(pd->pqueue, cpu);
+
+ spin_lock(&pqueue->reorder.lock);
+@@ -396,7 +408,13 @@ void padata_do_serial(struct padata_priv
+
+ put_cpu();
+
+- padata_reorder(pd);
++ /* If we're running on the wrong CPU, call padata_reorder() via a
++ * kernel worker.
++ */
++ if (reorder_via_wq)
++ queue_work_on(cpu, pd->pinst->wq, &pqueue->reorder_work);
++ else
++ padata_reorder(pd);
+ }
+ EXPORT_SYMBOL(padata_do_serial);
+
--- /dev/null
+From cf5868c8a22dc2854b96e9569064bb92365549ca Mon Sep 17 00:00:00 2001
+From: Mathias Krause <minipli@googlemail.com>
+Date: Fri, 8 Sep 2017 20:57:10 +0200
+Subject: padata: ensure the reorder timer callback runs on the correct CPU
+
+From: Mathias Krause <minipli@googlemail.com>
+
+commit cf5868c8a22dc2854b96e9569064bb92365549ca upstream.
+
+The reorder timer function runs on the CPU where the timer interrupt was
+handled which is not necessarily one of the CPUs of the 'pcpu' CPU mask
+set.
+
+Ensure the padata_reorder() callback runs on the correct CPU, which is
+one in the 'pcpu' CPU mask set and, preferably, the next expected one.
+Do so by comparing the current CPU with the expected target CPU. If they
+match, call padata_reorder() right away. If they differ, schedule a work
+item on the target CPU that does the padata_reorder() call for us.
+
+Signed-off-by: Mathias Krause <minipli@googlemail.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Cc: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/padata.h | 2 ++
+ kernel/padata.c | 43 ++++++++++++++++++++++++++++++++++++++++++-
+ 2 files changed, 44 insertions(+), 1 deletion(-)
+
+--- a/include/linux/padata.h
++++ b/include/linux/padata.h
+@@ -85,6 +85,7 @@ struct padata_serial_queue {
+ * @swork: work struct for serialization.
+ * @pd: Backpointer to the internal control structure.
+ * @work: work struct for parallelization.
++ * @reorder_work: work struct for reordering.
+ * @num_obj: Number of objects that are processed by this cpu.
+ * @cpu_index: Index of the cpu.
+ */
+@@ -93,6 +94,7 @@ struct padata_parallel_queue {
+ struct padata_list reorder;
+ struct parallel_data *pd;
+ struct work_struct work;
++ struct work_struct reorder_work;
+ atomic_t num_obj;
+ int cpu_index;
+ };
+--- a/kernel/padata.c
++++ b/kernel/padata.c
+@@ -282,11 +282,51 @@ static void padata_reorder(struct parall
+ return;
+ }
+
++static void invoke_padata_reorder(struct work_struct *work)
++{
++ struct padata_parallel_queue *pqueue;
++ struct parallel_data *pd;
++
++ local_bh_disable();
++ pqueue = container_of(work, struct padata_parallel_queue, reorder_work);
++ pd = pqueue->pd;
++ padata_reorder(pd);
++ local_bh_enable();
++}
++
+ static void padata_reorder_timer(unsigned long arg)
+ {
+ struct parallel_data *pd = (struct parallel_data *)arg;
++ unsigned int weight;
++ int target_cpu, cpu;
+
+- padata_reorder(pd);
++ cpu = get_cpu();
++
++ /* We don't lock pd here to not interfere with parallel processing
++ * padata_reorder() calls on other CPUs. We just need any CPU out of
++ * the cpumask.pcpu set. It would be nice if it's the right one but
++ * it doesn't matter if we're off to the next one by using an outdated
++ * pd->processed value.
++ */
++ weight = cpumask_weight(pd->cpumask.pcpu);
++ target_cpu = padata_index_to_cpu(pd, pd->processed % weight);
++
++ /* ensure to call the reorder callback on the correct CPU */
++ if (cpu != target_cpu) {
++ struct padata_parallel_queue *pqueue;
++ struct padata_instance *pinst;
++
++ /* The timer function is serialized wrt itself -- no locking
++ * needed.
++ */
++ pinst = pd->pinst;
++ pqueue = per_cpu_ptr(pd->pqueue, target_cpu);
++ queue_work_on(target_cpu, pinst->wq, &pqueue->reorder_work);
++ } else {
++ padata_reorder(pd);
++ }
++
++ put_cpu();
+ }
+
+ static void padata_serial_worker(struct work_struct *serial_work)
+@@ -413,6 +453,7 @@ static void padata_init_pqueues(struct p
+ __padata_list_init(&pqueue->reorder);
+ __padata_list_init(&pqueue->parallel);
+ INIT_WORK(&pqueue->work, padata_parallel_worker);
++ INIT_WORK(&pqueue->reorder_work, invoke_padata_reorder);
+ atomic_set(&pqueue->num_obj, 0);
+ }
+ }