4.14-stable patches
author    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Sat, 11 Aug 2018 17:11:54 +0000 (19:11 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Sat, 11 Aug 2018 17:11:54 +0000 (19:11 +0200)
added patches:
mark-hi-and-tasklet-softirq-synchronous.patch
stop_machine-disable-preemption-after-queueing-stopper-threads.patch

queue-4.14/mark-hi-and-tasklet-softirq-synchronous.patch [new file with mode: 0644]
queue-4.14/series
queue-4.14/stop_machine-disable-preemption-after-queueing-stopper-threads.patch [new file with mode: 0644]

diff --git a/queue-4.14/mark-hi-and-tasklet-softirq-synchronous.patch b/queue-4.14/mark-hi-and-tasklet-softirq-synchronous.patch
new file mode 100644
index 0000000..678b1ba
--- /dev/null
+++ b/queue-4.14/mark-hi-and-tasklet-softirq-synchronous.patch
@@ -0,0 +1,103 @@
+From 3c53776e29f81719efcf8f7a6e30cdf753bee94d Mon Sep 17 00:00:00 2001
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Mon, 8 Jan 2018 11:51:04 -0800
+Subject: Mark HI and TASKLET softirq synchronous
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 3c53776e29f81719efcf8f7a6e30cdf753bee94d upstream.
+
+Way back in 4.9, we committed 4cd13c21b207 ("softirq: Let ksoftirqd do
+its job"), and ever since we've had small nagging issues with it.  For
+example, we've had:
+
+  1ff688209e2e ("watchdog: core: make sure the watchdog_worker is not deferred")
+  8d5755b3f77b ("watchdog: softdog: fire watchdog even if softirqs do not get to run")
+  217f69743681 ("net: busy-poll: allow preemption in sk_busy_loop()")
+
+all of which worked around some of the effects of that commit.
+
+The DVB people have also complained that the commit causes excessive USB
+URB latencies, which seems to be due to the USB code using tasklets to
+schedule USB traffic.  This seems to be an issue mainly when already
+living on the edge, but waiting for ksoftirqd to handle it really does
+seem to cause excessive latencies.
+
+Now Hanna Hawa reports that this issue isn't just limited to USB URB and
+DVB, but also causes timeout problems for the Marvell SoC team:
+
+ "I'm facing kernel panic issue while running raid 5 on sata disks
+  connected to Macchiatobin (Marvell community board with Armada-8040
+  SoC with 4 ARMv8 cores of CA72). Raid 5 built with Marvell DMA engine
+  and async_tx mechanism (ASYNC_TX_DMA [=y]); the DMA driver (mv_xor_v2)
+  uses a tasklet to clean the done descriptors from the queue"
+
+The latency problem causes a panic:
+
+  mv_xor_v2 f0400000.xor: dma_sync_wait: timeout!
+  Kernel panic - not syncing: async_tx_quiesce: DMA error waiting for transaction
+
+We've discussed simply reverting the original commit entirely, and
+also much more involved solutions (with per-softirq threads etc).  This
+patch is intentionally stupid and fairly limited, because the issue
+still remains, and the other solutions either got sidetracked or had
+other issues.
+
+We should probably also consider the timer softirqs to be synchronous
+and not be delayed to ksoftirqd (since they were the issue with the
+earlier watchdog problems), but that should be done as a separate patch.
+This does only the tasklet cases.
+
+Reported-and-tested-by: Hanna Hawa <hannah@marvell.com>
+Reported-and-tested-by: Josef Griebichler <griebichler.josef@gmx.at>
+Reported-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
+Cc: Alan Stern <stern@rowland.harvard.edu>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Eric Dumazet <edumazet@google.com>
+Cc: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/softirq.c |   12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+--- a/kernel/softirq.c
++++ b/kernel/softirq.c
+@@ -79,12 +79,16 @@ static void wakeup_softirqd(void)
+ /*
+  * If ksoftirqd is scheduled, we do not want to process pending softirqs
+- * right now. Let ksoftirqd handle this at its own rate, to get fairness.
++ * right now. Let ksoftirqd handle this at its own rate, to get fairness,
++ * unless we're doing some of the synchronous softirqs.
+  */
+-static bool ksoftirqd_running(void)
++#define SOFTIRQ_NOW_MASK ((1 << HI_SOFTIRQ) | (1 << TASKLET_SOFTIRQ))
++static bool ksoftirqd_running(unsigned long pending)
+ {
+       struct task_struct *tsk = __this_cpu_read(ksoftirqd);
++      if (pending & SOFTIRQ_NOW_MASK)
++              return false;
+       return tsk && (tsk->state == TASK_RUNNING);
+ }
+@@ -324,7 +328,7 @@ asmlinkage __visible void do_softirq(voi
+       pending = local_softirq_pending();
+-      if (pending && !ksoftirqd_running())
++      if (pending && !ksoftirqd_running(pending))
+               do_softirq_own_stack();
+       local_irq_restore(flags);
+@@ -351,7 +355,7 @@ void irq_enter(void)
+ static inline void invoke_softirq(void)
+ {
+-      if (ksoftirqd_running())
++      if (ksoftirqd_running(local_softirq_pending()))
+               return;
+       if (!force_irqthreads) {
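
To see what the new gating does, here is a minimal standalone C sketch (a userspace model, not kernel code): the softirq vector numbering mirrors include/linux/interrupt.h, and defer_to_ksoftirqd() is a hypothetical stand-in for the patched ksoftirqd_running() check.

/*
 * Userspace model of the patched decision: vectors in SOFTIRQ_NOW_MASK
 * (HI and TASKLET) are handled inline even while ksoftirqd is runnable;
 * all other pending softirqs are still deferred to ksoftirqd.
 */
#include <stdbool.h>
#include <stdio.h>

enum { HI_SOFTIRQ = 0, TIMER_SOFTIRQ, NET_TX_SOFTIRQ, NET_RX_SOFTIRQ,
       BLOCK_SOFTIRQ, IRQ_POLL_SOFTIRQ, TASKLET_SOFTIRQ, SCHED_SOFTIRQ,
       HRTIMER_SOFTIRQ, RCU_SOFTIRQ };

#define SOFTIRQ_NOW_MASK ((1UL << HI_SOFTIRQ) | (1UL << TASKLET_SOFTIRQ))

/* Stand-in for the patched ksoftirqd_running(): pending synchronous
 * vectors override the "let ksoftirqd handle it" fairness rule. */
static bool defer_to_ksoftirqd(unsigned long pending, bool ksoftirqd_runnable)
{
	if (pending & SOFTIRQ_NOW_MASK)
		return false;	/* HI/TASKLET pending: process inline now */
	return ksoftirqd_runnable;
}

int main(void)
{
	/* ksoftirqd busy, only a tasklet pending -> handled inline (0). */
	printf("tasklet pending: defer=%d\n",
	       defer_to_ksoftirqd(1UL << TASKLET_SOFTIRQ, true));
	/* ksoftirqd busy, only NET_RX pending -> still deferred (1). */
	printf("net_rx pending:  defer=%d\n",
	       defer_to_ksoftirqd(1UL << NET_RX_SOFTIRQ, true));
	return 0;
}

The asymmetry this prints is exactly what the patch introduces: a pending HI or TASKLET softirq no longer waits behind a busy ksoftirqd, while NET_RX, TIMER and the rest keep the 4cd13c21b207 behaviour.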
diff --git a/queue-4.14/series b/queue-4.14/series
index c008499a650b173c52bbbebb22bc6bfe0a418726..aa198b8585c1181e26db16853564ffd6c83c03a0 100644
--- a/queue-4.14/series
+++ b/queue-4.14/series
@@ -4,3 +4,5 @@ scsi-hpsa-fix-selection-of-reply-queue.patch
 scsi-core-introduce-force_blk_mq.patch
 scsi-virtio_scsi-fix-io-hang-caused-by-automatic-irq-vector-affinity.patch
 kasan-add-no_sanitize-attribute-for-clang-builds.patch
+mark-hi-and-tasklet-softirq-synchronous.patch
+stop_machine-disable-preemption-after-queueing-stopper-threads.patch
diff --git a/queue-4.14/stop_machine-disable-preemption-after-queueing-stopper-threads.patch b/queue-4.14/stop_machine-disable-preemption-after-queueing-stopper-threads.patch
new file mode 100644
index 0000000..a8c40a3
--- /dev/null
+++ b/queue-4.14/stop_machine-disable-preemption-after-queueing-stopper-threads.patch
@@ -0,0 +1,91 @@
+From 2610e88946632afb78aa58e61f11368ac4c0af7b Mon Sep 17 00:00:00 2001
+From: "Isaac J. Manjarres" <isaacm@codeaurora.org>
+Date: Tue, 17 Jul 2018 12:35:29 -0700
+Subject: stop_machine: Disable preemption after queueing stopper threads
+
+From: Isaac J. Manjarres <isaacm@codeaurora.org>
+
+commit 2610e88946632afb78aa58e61f11368ac4c0af7b upstream.
+
+This commit:
+
+  9fb8d5dc4b64 ("stop_machine, Disable preemption when waking two stopper threads")
+
+does not fully address the race condition that can occur
+as follows:
+
+On one CPU, call it CPU 3, thread 1 invokes
+cpu_stop_queue_two_works(2, 3,...), and the execution is such
+that thread 1 queues the works for migration/2 and migration/3,
+and is preempted after releasing the locks for migration/2 and
+migration/3, but before waking the threads.
+
+Then, on CPU 2, a kworker, call it thread 2, is running,
+and it invokes cpu_stop_queue_two_works(1, 2,...), such that
+thread 2 queues the works for migration/1 and migration/2.
+Meanwhile, on CPU 3, thread 1 resumes execution, and wakes
+migration/2 and migration/3. This means that when CPU 2
+releases the locks for migration/1 and migration/2, but before
+it wakes those threads, it can be preempted by migration/2.
+
+If thread 2 is preempted by migration/2, then migration/2 will
+execute the first work item successfully, since migration/3
+was woken up by CPU 3, but when it goes to execute the second
+work item, it disables preemption, calls multi_cpu_stop(),
+and thus, CPU 2 will wait forever for migration/1, which should
+have been woken up by thread 2. However, migration/1 cannot be
+woken up by thread 2, since it is a kworker, so it is affine to
+CPU 2, but CPU 2 is running migration/2 with preemption
+disabled, so thread 2 will never run.
+
+Disable preemption after queueing works for stopper threads
+to ensure that the operation of queueing the works and waking
+the stopper threads is atomic.
+
+Co-Developed-by: Prasad Sodagudi <psodagud@codeaurora.org>
+Co-Developed-by: Pavankumar Kondeti <pkondeti@codeaurora.org>
+Signed-off-by: Isaac J. Manjarres <isaacm@codeaurora.org>
+Signed-off-by: Prasad Sodagudi <psodagud@codeaurora.org>
+Signed-off-by: Pavankumar Kondeti <pkondeti@codeaurora.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: bigeasy@linutronix.de
+Cc: gregkh@linuxfoundation.org
+Cc: matt@codeblueprint.co.uk
+Fixes: 9fb8d5dc4b64 ("stop_machine, Disable preemption when waking two stopper threads")
+Link: http://lkml.kernel.org/r/1531856129-9871-1-git-send-email-isaacm@codeaurora.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/stop_machine.c |   10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+--- a/kernel/stop_machine.c
++++ b/kernel/stop_machine.c
+@@ -260,6 +260,15 @@ retry:
+       err = 0;
+       __cpu_stop_queue_work(stopper1, work1, &wakeq);
+       __cpu_stop_queue_work(stopper2, work2, &wakeq);
++      /*
++       * The waking up of stopper threads has to happen
++       * in the same scheduling context as the queueing.
++       * Otherwise, there is a possibility of one of the
++       * above stoppers being woken up by another CPU,
+       * and preempting us. This will cause us to not
++       * wake up the other stopper forever.
++       */
++      preempt_disable();
+ unlock:
+       raw_spin_unlock(&stopper2->lock);
+       raw_spin_unlock_irq(&stopper1->lock);
+@@ -271,7 +280,6 @@ unlock:
+       }
+       if (!err) {
+-              preempt_disable();
+               wake_up_q(&wakeq);
+               preempt_enable();
+       }
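
To make the ordering concrete, here is a minimal userspace sketch of the patched cpu_stop_queue_two_works() flow. It is a model under stated assumptions, not kernel code: the stopper locks become pthread mutexes, and preempt_disable()/preempt_enable() are hypothetical stand-ins that only log, since userspace has no preemption control.

#include <pthread.h>
#include <stdio.h>

/* Logging stand-ins for the kernel primitives involved. */
static void preempt_disable(void) { puts("preempt_disable()"); }
static void preempt_enable(void)  { puts("preempt_enable()");  }

struct stopper {
	pthread_mutex_t lock;
	int work_queued;
};

static void __cpu_stop_queue_work(struct stopper *s)
{
	s->work_queued = 1;	/* model: add work, remember to wake later */
}

/*
 * Mirrors the patched flow: queue both works, disable preemption while
 * still holding both locks, drop the locks, and only then wake the
 * stoppers. With the old order (preempt_disable() after the unlocks),
 * a stopper woken by another CPU could preempt us in the window between
 * the unlock and wake_up_q(), and the second stopper would never be
 * woken. That is the deadlock described above.
 */
static void cpu_stop_queue_two_works(struct stopper *s1, struct stopper *s2)
{
	pthread_mutex_lock(&s1->lock);
	pthread_mutex_lock(&s2->lock);

	__cpu_stop_queue_work(s1);
	__cpu_stop_queue_work(s2);
	preempt_disable();	/* pin this context before unlocking */

	pthread_mutex_unlock(&s2->lock);
	pthread_mutex_unlock(&s1->lock);

	puts("wake_up_q(): wake both stoppers");
	preempt_enable();	/* both wakeups issued; safe to yield */
}

int main(void)
{
	struct stopper a = { PTHREAD_MUTEX_INITIALIZER, 0 };
	struct stopper b = { PTHREAD_MUTEX_INITIALIZER, 0 };

	cpu_stop_queue_two_works(&a, &b);
	return 0;
}

The whole fix is the placement of preempt_disable(): moving it ahead of the unlocks makes queueing both works and issuing both wakeups happen in one scheduling context on the queueing CPU.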