--- /dev/null
+From 0c0f1a4b80b3267ad4fe70089b07eb1820ed1c3d Mon Sep 17 00:00:00 2001
+From: Waiman Long <longman@redhat.com>
+Date: Sun, 28 Apr 2019 17:25:38 -0400
+Subject: locking/rwsem: Prevent decrement of reader count before increment
+
+[ Upstream commit a9e9bcb45b1525ba7aea26ed9441e8632aeeda58 ]
+
+During my rwsem testing, it was found that after a down_read(), the
+reader count may occasionally become 0 or even negative. Consequently,
+a writer may steal the lock at that time and execute in parallel with
+the reader, thus breaking the mutual exclusion guarantee of the write
+lock. In other words, both readers and a writer can become rwsem
+owners simultaneously.
+
+The current reader wakeup code does it in one pass: it clears
+waiter->task and puts the waiters into wake_q before fully
+incrementing the reader count. Once waiter->task is cleared, the
+corresponding reader may see it, finish its critical section and do
+an unlock that decrements the count before the count is incremented.
+This is not a problem if there is only one reader to wake up, as the
+count has been pre-incremented by 1. It is a problem if more than one
+reader is to be woken up, as a writer can then steal the lock in the
+window between the unlock and the increment.
+
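+A possible interleaving with two readers being woken (my
+reconstruction of the failure mode described above; the exact timing
+is illustrative):
+
+  waker (single-pass wakeup)        second reader
+  --------------------------        -------------
+  waiter->task = NULL
+                                    down_read() returns
+                                    ... critical section ...
+                                    up_read() decrements the count;
+                                    the count can now reach 0 and a
+                                    writer can steal the lock
+  count += woken * RWSEM_ACTIVE_READ_BIAS   /* too late */
+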
+The wakeup was actually done in 2 passes before the following v4.9 commit:
+
+ 70800c3c0cc5 ("locking/rwsem: Scan the wait_list for readers only once")
+
+To fix this problem, the wakeup is now done in two passes again, as
+shown in the sketch below. In the first pass, the readers are
+collected into a separate list and counted, and the reader count is
+fully incremented. In the second pass, waiter->task is cleared and
+the waiters are put into wake_q to be woken up later.
+
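+A minimal sketch of the new flow (simplified from the patched
+__rwsem_mark_wake() below; the waiting-bias bookkeeping and the
+writer-waiter handling are elided):
+
+  struct rwsem_waiter *waiter, *tmp;
+  struct list_head wlist;
+  long woken = 0;
+
+  /* Pass 1: count the readers at the head of the wait list. */
+  list_for_each_entry(waiter, &sem->wait_list, list) {
+          if (waiter->type == RWSEM_WAITING_FOR_WRITE)
+                  break;
+          woken++;
+  }
+
+  /*
+   * Detach them and publish the full reader count before any
+   * waiter->task is cleared.
+   */
+  list_cut_before(&wlist, &sem->wait_list, &waiter->list);
+  atomic_long_add(woken * RWSEM_ACTIVE_READ_BIAS, &sem->count);
+
+  /* Pass 2: only now hand the lock over to each reader. */
+  list_for_each_entry_safe(waiter, tmp, &wlist, list) {
+          struct task_struct *tsk = waiter->task;
+
+          get_task_struct(tsk);
+          smp_store_release(&waiter->task, NULL);
+          wake_q_add(wake_q, tsk);
+          put_task_struct(tsk);
+  }
+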
+Signed-off-by: Waiman Long <longman@redhat.com>
+Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Davidlohr Bueso <dave@stgolabs.net>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: huang ying <huang.ying.caritas@gmail.com>
+Fixes: 70800c3c0cc5 ("locking/rwsem: Scan the wait_list for readers only once")
+Link: http://lkml.kernel.org/r/20190428212557.13482-2-longman@redhat.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/locking/rwsem-xadd.c | 44 +++++++++++++++++++++++++------------
+ 1 file changed, 30 insertions(+), 14 deletions(-)
+
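+The detach step relies on list_cut_before() from <linux/list.h>;
+roughly, per its kernel semantics:
+
+  /*
+   * Move all entries of @head that precede @entry onto @list.  If
+   * the first-pass loop ran off the end of the wait list, then
+   * &waiter->list == &sem->wait_list, every waiter is moved, and
+   * sem->wait_list ends up empty (the "hit end of list" case in the
+   * hunk below).
+   */
+  void list_cut_before(struct list_head *list, struct list_head *head,
+                       struct list_head *entry);
+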
+diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
+index ef909357b84e1..e41e4b4b5267b 100644
+--- a/kernel/locking/rwsem-xadd.c
++++ b/kernel/locking/rwsem-xadd.c
+@@ -130,6 +130,7 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
+ {
+ struct rwsem_waiter *waiter, *tmp;
+ long oldcount, woken = 0, adjustment = 0;
++ struct list_head wlist;
+
+ /*
+ * Take a peek at the queue head waiter such that we can determine
+@@ -188,18 +189,42 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
+ * of the queue. We know that woken will be at least 1 as we accounted
+ * for above. Note we increment the 'active part' of the count by the
+ * number of readers before waking any processes up.
++ *
++ * We have to do wakeup in 2 passes to prevent the possibility that
++ * the reader count may be decremented before it is incremented. It
++ * is because the to-be-woken waiter may not have slept yet. So it
++ * may see waiter->task cleared, finish its critical section and
++ * do an unlock before the reader count increment.
++ *
++ * 1) Collect the read-waiters in a separate list, count them and
++ * fully increment the reader count in rwsem.
++ * 2) For each waiter in the new list, clear waiter->task and
++ * put them into wake_q to be woken up later.
+ */
+- list_for_each_entry_safe(waiter, tmp, &sem->wait_list, list) {
+- struct task_struct *tsk;
+-
++ list_for_each_entry(waiter, &sem->wait_list, list) {
+ if (waiter->type == RWSEM_WAITING_FOR_WRITE)
+ break;
+
+ woken++;
+- tsk = waiter->task;
++ }
++ list_cut_before(&wlist, &sem->wait_list, &waiter->list);
++
++ adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
++ if (list_empty(&sem->wait_list)) {
++ /* hit end of list above */
++ adjustment -= RWSEM_WAITING_BIAS;
++ }
++
++ if (adjustment)
++ atomic_long_add(adjustment, &sem->count);
++
++ /* 2nd pass */
++ list_for_each_entry_safe(waiter, tmp, &wlist, list) {
++ struct task_struct *tsk;
+
++ tsk = waiter->task;
+ get_task_struct(tsk);
+- list_del(&waiter->list);
++
+ /*
+ * Ensure calling get_task_struct() before setting the reader
+ * waiter to nil such that rwsem_down_read_failed() cannot
+@@ -215,15 +240,6 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
+ /* wake_q_add() already take the task ref */
+ put_task_struct(tsk);
+ }
+-
+- adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
+- if (list_empty(&sem->wait_list)) {
+- /* hit end of list above */
+- adjustment -= RWSEM_WAITING_BIAS;
+- }
+-
+- if (adjustment)
+- atomic_long_add(adjustment, &sem->count);
+ }
+
+ /*
+--
+2.20.1
+