From 6dbcc07ce8e58bc5cf619811bbc3a0dd2b7db237 Mon Sep 17 00:00:00 2001
From: Sasha Levin
Date: Thu, 16 May 2019 22:10:55 -0400
Subject: [PATCH] fixes for 4.19

Signed-off-by: Sasha Levin
---
 ...event-decrement-of-reader-count-befo.patch | 129 ++++++++++++++++++
 queue-4.19/series                             |   1 +
 2 files changed, 130 insertions(+)
 create mode 100644 queue-4.19/locking-rwsem-prevent-decrement-of-reader-count-befo.patch
 create mode 100644 queue-4.19/series

diff --git a/queue-4.19/locking-rwsem-prevent-decrement-of-reader-count-befo.patch b/queue-4.19/locking-rwsem-prevent-decrement-of-reader-count-befo.patch
new file mode 100644
index 00000000000..942e4b99a3f
--- /dev/null
+++ b/queue-4.19/locking-rwsem-prevent-decrement-of-reader-count-befo.patch
@@ -0,0 +1,129 @@
+From 0c0f1a4b80b3267ad4fe70089b07eb1820ed1c3d Mon Sep 17 00:00:00 2001
+From: Waiman Long
+Date: Sun, 28 Apr 2019 17:25:38 -0400
+Subject: locking/rwsem: Prevent decrement of reader count before increment
+
+[ Upstream commit a9e9bcb45b1525ba7aea26ed9441e8632aeeda58 ]
+
+During my rwsem testing, it was found that after a down_read(), the
+reader count may occasionally become 0 or even negative. Consequently,
+a writer may steal the lock at that time and execute in parallel with
+the reader, thus breaking the mutual exclusion guarantee of the write
+lock. In other words, both readers and a writer can become rwsem
+owners simultaneously.
+
+The current reader wakeup code does it in one pass: it clears
+waiter->task and puts the waiters into wake_q before fully
+incrementing the reader count. Once waiter->task is cleared, the
+corresponding reader may see it, finish its critical section and do
+the unlock, decrementing the count before it has been incremented.
+This is not a problem if there is only one reader to wake up, as the
+count has been pre-incremented by 1. It is a problem if there is more
+than one reader to be woken up, since a writer can then steal the
+lock.
+
+The wakeup was actually done in two passes before the following v4.9
+commit:
+
+  70800c3c0cc5 ("locking/rwsem: Scan the wait_list for readers only once")
+
+To fix this problem, the wakeup is now done in two passes again. In
+the first pass, we collect the readers and count them. The reader
+count is then fully incremented. In the second pass, waiter->task is
+cleared and the waiters are put into wake_q to be woken up later.
+
+Signed-off-by: Waiman Long
+Acked-by: Linus Torvalds
+Cc: Borislav Petkov
+Cc: Davidlohr Bueso
+Cc: Peter Zijlstra
+Cc: Thomas Gleixner
+Cc: Tim Chen
+Cc: Will Deacon
+Cc: huang ying
+Fixes: 70800c3c0cc5 ("locking/rwsem: Scan the wait_list for readers only once")
+Link: http://lkml.kernel.org/r/20190428212557.13482-2-longman@redhat.com
+Signed-off-by: Ingo Molnar
+Signed-off-by: Sasha Levin
+---
+ kernel/locking/rwsem-xadd.c | 44 +++++++++++++++++++++++++------------
+ 1 file changed, 30 insertions(+), 14 deletions(-)
+
+diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
+index ef909357b84e1..e41e4b4b5267b 100644
+--- a/kernel/locking/rwsem-xadd.c
++++ b/kernel/locking/rwsem-xadd.c
+@@ -130,6 +130,7 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
+ {
+ 	struct rwsem_waiter *waiter, *tmp;
+ 	long oldcount, woken = 0, adjustment = 0;
++	struct list_head wlist;
+ 
+ 	/*
+ 	 * Take a peek at the queue head waiter such that we can determine
+@@ -188,18 +189,42 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
+ 	 * of the queue. We know that woken will be at least 1 as we accounted
+ 	 * for above. Note we increment the 'active part' of the count by the
+ 	 * number of readers before waking any processes up.
++	 *
++	 * We have to do wakeup in 2 passes to prevent the possibility that
++	 * the reader count may be decremented before it is incremented. It
++	 * is because the to-be-woken waiter may not have slept yet. So it
++	 * may see waiter->task got cleared, finish its critical section and
++	 * do an unlock before the reader count increment.
++	 *
++	 * 1) Collect the read-waiters in a separate list, count them and
++	 *    fully increment the reader count in rwsem.
++	 * 2) For each waiters in the new list, clear waiter->task and
++	 *    put them into wake_q to be woken up later.
+ 	 */
+-	list_for_each_entry_safe(waiter, tmp, &sem->wait_list, list) {
+-		struct task_struct *tsk;
+-
++	list_for_each_entry(waiter, &sem->wait_list, list) {
+ 		if (waiter->type == RWSEM_WAITING_FOR_WRITE)
+ 			break;
+ 
+ 		woken++;
+-		tsk = waiter->task;
++	}
++	list_cut_before(&wlist, &sem->wait_list, &waiter->list);
++
++	adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
++	if (list_empty(&sem->wait_list)) {
++		/* hit end of list above */
++		adjustment -= RWSEM_WAITING_BIAS;
++	}
++
++	if (adjustment)
++		atomic_long_add(adjustment, &sem->count);
++
++	/* 2nd pass */
++	list_for_each_entry_safe(waiter, tmp, &wlist, list) {
++		struct task_struct *tsk;
+ 
++		tsk = waiter->task;
+ 		get_task_struct(tsk);
+-		list_del(&waiter->list);
++
+ 		/*
+ 		 * Ensure calling get_task_struct() before setting the reader
+ 		 * waiter to nil such that rwsem_down_read_failed() cannot
+@@ -215,15 +240,6 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
+ 		/* wake_q_add() already take the task ref */
+ 		put_task_struct(tsk);
+ 	}
+-
+-	adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
+-	if (list_empty(&sem->wait_list)) {
+-		/* hit end of list above */
+-		adjustment -= RWSEM_WAITING_BIAS;
+-	}
+-
+-	if (adjustment)
+-		atomic_long_add(adjustment, &sem->count);
+ }
+ 
+ /*
+-- 
+2.20.1
+
diff --git a/queue-4.19/series b/queue-4.19/series
new file mode 100644
index 00000000000..c18ea0bf517
--- /dev/null
+++ b/queue-4.19/series
@@ -0,0 +1 @@
+locking-rwsem-prevent-decrement-of-reader-count-befo.patch
-- 
2.47.2
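
A note for reviewers following the ordering argument in the commit
message: below is a minimal, standalone user-space sketch of the
two-pass wakeup that the patch restores. Everything in it (fake_rwsem,
wait_node, has_task, READ_BIAS, mark_wake_two_pass) is invented for
illustration and is not a kernel API, and it deliberately omits the
RWSEM_WAITING_BIAS bookkeeping that the real __rwsem_mark_wake() does.
It only demonstrates the property that matters: the reader count is
incremented for all woken readers before any waiter->task-equivalent
is cleared.

/*
 * Standalone model of the two-pass reader wakeup. Illustrative
 * names only; this is not kernel code.
 */
#include <stdatomic.h>
#include <stdio.h>

#define READ_BIAS 1L			/* stand-in for RWSEM_ACTIVE_READ_BIAS */

struct wait_node {
	struct wait_node *next;
	_Atomic int has_task;		/* models waiter->task != NULL */
	int is_writer;
};

struct fake_rwsem {
	_Atomic long count;		/* models sem->count */
	struct wait_node *wait_list;
};

/*
 * Pass 1 counts the leading readers, detaches them, and publishes the
 * whole count adjustment. Only then does pass 2 clear has_task, which
 * is the point at which a not-yet-slept waiter may run off, finish its
 * critical section and decrement the count on unlock. Because the
 * increment has already happened, the count can no longer go negative.
 */
static void mark_wake_two_pass(struct fake_rwsem *sem)
{
	struct wait_node *w, *first = sem->wait_list;
	long woken = 0;

	/* Pass 1: count the leading readers and cut them off the list. */
	for (w = first; w && !w->is_writer; w = w->next)
		woken++;
	sem->wait_list = w;		/* analogous to list_cut_before() */

	/* Increment the reader count for all of them in one go. */
	atomic_fetch_add(&sem->count, woken * READ_BIAS);

	/* Pass 2: only now release each reader. */
	for (w = first; woken-- > 0; w = w->next)
		atomic_store(&w->has_task, 0);	/* like waiter->task = NULL */
}

int main(void)
{
	struct wait_node writer = { .next = 0,       .has_task = 1, .is_writer = 1 };
	struct wait_node r2     = { .next = &writer, .has_task = 1, .is_writer = 0 };
	struct wait_node r1     = { .next = &r2,     .has_task = 1, .is_writer = 0 };
	struct fake_rwsem sem   = { .count = 0, .wait_list = &r1 };

	mark_wake_two_pass(&sem);
	printf("count=%ld (both readers accounted before any wakeup)\n",
	       atomic_load(&sem.count));
	return 0;
}

With the buggy one-pass ordering, the atomic_store() would happen per
waiter before the aggregate atomic_fetch_add(), so a waiter that had
not yet slept could acquire, release and decrement the still
unincremented count, briefly driving it to 0 or below and letting a
writer slip in alongside the readers.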