From 0c0f1a4b80b3267ad4fe70089b07eb1820ed1c3d Mon Sep 17 00:00:00 2001
From: Waiman Long <longman@redhat.com>
Date: Sun, 28 Apr 2019 17:25:38 -0400
Subject: locking/rwsem: Prevent decrement of reader count before increment

[ Upstream commit a9e9bcb45b1525ba7aea26ed9441e8632aeeda58 ]

During my rwsem testing, it was found that after a down_read(), the
reader count may occasionally become 0 or even negative. Consequently,
a writer may steal the lock at that time and execute in parallel with
the reader, thus breaking the mutual exclusion guarantee of the write
lock. In other words, both readers and a writer can become rwsem owners
simultaneously.

The current reader wakeup code does this in one pass: it clears
waiter->task and puts the waiters into wake_q before fully incrementing
the reader count. Once waiter->task is cleared, the corresponding
reader may see it, finish its critical section and unlock, decrementing
the count before it has been incremented. This is not a problem if
there is only one reader to wake up, as the count has been
pre-incremented by 1. It is a problem when more than one reader is to
be woken up, because a writer can then steal the lock.
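
To make the window concrete, here is an assumed interleaving (three
queued readers A, B and C; only the active reader bias is shown, and
reader B has not yet gone to sleep):

  waker                              reader B
  -----                              --------
  pre-increments count by 1
  (accounts for one reader)
  clears A->task, B->task, C->task
                                     sees waiter->task cleared,
                                     runs its critical section,
                                     up_read(): count -= 1
  count drops back to 0; a writer
  can now acquire the lock while
  A and C still run as readers
  adds the remaining 2 reader
  biases (too late)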

The wakeup was actually done in two passes before the following v4.9 commit:

  70800c3c0cc5 ("locking/rwsem: Scan the wait_list for readers only once")

To fix this problem, the wakeup is now done in two passes again. In the
first pass, we collect the readers and count them, then fully increment
the reader count. In the second pass, we clear waiter->task for each
collected waiter and put them into wake_q to be woken up later.
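
As an illustration only (this is not the kernel code; fake_rwsem,
mark_wake_readers and the list/atomic handling below are simplified
stand-ins invented for this sketch), the two-pass pattern looks roughly
like this in userspace C:

  #include <stdatomic.h>
  #include <stddef.h>
  #include <stdio.h>

  struct waiter {
          struct waiter *next;
          _Atomic(int) task;           /* stand-in for waiter->task */
  };

  struct fake_rwsem {
          _Atomic(long) count;         /* active reader count only */
          struct waiter *wait_list;    /* singly linked for brevity */
  };

  static void mark_wake_readers(struct fake_rwsem *sem)
  {
          struct waiter *w, *wake_list = NULL, **tail = &wake_list;
          long woken = 0;

          /* Pass 1: detach the queued readers and count them. */
          while ((w = sem->wait_list) != NULL) {
                  sem->wait_list = w->next;
                  w->next = NULL;
                  *tail = w;
                  tail = &w->next;
                  woken++;
          }

          /* Publish the full reader count before any waiter can run. */
          atomic_fetch_add(&sem->count, woken);

          /* Pass 2: only now release the waiters (clear waiter->task). */
          for (w = wake_list; w != NULL; w = w->next)
                  atomic_store(&w->task, 0);

          printf("woke %ld readers\n", woken);
  }

  int main(void)
  {
          struct waiter a = { .task = 1 }, b = { .task = 1 };
          struct fake_rwsem sem = { .count = 0, .wait_list = &a };

          a.next = &b;
          mark_wake_readers(&sem);
          printf("count = %ld\n", (long)atomic_load(&sem.count));
          return 0;
  }

In the actual patch below, pass 1 uses list_cut_before() to move the
read-waiters onto a local list and atomic_long_add() to publish the
adjusted count; pass 2 walks that local list, clears waiter->task and
queues the tasks on wake_q.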

Signed-off-by: Waiman Long <longman@redhat.com>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: huang ying <huang.ying.caritas@gmail.com>
Fixes: 70800c3c0cc5 ("locking/rwsem: Scan the wait_list for readers only once")
Link: http://lkml.kernel.org/r/20190428212557.13482-2-longman@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 kernel/locking/rwsem-xadd.c | 44 ++++++++++++++++++++++++++++++--------------
 1 file changed, 30 insertions(+), 14 deletions(-)

--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -130,6 +130,7 @@ static void __rwsem_mark_wake(struct rw_
 {
         struct rwsem_waiter *waiter, *tmp;
         long oldcount, woken = 0, adjustment = 0;
+        struct list_head wlist;
 
         /*
          * Take a peek at the queue head waiter such that we can determine
@@ -188,18 +189,42 @@ static void __rwsem_mark_wake(struct rw_
          * of the queue. We know that woken will be at least 1 as we accounted
          * for above. Note we increment the 'active part' of the count by the
          * number of readers before waking any processes up.
+         *
+         * We have to do wakeup in 2 passes to prevent the possibility that
+         * the reader count may be decremented before it is incremented. It
+         * is because the to-be-woken waiter may not have slept yet. So it
+         * may see waiter->task got cleared, finish its critical section and
+         * do an unlock before the reader count increment.
+         *
+         * 1) Collect the read-waiters in a separate list, count them and
+         *    fully increment the reader count in rwsem.
+         * 2) For each waiters in the new list, clear waiter->task and
+         *    put them into wake_q to be woken up later.
          */
-        list_for_each_entry_safe(waiter, tmp, &sem->wait_list, list) {
-                struct task_struct *tsk;
-
+        list_for_each_entry(waiter, &sem->wait_list, list) {
                 if (waiter->type == RWSEM_WAITING_FOR_WRITE)
                         break;
 
                 woken++;
-                tsk = waiter->task;
+        }
+        list_cut_before(&wlist, &sem->wait_list, &waiter->list);
+
+        adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
+        if (list_empty(&sem->wait_list)) {
+                /* hit end of list above */
+                adjustment -= RWSEM_WAITING_BIAS;
+        }
+
+        if (adjustment)
+                atomic_long_add(adjustment, &sem->count);
 
+        /* 2nd pass */
+        list_for_each_entry_safe(waiter, tmp, &wlist, list) {
+                struct task_struct *tsk;
+
+                tsk = waiter->task;
                 get_task_struct(tsk);
-                list_del(&waiter->list);
+
                 /*
                  * Ensure calling get_task_struct() before setting the reader
                  * waiter to nil such that rwsem_down_read_failed() cannot
@@ -215,15 +240,6 @@ static void __rwsem_mark_wake(struct rw_
                 /* wake_q_add() already take the task ref */
                 put_task_struct(tsk);
         }
-
-        adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
-        if (list_empty(&sem->wait_list)) {
-                /* hit end of list above */
-                adjustment -= RWSEM_WAITING_BIAS;
-        }
-
-        if (adjustment)
-                atomic_long_add(adjustment, &sem->count);
 }
 
 /*