From d8805e633e054c816c47cb6e727c81f156d9253d Mon Sep 17 00:00:00 2001
From: Nelson Elhage <nelhage@nelhage.com>
Date: Mon, 31 Oct 2011 17:13:14 -0700
Subject: epoll: fix spurious lockdep warnings

From: Nelson Elhage <nelhage@nelhage.com>

commit d8805e633e054c816c47cb6e727c81f156d9253d upstream.
epoll can recursively acquire ep->mtx on multiple "struct
eventpoll"s at once in the case where one epoll fd is monitoring another
epoll fd. This is perfectly OK, since we're careful about the lock
ordering, but it causes spurious lockdep warnings. Annotate the recursion
using mutex_lock_nested, and add a comment explaining the nesting rules
for good measure.
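
As a concrete illustration (a sketch of ours, not part of the patch):
mutex_lock_nested() takes a lockdep subclass number, and lockdep treats
different subclasses of the same lock class as independent locks, so
acquisitions at different depths no longer look like recursion on one
lock. With e1 watching e2 (writing e1/e2 for the underlying "struct
eventpoll"s), the annotated pattern is:

--------------------8<--------------------
/* Walking e1's tree: take the outer mutex at subclass 0 ... */
mutex_lock_nested(&e1->mtx, 0);
/* ... and the epoll nested inside it at subclass 1. */
mutex_lock_nested(&e2->mtx, 1);

/* Same lock class, different subclasses: lockdep treats this
 * as an ordered pair rather than a self-deadlock. */
mutex_unlock(&e2->mtx);
mutex_unlock(&e1->mtx);
--------------------8<--------------------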

Recent versions of systemd are triggering this, and it can also be
demonstrated with the following trivial test program:

--------------------8<--------------------

#include <sys/epoll.h>

int main(void) {
	int e1, e2;
	struct epoll_event evt = {
		.events = EPOLLIN
	};

	e1 = epoll_create1(0);
	e2 = epoll_create1(0);
	/* Make epoll fd e1 monitor epoll fd e2. */
	epoll_ctl(e1, EPOLL_CTL_ADD, e2, &evt);
	return 0;
}
--------------------8<--------------------
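
On a kernel with CONFIG_PROVE_LOCKING=y, running this program before the
patch should produce a spurious "possible recursive locking detected"
report on ep->mtx, even though the two mutexes belong to two different
eventpoll instances and no deadlock is actually possible.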

Reported-by: Paul Bolle <pebolle@tiscali.nl>
Tested-by: Paul Bolle <pebolle@tiscali.nl>
Signed-off-by: Nelson Elhage <nelhage@nelhage.com>
Acked-by: Jason Baron <jbaron@redhat.com>
Cc: Dave Jones <davej@redhat.com>
Cc: Davide Libenzi <davidel@xmailserver.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

---
 fs/eventpoll.c |   25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -70,6 +70,15 @@
  * simultaneous inserts (A into B and B into A) from racing and
  * constructing a cycle without either insert observing that it is
  * going to.
+ * It is necessary to acquire multiple "ep->mtx"es at once in the
+ * case when one epoll fd is added to another. In this case, we
+ * always acquire the locks in the order of nesting (i.e. after
+ * epoll_ctl(e1, EPOLL_CTL_ADD, e2), e1->mtx will always be acquired
+ * before e2->mtx). Since we disallow cycles of epoll file
+ * descriptors, this ensures that the mutexes are well-ordered. In
+ * order to communicate this nesting to lockdep, when walking a tree
+ * of epoll file descriptors, we use the current recursion depth as
+ * the lockdep subkey.
  * It is possible to drop the "ep->mtx" and to use the global
  * mutex "epmutex" (together with "ep->lock") to have it working,
  * but having "ep->mtx" will make the interface more scalable.
@@ -464,13 +473,15 @@ static void ep_unregister_pollwait(struc
  * @ep: Pointer to the epoll private data structure.
  * @sproc: Pointer to the scan callback.
  * @priv: Private opaque data passed to the @sproc callback.
+ * @depth: The current depth of recursive f_op->poll calls.
  *
  * Returns: The same integer error code returned by the @sproc callback.
  */
 static int ep_scan_ready_list(struct eventpoll *ep,
			       int (*sproc)(struct eventpoll *,
					    struct list_head *, void *),
-			      void *priv)
+			      void *priv,
+			      int depth)
 {
 	int error, pwake = 0;
 	unsigned long flags;
@@ -481,7 +492,7 @@ static int ep_scan_ready_list(struct eve
 	 * We need to lock this because we could be hit by
 	 * eventpoll_release_file() and epoll_ctl().
 	 */
-	mutex_lock(&ep->mtx);
+	mutex_lock_nested(&ep->mtx, depth);
 
 	/*
 	 * Steal the ready list, and re-init the original one to the
@@ -670,7 +681,7 @@ static int ep_read_events_proc(struct ev
 
 static int ep_poll_readyevents_proc(void *priv, void *cookie, int call_nests)
 {
-	return ep_scan_ready_list(priv, ep_read_events_proc, NULL);
+	return ep_scan_ready_list(priv, ep_read_events_proc, NULL, call_nests + 1);
 }
 
 static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
@@ -737,7 +748,7 @@ void eventpoll_release_file(struct file
 
 		ep = epi->ep;
 		list_del_init(&epi->fllink);
-		mutex_lock(&ep->mtx);
+		mutex_lock_nested(&ep->mtx, 0);
 		ep_remove(ep, epi);
 		mutex_unlock(&ep->mtx);
 	}
@@ -1134,7 +1145,7 @@ static int ep_send_events(struct eventpo
 	esed.maxevents = maxevents;
 	esed.events = events;
 
-	return ep_scan_ready_list(ep, ep_send_events_proc, &esed);
+	return ep_scan_ready_list(ep, ep_send_events_proc, &esed, 0);
 }
 
 static inline struct timespec ep_set_mstimeout(long ms)
@@ -1267,7 +1278,7 @@ static int ep_loop_check_proc(void *priv
 	struct rb_node *rbp;
 	struct epitem *epi;
 
-	mutex_lock(&ep->mtx);
+	mutex_lock_nested(&ep->mtx, call_nests + 1);
 	for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
 		epi = rb_entry(rbp, struct epitem, rbn);
 		if (unlikely(is_file_epoll(epi->ffd.file))) {
@@ -1409,7 +1420,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, in
 	}
 
 
-	mutex_lock(&ep->mtx);
+	mutex_lock_nested(&ep->mtx, 0);
 
 	/*
 	 * Try to lookup the file inside our RB tree, Since we grabbed "mtx"
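
A note on the depth values used above (our summary, not part of the
patch): the top-level entry points (epoll_ctl(), eventpoll_release_file()
and ep_send_events()) take ep->mtx at subclass 0, while the recursive
walkers (ep_poll_readyevents_proc() and ep_loop_check_proc()) pass
call_nests + 1. Both walkers run under ep_call_nested(), which bounds
call_nests by EP_MAX_NESTS (4), so the subclass always stays within
lockdep's MAX_LOCKDEP_SUBCLASSES limit of 8.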