Commit: 63b339ea (queued by GKH)
From stable-bounces@linux.kernel.org Mon Feb 4 22:27:35 2008
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 04 Feb 2008 22:27:20 -0800
Subject: lockdep: annotate epoll
To: torvalds@linux-foundation.org
Cc: akpm@linux-foundation.org, stefanr@s5r6.in-berlin.de, a.p.zijlstra@chello.nl, davidel@xmailserver.org, stable@kernel.org
Message-ID: <200802050627.m156R10c006376@imap1.linux-foundation.org>

From: Peter Zijlstra <a.p.zijlstra@chello.nl>

patch 0ccf831cbee94df9c5006dd46248c0f07847dd7c in mainline.

On Sat, 2008-01-05 at 13:35 -0800, Davide Libenzi wrote:

> I remember I talked with Arjan about this time ago. Basically, since 1)
> you can drop an epoll fd inside another epoll fd 2) callback-based wakeups
> are used, you can see a wake_up() from inside another wake_up(), but they
> will never refer to the same lock instance.
> Think about:
>
> dfd = socket(...);
> efd1 = epoll_create();
> efd2 = epoll_create();
> epoll_ctl(efd1, EPOLL_CTL_ADD, dfd, ...);
> epoll_ctl(efd2, EPOLL_CTL_ADD, efd1, ...);
>
> When a packet arrives to the device underneath "dfd", the net code will
> issue a wake_up() on its poll wake list. Epoll (efd1) has installed a
> callback wakeup entry on that queue, and the wake_up() performed by the
> "dfd" net code will end up in ep_poll_callback(). At this point epoll
> (efd1) notices that it may have some event ready, so it needs to wake up
> the waiters on its poll wait list (efd2). So it calls ep_poll_safewake()
> that ends up in another wake_up(), after having checked about the
> recursion constraints. That are, no more than EP_MAX_POLLWAKE_NESTS, to
> avoid stack blasting. Never hit the same queue, to avoid loops like:
>
> epoll_ctl(efd2, EPOLL_CTL_ADD, efd1, ...);
> epoll_ctl(efd3, EPOLL_CTL_ADD, efd2, ...);
> epoll_ctl(efd4, EPOLL_CTL_ADD, efd3, ...);
> epoll_ctl(efd1, EPOLL_CTL_ADD, efd4, ...);
>
> The code "if (tncur->wq == wq || ..." prevents re-entering the same
> queue/lock.

Since the epoll code is very careful to not nest same instance locks
allow the recursion.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Tested-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

---
 fs/eventpoll.c       |    2 +-
 include/linux/wait.h |   16 ++++++++++++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -353,7 +353,7 @@ static void ep_poll_safewake(struct poll
 	spin_unlock_irqrestore(&psw->lock, flags);

 	/* Do really wake up now */
-	wake_up(wq);
+	wake_up_nested(wq, 1 + wake_nests);

 	/* Remove the current task from the list */
 	spin_lock_irqsave(&psw->lock, flags);
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -161,6 +161,22 @@ wait_queue_head_t *FASTCALL(bit_waitqueu
 #define wake_up_locked(x)		__wake_up_locked((x), TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE)
 #define wake_up_interruptible_sync(x)	__wake_up_sync((x), TASK_INTERRUPTIBLE, 1)

+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+/*
+ * macro to avoid include hell
+ */
+#define wake_up_nested(x, s)					\
+do {								\
+	unsigned long flags;					\
+								\
+	spin_lock_irqsave_nested(&(x)->lock, flags, (s));	\
+	wake_up_locked(x);					\
+	spin_unlock_irqrestore(&(x)->lock, flags);		\
+} while (0)
+#else
+#define wake_up_nested(x, s)	wake_up(x)
+#endif
+
 #define __wait_event(wq, condition)				\
 do {								\
 	DEFINE_WAIT(__wait);					\