From: Nick Piggin <npiggin@suse.de>
Subject: mm: unlock_page speedup
References: bnc#436953
Patch-upstream: no (could be submitted)

Introduce a new page flag, PG_waiters, to signal that there are processes
waiting on PG_locked, and use it to avoid the memory barriers and the
waitqueue hash lookup in the unlock_page fastpath.

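The resulting fast path is a plain release of the lock bit plus a flag
test; the memory barrier and the waitqueue hash lookup move into a slow
path that only runs when somebody has actually slept on the page. In
sketch form (excerpted from the mm/filemap.c hunks below):

	void unlock_page(struct page *page)
	{
		VM_BUG_ON(!PageLocked(page));
		clear_bit_unlock(PG_locked, &page->flags);
		if (unlikely(PageWaiters(page)))
			__wake_page_waiters(page);	/* slow path: barrier, hash lookup, wakeup */
	}

Waiters set PG_waiters (after queueing themselves on the hashed waitqueue)
before re-testing the lock bit, so in the common uncontended case
unlock_page() finds the flag clear and never touches the waitqueue hash.
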
lat_mmap profile goes from looking like this (after the mnt_want_write patches):
CPU: AMD64 family10, speed 2000 MHz (estimated)
Counted CPU_CLK_UNHALTED events (Cycles outside of halt state) with a unit mask of 0x00 (No unit mask) count 10000
samples  %        symbol name
254150   14.5889  __do_fault
163003    9.3568  unmap_vmas
110232    6.3276  mark_page_accessed
 77864    4.4696  __up_read
 75864    4.3548  page_waitqueue      <<<<
 69984    4.0173  handle_mm_fault
 66945    3.8428  do_page_fault
 66457    3.8148  retint_swapgs
 65413    3.7549  shmem_getpage
 62904    3.6109  file_update_time
 61430    3.5262  set_page_dirty
 53425    3.0667  unlock_page         <<<<

To this:
  3119    0.1430  unlock_page
     0    0.0000  page_waitqueue

---
 include/linux/page-flags.h |    4 +
 include/linux/pagemap.h    |    7 ++-
 kernel/wait.c              |    3 -
 mm/filemap.c               |   94 +++++++++++++++++++++++++++++++++++----------
 4 files changed, 83 insertions(+), 25 deletions(-)

--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -71,6 +71,7 @@
  */
 enum pageflags {
	PG_locked,		/* Page is locked. Don't touch. */
+	PG_waiters,		/* Page has PG_locked waiters. */
	PG_error,
	PG_referenced,
	PG_uptodate,
@@ -171,6 +172,7 @@ static inline int PAGEMASK_##uname(void)
 struct page;	/* forward declaration */
 
 TESTPAGEFLAG(Locked, locked)
+PAGEFLAG(Waiters, waiters)
 PAGEFLAG(Error, error)
 PAGEFLAG(Referenced, referenced) TESTCLEARFLAG(Referenced, referenced)
 PAGEFLAG(Dirty, dirty) TESTSCFLAG(Dirty, dirty) __CLEARPAGEFLAG(Dirty, dirty)
@@ -340,7 +342,7 @@ PAGEFLAG_FALSE(MemError)
 #endif
 
 #define PAGE_FLAGS	(1 << PG_lru | 1 << PG_private | 1 << PG_locked | \
-			 1 << PG_buddy | 1 << PG_writeback | \
+			 1 << PG_buddy | 1 << PG_writeback | 1 << PG_waiters | \
			 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active)
 
 /*
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -161,7 +161,7 @@ static inline int page_cache_add_specula
	if (unlikely(!atomic_add_unless(&page->_count, count, 0)))
		return 0;
 #endif
-	VM_BUG_ON(PageCompound(page) && page != compound_head(page));
+	VM_BUG_ON(PageTail(page));
 
	return 1;
 }
@@ -326,6 +326,7 @@ static inline void lock_page_nosync(stru
  * Never use this directly!
  */
 extern void wait_on_page_bit(struct page *page, int bit_nr);
+extern void __wait_on_page_locked(struct page *page);
 
 /*
  * Wait for a page to be unlocked.
@@ -336,8 +337,9 @@ extern void wait_on_page_bit(struct page
  */
 static inline void wait_on_page_locked(struct page *page)
 {
+	might_sleep();
	if (PageLocked(page))
-		wait_on_page_bit(page, PG_locked);
+		__wait_on_page_locked(page);
 }
 
 /*
@@ -345,6 +347,7 @@ static inline void wait_on_page_locked(s
  */
 static inline void wait_on_page_writeback(struct page *page)
 {
+	might_sleep();
	if (PageWriteback(page))
		wait_on_page_bit(page, PG_writeback);
 }
--- a/kernel/wait.c
+++ b/kernel/wait.c
@@ -186,8 +186,7 @@ int wake_bit_function(wait_queue_t *wait
		= container_of(wait, struct wait_bit_queue, wait);
 
	if (wait_bit->key.flags != key->flags ||
-			wait_bit->key.bit_nr != key->bit_nr ||
-			test_bit(key->bit_nr, key->flags))
+			wait_bit->key.bit_nr != key->bit_nr)
		return 0;
	else
		return autoremove_wake_function(wait, mode, sync, key);
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -181,6 +181,7 @@ static int sync_page(void *word)
	if (mapping && mapping->a_ops && mapping->a_ops->sync_page)
		mapping->a_ops->sync_page(page);
	io_schedule();
+
	return 0;
 }
 
@@ -513,12 +514,6 @@ struct page *__page_cache_alloc(gfp_t gf
 EXPORT_SYMBOL(__page_cache_alloc);
 #endif
 
-static int __sleep_on_page_lock(void *word)
-{
-	io_schedule();
-	return 0;
-}
-
 /*
  * In order to wait for pages to become available there must be
  * waitqueues associated with pages. By using a hash table of
@@ -551,6 +546,22 @@ void wait_on_page_bit(struct page *page,
 }
 EXPORT_SYMBOL(wait_on_page_bit);
 
+/*
+ * If PageWaiters was found to be set at unlock time, __wake_page_waiters
+ * should be called to actually perform the wakeup of waiters.
+ */
+static void __wake_page_waiters(struct page *page)
+{
+	ClearPageWaiters(page);
+	/*
+	 * The smp_mb() is necessary to enforce ordering between the clear_bit
+	 * and the read of the waitqueue (to avoid SMP races with a parallel
+	 * __wait_on_page_locked()).
+	 */
+	smp_mb__after_clear_bit();
+	wake_up_page(page, PG_locked);
+}
+
 /**
  * unlock_page - unlock a locked page
  * @page: the page
@@ -567,11 +578,10 @@ EXPORT_SYMBOL(wait_on_page_bit);
  */
 void unlock_page(struct page *page)
 {
-	smp_mb__before_clear_bit();
-	if (!test_and_clear_bit(PG_locked, &page->flags))
-		BUG();
-	smp_mb__after_clear_bit();
-	wake_up_page(page, PG_locked);
+	VM_BUG_ON(!PageLocked(page));
+	clear_bit_unlock(PG_locked, &page->flags);
+	if (unlikely(PageWaiters(page)))
+		__wake_page_waiters(page);
 }
 EXPORT_SYMBOL(unlock_page);
 
@@ -601,23 +611,60 @@ EXPORT_SYMBOL(end_page_writeback);
  * chances are that on the second loop, the block layer's plug list is empty,
  * so sync_page() will then return in state TASK_UNINTERRUPTIBLE.
  */
-void __lock_page(struct page *page)
+void __lock_page(struct page *page)
 {
+	wait_queue_head_t *wq = page_waitqueue(page);
	DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
 
-	__wait_on_bit_lock(page_waitqueue(page), &wait, sync_page,
-							TASK_UNINTERRUPTIBLE);
+	do {
+		prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+		SetPageWaiters(page);
+		if (likely(PageLocked(page)))
+			sync_page(page);
+	} while (!trylock_page(page));
+	finish_wait(wq, &wait.wait);
 }
 EXPORT_SYMBOL(__lock_page);
 
-int __lock_page_killable(struct page *page)
+int __lock_page_killable(struct page *page)
 {
+	wait_queue_head_t *wq = page_waitqueue(page);
	DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
+	int err = 0;
 
-	return __wait_on_bit_lock(page_waitqueue(page), &wait,
-					sync_page_killable, TASK_KILLABLE);
+	do {
+		prepare_to_wait(wq, &wait.wait, TASK_KILLABLE);
+		SetPageWaiters(page);
+		if (likely(PageLocked(page))) {
+			err = sync_page_killable(page);
+			if (err)
+				break;
+		}
+	} while (!trylock_page(page));
+	finish_wait(wq, &wait.wait);
+
+	return err;
 }
 
+void __wait_on_page_locked(struct page *page)
+{
+	wait_queue_head_t *wq = page_waitqueue(page);
+	DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
+
+	do {
+		prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+		SetPageWaiters(page);
+		if (likely(PageLocked(page)))
+			sync_page(page);
+	} while (PageLocked(page));
+	finish_wait(wq, &wait.wait);
+
+	/* Clean up a potentially dangling PG_waiters */
+	if (unlikely(PageWaiters(page)))
+		__wake_page_waiters(page);
+}
+EXPORT_SYMBOL(__wait_on_page_locked);
+
 /**
  * __lock_page_nosync - get a lock on the page, without calling sync_page()
  * @page: the page to lock
@@ -625,11 +672,18 @@ int __lock_page_killable(struct page *pa
  * Variant of lock_page that does not require the caller to hold a reference
  * on the page's mapping.
  */
-void __lock_page_nosync(struct page *page)
+void __lock_page_nosync(struct page *page)
 {
+	wait_queue_head_t *wq = page_waitqueue(page);
	DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
-	__wait_on_bit_lock(page_waitqueue(page), &wait, __sleep_on_page_lock,
-							TASK_UNINTERRUPTIBLE);
+
+	do {
+		prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+		SetPageWaiters(page);
+		if (likely(PageLocked(page)))
+			io_schedule();
+	} while (!trylock_page(page));
+	finish_wait(wq, &wait.wait);
 }
 
 /**