1 From: Nick Piggin <npiggin@suse.de>
2 Subject: mm: unlock_page speedup
4 Patch-upstream: no (could be submitted)
6 Introduce a new page flag, PG_waiters, to signal there are processes waiting on
7 PG_locked; and use it to avoid memory barriers and waitqueue hash lookup in the
10 lat_mmap profile goes from looking like this (after the mnt_want_write patches)
11 CPU: AMD64 family10, speed 2000 MHz (estimated)
12 Counted CPU_CLK_UNHALTED events (Cycles outside of halt state) with a unit mask of 0x00 (No unit mask) count 10000
14 254150 14.5889 __do_fault
15 163003 9.3568 unmap_vmas
16 110232 6.3276 mark_page_accessed
17 77864 4.4696 __up_read
18 75864 4.3548 page_waitqueue <<<<
19 69984 4.0173 handle_mm_fault
20 66945 3.8428 do_page_fault
21 66457 3.8148 retint_swapgs
22 65413 3.7549 shmem_getpage
23 62904 3.6109 file_update_time
24 61430 3.5262 set_page_dirty
25 53425 3.0667 unlock_page <<<<
28 3119 0.1430 unlock_page
29 0 0.0000 page_waitqueue
32 include/linux/page-flags.h | 4 +
33 include/linux/pagemap.h | 7 ++-
35 mm/filemap.c | 94 +++++++++++++++++++++++++++++++++++----------
36 4 files changed, 83 insertions(+), 25 deletions(-)
38 --- a/include/linux/page-flags.h
39 +++ b/include/linux/page-flags.h
43 PG_locked, /* Page is locked. Don't touch. */
44 + PG_waiters, /* Page has PG_locked waiters. */
48 @@ -171,6 +172,7 @@ static inline int PAGEMASK_##uname(void)
49 struct page; /* forward declaration */
51 TESTPAGEFLAG(Locked, locked)
52 +PAGEFLAG(Waiters, waiters)
53 PAGEFLAG(Error, error)
54 PAGEFLAG(Referenced, referenced) TESTCLEARFLAG(Referenced, referenced)
55 PAGEFLAG(Dirty, dirty) TESTSCFLAG(Dirty, dirty) __CLEARPAGEFLAG(Dirty, dirty)
56 @@ -340,7 +342,7 @@ PAGEFLAG_FALSE(MemError)
59 #define PAGE_FLAGS (1 << PG_lru | 1 << PG_private | 1 << PG_locked | \
60 - 1 << PG_buddy | 1 << PG_writeback | \
61 + 1 << PG_buddy | 1 << PG_writeback | 1 << PG_waiters | \
62 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active)
65 --- a/include/linux/pagemap.h
66 +++ b/include/linux/pagemap.h
67 @@ -161,7 +161,7 @@ static inline int page_cache_add_specula
68 if (unlikely(!atomic_add_unless(&page->_count, count, 0)))
71 - VM_BUG_ON(PageCompound(page) && page != compound_head(page));
72 + VM_BUG_ON(PageTail(page));
76 @@ -326,6 +326,7 @@ static inline void lock_page_nosync(stru
77 * Never use this directly!
79 extern void wait_on_page_bit(struct page *page, int bit_nr);
80 +extern void __wait_on_page_locked(struct page *page);
83 * Wait for a page to be unlocked.
84 @@ -336,8 +337,9 @@ extern void wait_on_page_bit(struct page
86 static inline void wait_on_page_locked(struct page *page)
90 - wait_on_page_bit(page, PG_locked);
91 + __wait_on_page_locked(page);
95 @@ -345,6 +347,7 @@ static inline void wait_on_page_locked(s
97 static inline void wait_on_page_writeback(struct page *page)
100 if (PageWriteback(page))
101 wait_on_page_bit(page, PG_writeback);
105 @@ -186,8 +186,7 @@ int wake_bit_function(wait_queue_t *wait
106 = container_of(wait, struct wait_bit_queue, wait);
108 if (wait_bit->key.flags != key->flags ||
109 - wait_bit->key.bit_nr != key->bit_nr ||
110 - test_bit(key->bit_nr, key->flags))
111 + wait_bit->key.bit_nr != key->bit_nr)
114 return autoremove_wake_function(wait, mode, sync, key);
117 @@ -181,6 +181,7 @@ static int sync_page(void *word)
118 if (mapping && mapping->a_ops && mapping->a_ops->sync_page)
119 mapping->a_ops->sync_page(page);
125 @@ -513,12 +514,6 @@ struct page *__page_cache_alloc(gfp_t gf
126 EXPORT_SYMBOL(__page_cache_alloc);
129 -static int __sleep_on_page_lock(void *word)
136 * In order to wait for pages to become available there must be
137 * waitqueues associated with pages. By using a hash table of
138 @@ -551,6 +546,22 @@ void wait_on_page_bit(struct page *page,
140 EXPORT_SYMBOL(wait_on_page_bit);
143 + * If PageWaiters was found to be set at unlock time, __wake_page_waiters
144 + * should be called to actually perform the wakeup of waiters.
146 +static void __wake_page_waiters(struct page *page)
148 + ClearPageWaiters(page);
150 + * The smp_mb() is necessary to enforce ordering between the clear_bit
151 + * and the read of the waitqueue (to avoid SMP races with a parallel
152 + * __wait_on_page_locked()).
154 + smp_mb__after_clear_bit();
155 + wake_up_page(page, PG_locked);
159 * unlock_page - unlock a locked page
161 @@ -567,11 +578,10 @@ EXPORT_SYMBOL(wait_on_page_bit);
163 void unlock_page(struct page *page)
165 - smp_mb__before_clear_bit();
166 - if (!test_and_clear_bit(PG_locked, &page->flags))
168 - smp_mb__after_clear_bit();
169 - wake_up_page(page, PG_locked);
170 + VM_BUG_ON(!PageLocked(page));
171 + clear_bit_unlock(PG_locked, &page->flags);
172 + if (unlikely(PageWaiters(page)))
173 + __wake_page_waiters(page);
175 EXPORT_SYMBOL(unlock_page);
177 @@ -601,23 +611,60 @@ EXPORT_SYMBOL(end_page_writeback);
178 * chances are that on the second loop, the block layer's plug list is empty,
179 * so sync_page() will then return in state TASK_UNINTERRUPTIBLE.
181 -void __lock_page(struct page *page)
182 +void __lock_page(struct page *page)
184 + wait_queue_head_t *wq = page_waitqueue(page);
185 DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
187 - __wait_on_bit_lock(page_waitqueue(page), &wait, sync_page,
188 - TASK_UNINTERRUPTIBLE);
190 + prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
191 + SetPageWaiters(page);
192 + if (likely(PageLocked(page)))
194 + } while (!trylock_page(page));
195 + finish_wait(wq, &wait.wait);
197 EXPORT_SYMBOL(__lock_page);
199 -int __lock_page_killable(struct page *page)
200 +int __lock_page_killable(struct page *page)
202 + wait_queue_head_t *wq = page_waitqueue(page);
203 DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
206 - return __wait_on_bit_lock(page_waitqueue(page), &wait,
207 - sync_page_killable, TASK_KILLABLE);
209 + prepare_to_wait(wq, &wait.wait, TASK_KILLABLE);
210 + SetPageWaiters(page);
211 + if (likely(PageLocked(page))) {
212 + err = sync_page_killable(page);
216 + } while (!trylock_page(page));
217 + finish_wait(wq, &wait.wait);
222 +void __wait_on_page_locked(struct page *page)
224 + wait_queue_head_t *wq = page_waitqueue(page);
225 + DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
228 + prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
229 + SetPageWaiters(page);
230 + if (likely(PageLocked(page)))
232 + } while (PageLocked(page));
233 + finish_wait(wq, &wait.wait);
235 + /* Clean up a potentially dangling PG_waiters */
236 + if (unlikely(PageWaiters(page)))
237 + __wake_page_waiters(page);
239 +EXPORT_SYMBOL(__wait_on_page_locked);
242 * __lock_page_nosync - get a lock on the page, without calling sync_page()
243 * @page: the page to lock
244 @@ -625,11 +672,18 @@ int __lock_page_killable(struct page *pa
245 * Variant of lock_page that does not require the caller to hold a reference
246 * on the page's mapping.
248 -void __lock_page_nosync(struct page *page)
249 +void __lock_page_nosync(struct page *page)
251 + wait_queue_head_t *wq = page_waitqueue(page);
252 DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
253 - __wait_on_bit_lock(page_waitqueue(page), &wait, __sleep_on_page_lock,
254 - TASK_UNINTERRUPTIBLE);
257 + prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
258 + SetPageWaiters(page);
259 + if (likely(PageLocked(page)))
261 + } while (!trylock_page(page));
262 + finish_wait(wq, &wait.wait);