--- /dev/null
+From: Nick Piggin <npiggin@suse.de>
+Subject: mm: unlock_page speedup
+References: bnc#436953
+Patch-upstream: no (could be submitted)
+
+Introduce a new page flag, PG_waiters, to signal that there are processes
+waiting on PG_locked, and use it to avoid memory barriers and the waitqueue
+hash lookup in the unlock_page fastpath.
+
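+In outline (a sketch of the new logic, mirroring the hunks below rather than
+adding anything extra to apply): the waiter publishes PG_waiters before
+re-checking PG_locked, and the unlocker clears PG_locked before testing
+PG_waiters, so at least one side observes the other's store and a wakeup
+cannot be lost:
+
+	/* unlock fastpath (unlock_page) */
+	clear_bit_unlock(PG_locked, &page->flags);
+	if (unlikely(PageWaiters(page)))
+		__wake_page_waiters(page);	/* ClearPageWaiters + smp_mb + wake_up_page */
+
+	/* waiter slowpath (__lock_page and friends) */
+	do {
+		prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+		SetPageWaiters(page);
+		if (likely(PageLocked(page)))
+			sync_page(page);	/* sleeps until the page is unlocked */
+	} while (!trylock_page(page));
+	finish_wait(wq, &wait.wait);
+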
+The lat_mmap profile goes from looking like this (after the mnt_want_write patches):
+CPU: AMD64 family10, speed 2000 MHz (estimated)
+Counted CPU_CLK_UNHALTED events (Cycles outside of halt state) with a unit mask of 0x00 (No unit mask) count 10000
+samples % symbol name
+254150 14.5889 __do_fault
+163003 9.3568 unmap_vmas
+110232 6.3276 mark_page_accessed
+77864 4.4696 __up_read
+75864 4.3548 page_waitqueue <<<<
+69984 4.0173 handle_mm_fault
+66945 3.8428 do_page_fault
+66457 3.8148 retint_swapgs
+65413 3.7549 shmem_getpage
+62904 3.6109 file_update_time
+61430 3.5262 set_page_dirty
+53425 3.0667 unlock_page <<<<
+
+To this:
+3119 0.1430 unlock_page
+0 0.0000 page_waitqueue
+
+---
+ include/linux/page-flags.h | 4 +
+ include/linux/pagemap.h | 7 ++-
+ kernel/wait.c | 3 -
+ mm/filemap.c | 94 +++++++++++++++++++++++++++++++++++----------
+ 4 files changed, 83 insertions(+), 25 deletions(-)
+
+--- a/include/linux/page-flags.h
++++ b/include/linux/page-flags.h
+@@ -71,6 +71,7 @@
+ */
+ enum pageflags {
+ PG_locked, /* Page is locked. Don't touch. */
++ PG_waiters, /* Page has PG_locked waiters. */
+ PG_error,
+ PG_referenced,
+ PG_uptodate,
+@@ -171,6 +172,7 @@ static inline int PAGEMASK_##uname(void)
+ struct page; /* forward declaration */
+
+ TESTPAGEFLAG(Locked, locked)
++PAGEFLAG(Waiters, waiters)
+ PAGEFLAG(Error, error)
+ PAGEFLAG(Referenced, referenced) TESTCLEARFLAG(Referenced, referenced)
+ PAGEFLAG(Dirty, dirty) TESTSCFLAG(Dirty, dirty) __CLEARPAGEFLAG(Dirty, dirty)
+@@ -340,7 +342,7 @@ PAGEFLAG_FALSE(MemError)
+ #endif
+
+ #define PAGE_FLAGS (1 << PG_lru | 1 << PG_private | 1 << PG_locked | \
+- 1 << PG_buddy | 1 << PG_writeback | \
++ 1 << PG_buddy | 1 << PG_writeback | 1 << PG_waiters | \
+ 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active)
+
+ /*
+--- a/include/linux/pagemap.h
++++ b/include/linux/pagemap.h
+@@ -161,7 +161,7 @@ static inline int page_cache_add_specula
+ if (unlikely(!atomic_add_unless(&page->_count, count, 0)))
+ return 0;
+ #endif
+- VM_BUG_ON(PageCompound(page) && page != compound_head(page));
++ VM_BUG_ON(PageTail(page));
+
+ return 1;
+ }
+@@ -326,6 +326,7 @@ static inline void lock_page_nosync(stru
+ * Never use this directly!
+ */
+ extern void wait_on_page_bit(struct page *page, int bit_nr);
++extern void __wait_on_page_locked(struct page *page);
+
+ /*
+ * Wait for a page to be unlocked.
+@@ -336,8 +337,9 @@ extern void wait_on_page_bit(struct page
+ */
+ static inline void wait_on_page_locked(struct page *page)
+ {
++ might_sleep();
+ if (PageLocked(page))
+- wait_on_page_bit(page, PG_locked);
++ __wait_on_page_locked(page);
+ }
+
+ /*
+@@ -345,6 +347,7 @@ static inline void wait_on_page_locked(s
+ */
+ static inline void wait_on_page_writeback(struct page *page)
+ {
++ might_sleep();
+ if (PageWriteback(page))
+ wait_on_page_bit(page, PG_writeback);
+ }
+--- a/kernel/wait.c
++++ b/kernel/wait.c
+@@ -186,8 +186,7 @@ int wake_bit_function(wait_queue_t *wait
+ = container_of(wait, struct wait_bit_queue, wait);
+
+ if (wait_bit->key.flags != key->flags ||
+- wait_bit->key.bit_nr != key->bit_nr ||
+- test_bit(key->bit_nr, key->flags))
++ wait_bit->key.bit_nr != key->bit_nr)
+ return 0;
+ else
+ return autoremove_wake_function(wait, mode, sync, key);
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -181,6 +181,7 @@ static int sync_page(void *word)
+ if (mapping && mapping->a_ops && mapping->a_ops->sync_page)
+ mapping->a_ops->sync_page(page);
+ io_schedule();
++
+ return 0;
+ }
+
+@@ -513,12 +514,6 @@ struct page *__page_cache_alloc(gfp_t gf
+ EXPORT_SYMBOL(__page_cache_alloc);
+ #endif
+
+-static int __sleep_on_page_lock(void *word)
+-{
+- io_schedule();
+- return 0;
+-}
+-
+ /*
+ * In order to wait for pages to become available there must be
+ * waitqueues associated with pages. By using a hash table of
+@@ -551,6 +546,22 @@ void wait_on_page_bit(struct page *page,
+ }
+ EXPORT_SYMBOL(wait_on_page_bit);
+
++/*
++ * If PageWaiters was found to be set at unlock time, __wake_page_waiters
++ * should be called to actually perform the wakeup of waiters.
++ */
++static void __wake_page_waiters(struct page *page)
++{
++ ClearPageWaiters(page);
++ /*
++ * The smp_mb() is necessary to enforce ordering between the clear_bit
++ * and the read of the waitqueue (to avoid SMP races with a parallel
++ * __wait_on_page_locked()).
++ */
++ smp_mb__after_clear_bit();
++ wake_up_page(page, PG_locked);
++}
++
+ /**
+ * unlock_page - unlock a locked page
+ * @page: the page
+@@ -567,11 +578,10 @@ EXPORT_SYMBOL(wait_on_page_bit);
+ */
+ void unlock_page(struct page *page)
+ {
+- smp_mb__before_clear_bit();
+- if (!test_and_clear_bit(PG_locked, &page->flags))
+- BUG();
+- smp_mb__after_clear_bit();
+- wake_up_page(page, PG_locked);
++ VM_BUG_ON(!PageLocked(page));
++ clear_bit_unlock(PG_locked, &page->flags);
++ if (unlikely(PageWaiters(page)))
++ __wake_page_waiters(page);
+ }
+ EXPORT_SYMBOL(unlock_page);
+
+@@ -601,23 +611,60 @@ EXPORT_SYMBOL(end_page_writeback);
+ * chances are that on the second loop, the block layer's plug list is empty,
+ * so sync_page() will then return in state TASK_UNINTERRUPTIBLE.
+ */
+-void __lock_page(struct page *page)
++void __lock_page(struct page *page)
+ {
++ wait_queue_head_t *wq = page_waitqueue(page);
+ DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
+
+- __wait_on_bit_lock(page_waitqueue(page), &wait, sync_page,
+- TASK_UNINTERRUPTIBLE);
++ do {
++ prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
++ SetPageWaiters(page);
++ if (likely(PageLocked(page)))
++ sync_page(page);
++ } while (!trylock_page(page));
++ finish_wait(wq, &wait.wait);
+ }
+ EXPORT_SYMBOL(__lock_page);
+
+-int __lock_page_killable(struct page *page)
++int __lock_page_killable(struct page *page)
+ {
++ wait_queue_head_t *wq = page_waitqueue(page);
+ DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
++ int err = 0;
+
+- return __wait_on_bit_lock(page_waitqueue(page), &wait,
+- sync_page_killable, TASK_KILLABLE);
++ do {
++ prepare_to_wait(wq, &wait.wait, TASK_KILLABLE);
++ SetPageWaiters(page);
++ if (likely(PageLocked(page))) {
++ err = sync_page_killable(page);
++ if (err)
++ break;
++ }
++ } while (!trylock_page(page));
++ finish_wait(wq, &wait.wait);
++
++ return err;
+ }
+
++void __wait_on_page_locked(struct page *page)
++{
++ wait_queue_head_t *wq = page_waitqueue(page);
++ DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
++
++ do {
++ prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
++ SetPageWaiters(page);
++ if (likely(PageLocked(page)))
++ sync_page(page);
++ } while (PageLocked(page));
++ finish_wait(wq, &wait.wait);
++
++ /* Clean up a potentially dangling PG_waiters */
++ if (unlikely(PageWaiters(page)))
++ __wake_page_waiters(page);
++}
++EXPORT_SYMBOL(__wait_on_page_locked);
++
+ /**
+ * __lock_page_nosync - get a lock on the page, without calling sync_page()
+ * @page: the page to lock
+@@ -625,11 +672,18 @@ int __lock_page_killable(struct page *pa
+ * Variant of lock_page that does not require the caller to hold a reference
+ * on the page's mapping.
+ */
+-void __lock_page_nosync(struct page *page)
++void __lock_page_nosync(struct page *page)
+ {
++ wait_queue_head_t *wq = page_waitqueue(page);
+ DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
+- __wait_on_bit_lock(page_waitqueue(page), &wait, __sleep_on_page_lock,
+- TASK_UNINTERRUPTIBLE);
++
++ do {
++ prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
++ SetPageWaiters(page);
++ if (likely(PageLocked(page)))
++ io_schedule();
++ } while (!trylock_page(page));
++ finish_wait(wq, &wait.wait);
+ }
+
+ /**