--- /dev/null
+From 85046579bde15e532983438f86b36856e358f417 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Fri, 20 Jan 2012 14:34:19 -0800
+Subject: SHM_UNLOCK: fix long unpreemptible section
+
+From: Hugh Dickins <hughd@google.com>
+
+commit 85046579bde15e532983438f86b36856e358f417 upstream.
+
+scan_mapping_unevictable_pages() is used to make SysV SHM_LOCKed pages
+evictable again once the shared memory is unlocked. It does this with
+pagevec_lookup()s across the whole object (which might occupy most of
+memory), and takes 300ms to unlock 7GB here. A cond_resched() every
+PAGEVEC_SIZE pages would be good.
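+
+For reference, a minimal userspace exerciser of this path (not part of
+the fix; it assumes CAP_IPC_LOCK or a large enough RLIMIT_MEMLOCK plus
+a kernel.shmmax that permits the segment, and omits error checking):
+
+  #define _GNU_SOURCE
+  #include <stdio.h>
+  #include <string.h>
+  #include <time.h>
+  #include <sys/ipc.h>
+  #include <sys/shm.h>
+
+  int main(void)
+  {
+      size_t size = 1UL << 30;  /* 1GB; raise towards the 7GB case above */
+      int id = shmget(IPC_PRIVATE, size, IPC_CREAT | 0600);
+      char *p = shmat(id, NULL, 0);
+      struct timespec t0, t1;
+
+      memset(p, 0, size);       /* populate the segment's page cache */
+      shmctl(id, SHM_LOCK, NULL);
+      clock_gettime(CLOCK_MONOTONIC, &t0);
+      shmctl(id, SHM_UNLOCK, NULL);  /* runs the unevictable scan */
+      clock_gettime(CLOCK_MONOTONIC, &t1);
+      printf("SHM_UNLOCK took %ld us\n",
+             (t1.tv_sec - t0.tv_sec) * 1000000L +
+             (t1.tv_nsec - t0.tv_nsec) / 1000L);
+      shmdt(p);
+      shmctl(id, IPC_RMID, NULL);
+      return 0;
+  }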
+
+However, KOSAKI-san points out that scan_mapping_unevictable_pages()
+is called under shmem.c's info->lock, and also under shm.c's
+shm_lock(), both spinlocks.
+There is no strong reason for that: we need to take these pages off the
+unevictable list soonish, but those locks are not required for it.
+
+So move the call to scan_mapping_unevictable_pages() from shmem.c's
+unlock handling up to shm.c's unlock handling. Remove the recently
+added barrier, not needed now we have spin_unlock() before the scan.
+
+Use get_file(), with subsequent fput(), to make sure we have a reference
+to mapping throughout scan_mapping_unevictable_pages(): that's something
+that was previously guaranteed by the shm_lock().
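+
+As it appears in the ipc/shm.c hunk below, the SHM_UNLOCK tail then
+reads (comments added here for exposition):
+
+  shmem_lock(shm_file, 0, shp->mlock_user);
+  shp->shm_perm.mode &= ~SHM_LOCKED;
+  shp->mlock_user = NULL;
+  get_file(shm_file);   /* pin the file so f_mapping stays alive */
+  shm_unlock(shp);      /* drop the ipc spinlock before the long scan */
+  scan_mapping_unevictable_pages(shm_file->f_mapping);
+  fput(shm_file);       /* may drop the final reference */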
+
+Remove shmctl's lru_add_drain_all(): we don't fault in pages at SHM_LOCK
+time, and we lazily discover them to be Unevictable later, so it serves
+no purpose for SHM_LOCK; and serves no purpose for SHM_UNLOCK, since
+pages still on pagevec are not marked Unevictable.
+
+The original code avoided redundant rescans by checking the VM_LOCKED
+flag at its level: now avoid them by checking shp's SHM_LOCKED.
+
+The original code called scan_mapping_unevictable_pages() on a locked
+area at shm_destroy() time: perhaps we once had accounting cross-checks
+which required that, but not now, so skip the overhead and just let
+inode eviction deal with them.
+
+Put check_move_unevictable_page() and scan_mapping_unevictable_pages()
+under CONFIG_SHMEM (with a stub for the TINY case when ramfs is used),
+more as a comment than to save space; and add comments noting that
+they are only used for SHM_UNLOCK.
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+Cc: Minchan Kim <minchan.kim@gmail.com>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Shaohua Li <shaohua.li@intel.com>
+Cc: Eric Dumazet <eric.dumazet@gmail.com>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Michel Lespinasse <walken@google.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ ipc/shm.c | 37 ++++++++++++++++++++++---------------
+ mm/shmem.c | 7 -------
+ mm/vmscan.c | 12 +++++++++++-
+ 3 files changed, 33 insertions(+), 23 deletions(-)
+
+--- a/ipc/shm.c
++++ b/ipc/shm.c
+@@ -870,9 +870,7 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int,
+ case SHM_LOCK:
+ case SHM_UNLOCK:
+ {
+- struct file *uninitialized_var(shm_file);
+-
+- lru_add_drain_all(); /* drain pagevecs to lru lists */
++ struct file *shm_file;
+
+ shp = shm_lock_check(ns, shmid);
+ if (IS_ERR(shp)) {
+@@ -895,22 +893,31 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int,
+ err = security_shm_shmctl(shp, cmd);
+ if (err)
+ goto out_unlock;
+-
+- if(cmd==SHM_LOCK) {
++
++ shm_file = shp->shm_file;
++ if (is_file_hugepages(shm_file))
++ goto out_unlock;
++
++ if (cmd == SHM_LOCK) {
+ struct user_struct *user = current_user();
+- if (!is_file_hugepages(shp->shm_file)) {
+- err = shmem_lock(shp->shm_file, 1, user);
+- if (!err && !(shp->shm_perm.mode & SHM_LOCKED)){
+- shp->shm_perm.mode |= SHM_LOCKED;
+- shp->mlock_user = user;
+- }
++ err = shmem_lock(shm_file, 1, user);
++ if (!err && !(shp->shm_perm.mode & SHM_LOCKED)) {
++ shp->shm_perm.mode |= SHM_LOCKED;
++ shp->mlock_user = user;
+ }
+- } else if (!is_file_hugepages(shp->shm_file)) {
+- shmem_lock(shp->shm_file, 0, shp->mlock_user);
+- shp->shm_perm.mode &= ~SHM_LOCKED;
+- shp->mlock_user = NULL;
++ goto out_unlock;
+ }
++
++ /* SHM_UNLOCK */
++ if (!(shp->shm_perm.mode & SHM_LOCKED))
++ goto out_unlock;
++ shmem_lock(shm_file, 0, shp->mlock_user);
++ shp->shm_perm.mode &= ~SHM_LOCKED;
++ shp->mlock_user = NULL;
++ get_file(shm_file);
+ shm_unlock(shp);
++ scan_mapping_unevictable_pages(shm_file->f_mapping);
++ fput(shm_file);
+ goto out;
+ }
+ case IPC_RMID:
+--- a/mm/shmem.c
++++ b/mm/shmem.c
+@@ -1068,13 +1068,6 @@ int shmem_lock(struct file *file, int lo
+ user_shm_unlock(inode->i_size, user);
+ info->flags &= ~VM_LOCKED;
+ mapping_clear_unevictable(file->f_mapping);
+- /*
+- * Ensure that a racing putback_lru_page() can see
+- * the pages of this mapping are evictable when we
+- * skip them due to !PageLRU during the scan.
+- */
+- smp_mb__after_clear_bit();
+- scan_mapping_unevictable_pages(file->f_mapping);
+ }
+ retval = 0;
+
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -3353,6 +3353,7 @@ int page_evictable(struct page *page, st
+ return 1;
+ }
+
++#ifdef CONFIG_SHMEM
+ /**
+ * check_move_unevictable_page - check page for evictability and move to appropriate zone lru list
+ * @page: page to check evictability and move to appropriate lru list
+@@ -3363,6 +3364,8 @@ int page_evictable(struct page *page, st
+ *
+ * Restrictions: zone->lru_lock must be held, page must be on LRU and must
+ * have PageUnevictable set.
++ *
++ * This function is only used for SysV IPC SHM_UNLOCK.
+ */
+ static void check_move_unevictable_page(struct page *page, struct zone *zone)
+ {
+@@ -3396,6 +3399,8 @@ retry:
+ *
+ * Scan all pages in mapping. Check unevictable pages for
+ * evictability and move them to the appropriate zone lru list.
++ *
++ * This function is only used for SysV IPC SHM_UNLOCK.
+ */
+ void scan_mapping_unevictable_pages(struct address_space *mapping)
+ {
+@@ -3441,9 +3446,14 @@ void scan_mapping_unevictable_pages(stru
+ pagevec_release(&pvec);
+
+ count_vm_events(UNEVICTABLE_PGSCANNED, pg_scanned);
++ cond_resched();
+ }
+-
+ }
++#else
++void scan_mapping_unevictable_pages(struct address_space *mapping)
++{
++}
++#endif /* CONFIG_SHMEM */
+
+ static void warn_scan_unevictable_pages(void)
+ {
--- /dev/null
+From 245132643e1cfcd145bbc86a716c1818371fcb93 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Fri, 20 Jan 2012 14:34:21 -0800
+Subject: SHM_UNLOCK: fix Unevictable pages stranded after swap
+
+From: Hugh Dickins <hughd@google.com>
+
+commit 245132643e1cfcd145bbc86a716c1818371fcb93 upstream.
+
+Commit cc39c6a9bbde ("mm: account skipped entries to avoid looping in
+find_get_pages") correctly fixed an infinite loop; but left a problem
+that find_get_pages() on shmem would return 0 (appearing to callers to
+mean end of tree) when it meets a run of nr_pages swap entries.
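+
+Callers drive find_get_pages() through the usual pagevec loop, e.g.:
+
+  pagevec_init(&pvec, 0);
+  while (next < end &&
+         pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
+      ...
+  }
+
+When pagevec_lookup() returns 0 here, the loop stops as if the end of
+the tree had been reached.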
+
+The only uses of find_get_pages() on shmem are via pagevec_lookup(),
+called from invalidate_mapping_pages(), and from shmctl SHM_UNLOCK's
+scan_mapping_unevictable_pages(). The first is already commented, and
+not worth worrying about; but the second can leave pages on the
+Unevictable list after an unusual sequence of swapping and locking.
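+
+One way to observe the stranding from userspace, assuming part of the
+segment has been pushed out to swap before SHM_UNLOCK, is to compare
+the system-wide Unevictable count across the unlock, e.g. with a small
+helper like this (unevictable_kb() is only for illustration):
+
+  #include <stdio.h>
+
+  /* return the "Unevictable:" value from /proc/meminfo, in kB */
+  static long unevictable_kb(void)
+  {
+      char line[128];
+      long kb = -1;
+      FILE *f = fopen("/proc/meminfo", "r");
+
+      if (!f)
+          return -1;
+      while (fgets(line, sizeof(line), f))
+          if (sscanf(line, "Unevictable: %ld", &kb) == 1)
+              break;
+      fclose(f);
+      return kb;
+  }
+
+A stranded segment leaves this count high after SHM_UNLOCK completes.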
+
+Fix that by using shmem_find_get_pages_and_swap() (then ignoring the
+swap) instead of pagevec_lookup().
+
+But I don't want to contaminate vmscan.c with shmem internals, nor
+shmem.c with LRU locking. So move scan_mapping_unevictable_pages() into
+shmem.c, renaming it shmem_unlock_mapping(); and rename
+check_move_unevictable_page() to check_move_unevictable_pages(), looping
+down an array of pages, oftentimes under the same lock.
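+
+Condensed from the new check_move_unevictable_pages() below: the
+zone->lru_lock is only dropped and retaken when the zone changes
+between consecutive pages:
+
+  for (i = 0; i < nr_pages; i++) {
+      struct zone *pagezone = page_zone(pages[i]);
+
+      if (pagezone != zone) {
+          if (zone)
+              spin_unlock_irq(&zone->lru_lock);
+          zone = pagezone;
+          spin_lock_irq(&zone->lru_lock);
+      }
+      /* ... move the page to its evictable lru if page_evictable() ... */
+  }
+  if (zone)
+      spin_unlock_irq(&zone->lru_lock);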
+
+Leave out the "rotate unevictable list" block: that's a leftover from
+when this was used for /proc/sys/vm/scan_unevictable_pages, whose flawed
+handling involved looking at pages at tail of LRU.
+
+Was there significance to the sequence here: first ClearPageUnevictable,
+then test page_evictable, then SetPageUnevictable? I think not: we're
+under the LRU lock, and there are no barriers between those operations.
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+Cc: Minchan Kim <minchan.kim@gmail.com>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Shaohua Li <shaohua.li@intel.com>
+Cc: Eric Dumazet <eric.dumazet@gmail.com>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Michel Lespinasse <walken@google.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ include/linux/shmem_fs.h | 1
+ include/linux/swap.h | 2
+ ipc/shm.c | 2
+ mm/shmem.c | 46 +++++++++++++++--
+ mm/vmscan.c | 124 ++++++++++++++---------------------------------
+ 5 files changed, 82 insertions(+), 93 deletions(-)
+
+--- a/include/linux/shmem_fs.h
++++ b/include/linux/shmem_fs.h
+@@ -48,6 +48,7 @@ extern struct file *shmem_file_setup(con
+ loff_t size, unsigned long flags);
+ extern int shmem_zero_setup(struct vm_area_struct *);
+ extern int shmem_lock(struct file *file, int lock, struct user_struct *user);
++extern void shmem_unlock_mapping(struct address_space *mapping);
+ extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
+ pgoff_t index, gfp_t gfp_mask);
+ extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end);
+--- a/include/linux/swap.h
++++ b/include/linux/swap.h
+@@ -272,7 +272,7 @@ static inline int zone_reclaim(struct zo
+ #endif
+
+ extern int page_evictable(struct page *page, struct vm_area_struct *vma);
+-extern void scan_mapping_unevictable_pages(struct address_space *);
++extern void check_move_unevictable_pages(struct page **, int nr_pages);
+
+ extern unsigned long scan_unevictable_pages;
+ extern int scan_unevictable_handler(struct ctl_table *, int,
+--- a/ipc/shm.c
++++ b/ipc/shm.c
+@@ -916,7 +916,7 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int,
+ shp->mlock_user = NULL;
+ get_file(shm_file);
+ shm_unlock(shp);
+- scan_mapping_unevictable_pages(shm_file->f_mapping);
++ shmem_unlock_mapping(shm_file->f_mapping);
+ fput(shm_file);
+ goto out;
+ }
+--- a/mm/shmem.c
++++ b/mm/shmem.c
+@@ -379,7 +379,7 @@ static int shmem_free_swap(struct addres
+ /*
+ * Pagevec may contain swap entries, so shuffle up pages before releasing.
+ */
+-static void shmem_pagevec_release(struct pagevec *pvec)
++static void shmem_deswap_pagevec(struct pagevec *pvec)
+ {
+ int i, j;
+
+@@ -389,7 +389,36 @@ static void shmem_pagevec_release(struct
+ pvec->pages[j++] = page;
+ }
+ pvec->nr = j;
+- pagevec_release(pvec);
++}
++
++/*
++ * SysV IPC SHM_UNLOCK restore Unevictable pages to their evictable lists.
++ */
++void shmem_unlock_mapping(struct address_space *mapping)
++{
++ struct pagevec pvec;
++ pgoff_t indices[PAGEVEC_SIZE];
++ pgoff_t index = 0;
++
++ pagevec_init(&pvec, 0);
++ /*
++ * Minor point, but we might as well stop if someone else SHM_LOCKs it.
++ */
++ while (!mapping_unevictable(mapping)) {
++ /*
++ * Avoid pagevec_lookup(): find_get_pages() returns 0 as if it
++ * has finished, if it hits a row of PAGEVEC_SIZE swap entries.
++ */
++ pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
++ PAGEVEC_SIZE, pvec.pages, indices);
++ if (!pvec.nr)
++ break;
++ index = indices[pvec.nr - 1] + 1;
++ shmem_deswap_pagevec(&pvec);
++ check_move_unevictable_pages(pvec.pages, pvec.nr);
++ pagevec_release(&pvec);
++ cond_resched();
++ }
+ }
+
+ /*
+@@ -440,7 +469,8 @@ void shmem_truncate_range(struct inode *
+ }
+ unlock_page(page);
+ }
+- shmem_pagevec_release(&pvec);
++ shmem_deswap_pagevec(&pvec);
++ pagevec_release(&pvec);
+ mem_cgroup_uncharge_end();
+ cond_resched();
+ index++;
+@@ -470,7 +500,8 @@ void shmem_truncate_range(struct inode *
+ continue;
+ }
+ if (index == start && indices[0] > end) {
+- shmem_pagevec_release(&pvec);
++ shmem_deswap_pagevec(&pvec);
++ pagevec_release(&pvec);
+ break;
+ }
+ mem_cgroup_uncharge_start();
+@@ -494,7 +525,8 @@ void shmem_truncate_range(struct inode *
+ }
+ unlock_page(page);
+ }
+- shmem_pagevec_release(&pvec);
++ shmem_deswap_pagevec(&pvec);
++ pagevec_release(&pvec);
+ mem_cgroup_uncharge_end();
+ index++;
+ }
+@@ -2439,6 +2471,10 @@ int shmem_lock(struct file *file, int lo
+ return 0;
+ }
+
++void shmem_unlock_mapping(struct address_space *mapping)
++{
++}
++
+ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
+ {
+ truncate_inode_pages_range(inode->i_mapping, lstart, lend);
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -636,7 +636,7 @@ redo:
+ * When racing with an mlock or AS_UNEVICTABLE clearing
+ * (page is unlocked) make sure that if the other thread
+ * does not observe our setting of PG_lru and fails
+- * isolation/check_move_unevictable_page,
++ * isolation/check_move_unevictable_pages,
+ * we see PG_mlocked/AS_UNEVICTABLE cleared below and move
+ * the page back to the evictable list.
+ *
+@@ -3355,104 +3355,56 @@ int page_evictable(struct page *page, st
+
+ #ifdef CONFIG_SHMEM
+ /**
+- * check_move_unevictable_page - check page for evictability and move to appropriate zone lru list
+- * @page: page to check evictability and move to appropriate lru list
+- * @zone: zone page is in
++ * check_move_unevictable_pages - check pages for evictability and move to appropriate zone lru list
++ * @pages: array of pages to check
++ * @nr_pages: number of pages to check
+ *
+- * Checks a page for evictability and moves the page to the appropriate
+- * zone lru list.
+- *
+- * Restrictions: zone->lru_lock must be held, page must be on LRU and must
+- * have PageUnevictable set.
++ * Checks pages for evictability and moves them to the appropriate lru list.
+ *
+ * This function is only used for SysV IPC SHM_UNLOCK.
+ */
+-static void check_move_unevictable_page(struct page *page, struct zone *zone)
++void check_move_unevictable_pages(struct page **pages, int nr_pages)
+ {
+- VM_BUG_ON(PageActive(page));
+-
+-retry:
+- ClearPageUnevictable(page);
+- if (page_evictable(page, NULL)) {
+- enum lru_list l = page_lru_base_type(page);
+-
+- __dec_zone_state(zone, NR_UNEVICTABLE);
+- list_move(&page->lru, &zone->lru[l].list);
+- mem_cgroup_move_lists(page, LRU_UNEVICTABLE, l);
+- __inc_zone_state(zone, NR_INACTIVE_ANON + l);
+- __count_vm_event(UNEVICTABLE_PGRESCUED);
+- } else {
+- /*
+- * rotate unevictable list
+- */
+- SetPageUnevictable(page);
+- list_move(&page->lru, &zone->lru[LRU_UNEVICTABLE].list);
+- mem_cgroup_rotate_lru_list(page, LRU_UNEVICTABLE);
+- if (page_evictable(page, NULL))
+- goto retry;
+- }
+-}
++ struct zone *zone = NULL;
++ int pgscanned = 0;
++ int pgrescued = 0;
++ int i;
+
+-/**
+- * scan_mapping_unevictable_pages - scan an address space for evictable pages
+- * @mapping: struct address_space to scan for evictable pages
+- *
+- * Scan all pages in mapping. Check unevictable pages for
+- * evictability and move them to the appropriate zone lru list.
+- *
+- * This function is only used for SysV IPC SHM_UNLOCK.
+- */
+-void scan_mapping_unevictable_pages(struct address_space *mapping)
+-{
+- pgoff_t next = 0;
+- pgoff_t end = (i_size_read(mapping->host) + PAGE_CACHE_SIZE - 1) >>
+- PAGE_CACHE_SHIFT;
+- struct zone *zone;
+- struct pagevec pvec;
++ for (i = 0; i < nr_pages; i++) {
++ struct page *page = pages[i];
++ struct zone *pagezone;
++
++ pgscanned++;
++ pagezone = page_zone(page);
++ if (pagezone != zone) {
++ if (zone)
++ spin_unlock_irq(&zone->lru_lock);
++ zone = pagezone;
++ spin_lock_irq(&zone->lru_lock);
++ }
+
+- if (mapping->nrpages == 0)
+- return;
++ if (!PageLRU(page) || !PageUnevictable(page))
++ continue;
+
+- pagevec_init(&pvec, 0);
+- while (next < end &&
+- pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
+- int i;
+- int pg_scanned = 0;
+-
+- zone = NULL;
+-
+- for (i = 0; i < pagevec_count(&pvec); i++) {
+- struct page *page = pvec.pages[i];
+- pgoff_t page_index = page->index;
+- struct zone *pagezone = page_zone(page);
+-
+- pg_scanned++;
+- if (page_index > next)
+- next = page_index;
+- next++;
+-
+- if (pagezone != zone) {
+- if (zone)
+- spin_unlock_irq(&zone->lru_lock);
+- zone = pagezone;
+- spin_lock_irq(&zone->lru_lock);
+- }
++ if (page_evictable(page, NULL)) {
++ enum lru_list lru = page_lru_base_type(page);
+
+- if (PageLRU(page) && PageUnevictable(page))
+- check_move_unevictable_page(page, zone);
++ VM_BUG_ON(PageActive(page));
++ ClearPageUnevictable(page);
++ __dec_zone_state(zone, NR_UNEVICTABLE);
++ list_move(&page->lru, &zone->lru[lru].list);
++ mem_cgroup_move_lists(page, LRU_UNEVICTABLE, lru);
++ __inc_zone_state(zone, NR_INACTIVE_ANON + lru);
++ pgrescued++;
+ }
+- if (zone)
+- spin_unlock_irq(&zone->lru_lock);
+- pagevec_release(&pvec);
++ }
+
+- count_vm_events(UNEVICTABLE_PGSCANNED, pg_scanned);
+- cond_resched();
++ if (zone) {
++ __count_vm_events(UNEVICTABLE_PGRESCUED, pgrescued);
++ __count_vm_events(UNEVICTABLE_PGSCANNED, pgscanned);
++ spin_unlock_irq(&zone->lru_lock);
+ }
+ }
+-#else
+-void scan_mapping_unevictable_pages(struct address_space *mapping)
+-{
+-}
+ #endif /* CONFIG_SHMEM */
+
+ static void warn_scan_unevictable_pages(void)