git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.14-stable patches
author     Greg Kroah-Hartman <gregkh@linuxfoundation.org>
           Wed, 19 Nov 2014 20:31:34 +0000 (12:31 -0800)
committer  Greg Kroah-Hartman <gregkh@linuxfoundation.org>
           Wed, 19 Nov 2014 20:31:34 +0000 (12:31 -0800)
added patches:
callers-of-iov_copy_from_user_atomic-don-t-need.patch
lib-radix-tree-add-radix_tree_delete_item.patch
mm-compaction-clean-up-unused-code-lines.patch
mm-compaction-cleanup-isolate_freepages.patch
mm-filemap-move-radix-tree-hole-searching-here.patch
mm-fs-prepare-for-non-page-entries-in-page-cache-radix-trees.patch
mm-madvise-fix-madv_willneed-on-shmem-swapouts.patch
mm-migration-add-destination-page-freeing-callback.patch
mm-readahead.c-inline-ra_submit.patch
mm-remove-read_cache_page_async.patch
mm-shmem-save-one-radix-tree-lookup-when-truncating-swapped-pages.patch

12 files changed:
queue-3.14/callers-of-iov_copy_from_user_atomic-don-t-need.patch [new file with mode: 0644]
queue-3.14/lib-radix-tree-add-radix_tree_delete_item.patch [new file with mode: 0644]
queue-3.14/mm-compaction-clean-up-unused-code-lines.patch [new file with mode: 0644]
queue-3.14/mm-compaction-cleanup-isolate_freepages.patch [new file with mode: 0644]
queue-3.14/mm-filemap-move-radix-tree-hole-searching-here.patch [new file with mode: 0644]
queue-3.14/mm-fs-prepare-for-non-page-entries-in-page-cache-radix-trees.patch [new file with mode: 0644]
queue-3.14/mm-madvise-fix-madv_willneed-on-shmem-swapouts.patch [new file with mode: 0644]
queue-3.14/mm-migration-add-destination-page-freeing-callback.patch [new file with mode: 0644]
queue-3.14/mm-readahead.c-inline-ra_submit.patch [new file with mode: 0644]
queue-3.14/mm-remove-read_cache_page_async.patch [new file with mode: 0644]
queue-3.14/mm-shmem-save-one-radix-tree-lookup-when-truncating-swapped-pages.patch [new file with mode: 0644]
queue-3.14/series

diff --git a/queue-3.14/callers-of-iov_copy_from_user_atomic-don-t-need.patch b/queue-3.14/callers-of-iov_copy_from_user_atomic-don-t-need.patch
new file mode 100644 (file)
index 0000000..c87aec9
--- /dev/null
@@ -0,0 +1,69 @@
+From 9e8c2af96e0d2d5fe298dd796fb6bc16e888a48d Mon Sep 17 00:00:00 2001
+From: Al Viro <viro@zeniv.linux.org.uk>
+Date: Sun, 2 Feb 2014 22:10:25 -0500
+Subject: callers of iov_copy_from_user_atomic() don't need pagecache_disable()
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+commit 9e8c2af96e0d2d5fe298dd796fb6bc16e888a48d upstream.
+
+... it does that itself (via kmap_atomic())
+
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/file.c |    5 -----
+ fs/fuse/file.c  |    2 --
+ mm/filemap.c    |    3 ---
+ 3 files changed, 10 deletions(-)
+
+--- a/fs/btrfs/file.c
++++ b/fs/btrfs/file.c
+@@ -425,13 +425,8 @@ static noinline int btrfs_copy_from_user
+               struct page *page = prepared_pages[pg];
+               /*
+                * Copy data from userspace to the current page
+-               *
+-               * Disable pagefault to avoid recursive lock since
+-               * the pages are already locked
+                */
+-              pagefault_disable();
+               copied = iov_iter_copy_from_user_atomic(page, i, offset, count);
+-              pagefault_enable();
+               /* Flush processor's dcache for this page */
+               flush_dcache_page(page);
+--- a/fs/fuse/file.c
++++ b/fs/fuse/file.c
+@@ -1003,9 +1003,7 @@ static ssize_t fuse_fill_write_pages(str
+               if (mapping_writably_mapped(mapping))
+                       flush_dcache_page(page);
+-              pagefault_disable();
+               tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes);
+-              pagefault_enable();
+               flush_dcache_page(page);
+               mark_page_accessed(page);
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -2188,7 +2188,6 @@ size_t iov_iter_copy_from_user_atomic(st
+       char *kaddr;
+       size_t copied;
+-      BUG_ON(!in_atomic());
+       kaddr = kmap_atomic(page);
+       if (likely(i->nr_segs == 1)) {
+               int left;
+@@ -2562,9 +2561,7 @@ again:
+               if (mapping_writably_mapped(mapping))
+                       flush_dcache_page(page);
+-              pagefault_disable();
+               copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
+-              pagefault_enable();
+               flush_dcache_page(page);
+               mark_page_accessed(page);
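
For illustration only: a minimal standalone C sketch (a toy userspace model with invented names such as model_kmap_atomic, not kernel code) of why the explicit pagefault_disable()/pagefault_enable() pairs removed above are redundant: the copy helper's own kmap_atomic() step already disables pagefaults.

/*
 * Toy model: the depth counter stands in for the kernel's pagefault
 * accounting.  The copy helper bumps it itself via its kmap_atomic()
 * equivalent, so callers no longer need their own disable/enable pair
 * around the call.
 */
#include <assert.h>
#include <stdio.h>

static int pagefault_depth;

static void model_pagefault_disable(void) { pagefault_depth++; }
static void model_pagefault_enable(void)  { pagefault_depth--; }

static void *model_kmap_atomic(void *page)
{
	model_pagefault_disable();      /* side effect the callers relied on */
	return page;
}

static void model_kunmap_atomic(void *addr)
{
	(void)addr;
	model_pagefault_enable();
}

/* stand-in for iov_iter_copy_from_user_atomic() */
static void model_copy_from_user_atomic(void *page)
{
	void *kaddr = model_kmap_atomic(page);

	assert(pagefault_depth > 0);    /* faults are already disabled here */
	model_kunmap_atomic(kaddr);
}

int main(void)
{
	char page[64];

	model_copy_from_user_atomic(page);      /* no guard needed by the caller */
	assert(pagefault_depth == 0);
	printf("copy ran with faults disabled internally\n");
	return 0;
}
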
diff --git a/queue-3.14/lib-radix-tree-add-radix_tree_delete_item.patch b/queue-3.14/lib-radix-tree-add-radix_tree_delete_item.patch
new file mode 100644 (file)
index 0000000..ce8d71d
--- /dev/null
@@ -0,0 +1,117 @@
+From 53c59f262d747ea82e7414774c59a489501186a0 Mon Sep 17 00:00:00 2001
+From: Johannes Weiner <hannes@cmpxchg.org>
+Date: Thu, 3 Apr 2014 14:47:39 -0700
+Subject: lib: radix-tree: add radix_tree_delete_item()
+
+From: Johannes Weiner <hannes@cmpxchg.org>
+
+commit 53c59f262d747ea82e7414774c59a489501186a0 upstream.
+
+Provide a function that does not just delete an entry at a given index,
+but also allows passing in an expected item.  Delete only if that item
+is still located at the specified index.
+
+This is handy when lockless tree traversals want to delete entries as
+well because they don't have to do a second, locked lookup to verify
+the slot has not changed under them before deleting the entry.
+
+Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
+Reviewed-by: Minchan Kim <minchan@kernel.org>
+Reviewed-by: Rik van Riel <riel@redhat.com>
+Acked-by: Mel Gorman <mgorman@suse.de>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Bob Liu <bob.liu@oracle.com>
+Cc: Christoph Hellwig <hch@infradead.org>
+Cc: Dave Chinner <david@fromorbit.com>
+Cc: Greg Thelen <gthelen@google.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Jan Kara <jack@suse.cz>
+Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+Cc: Luigi Semenzato <semenzato@google.com>
+Cc: Metin Doslu <metin@citusdata.com>
+Cc: Michel Lespinasse <walken@google.com>
+Cc: Ozgun Erdogan <ozgun@citusdata.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Roman Gushchin <klamm@yandex-team.ru>
+Cc: Ryan Mallon <rmallon@gmail.com>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/radix-tree.h |    1 +
+ lib/radix-tree.c           |   31 +++++++++++++++++++++++++++----
+ 2 files changed, 28 insertions(+), 4 deletions(-)
+
+--- a/include/linux/radix-tree.h
++++ b/include/linux/radix-tree.h
+@@ -219,6 +219,7 @@ static inline void radix_tree_replace_sl
+ int radix_tree_insert(struct radix_tree_root *, unsigned long, void *);
+ void *radix_tree_lookup(struct radix_tree_root *, unsigned long);
+ void **radix_tree_lookup_slot(struct radix_tree_root *, unsigned long);
++void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *);
+ void *radix_tree_delete(struct radix_tree_root *, unsigned long);
+ unsigned int
+ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
+--- a/lib/radix-tree.c
++++ b/lib/radix-tree.c
+@@ -1337,15 +1337,18 @@ static inline void radix_tree_shrink(str
+ }
+ /**
+- *    radix_tree_delete    -    delete an item from a radix tree
++ *    radix_tree_delete_item    -    delete an item from a radix tree
+  *    @root:          radix tree root
+  *    @index:         index key
++ *    @item:          expected item
+  *
+- *    Remove the item at @index from the radix tree rooted at @root.
++ *    Remove @item at @index from the radix tree rooted at @root.
+  *
+- *    Returns the address of the deleted item, or NULL if it was not present.
++ *    Returns the address of the deleted item, or NULL if it was not present
++ *    or the entry at the given @index was not @item.
+  */
+-void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
++void *radix_tree_delete_item(struct radix_tree_root *root,
++                           unsigned long index, void *item)
+ {
+       struct radix_tree_node *node = NULL;
+       struct radix_tree_node *slot = NULL;
+@@ -1380,6 +1383,11 @@ void *radix_tree_delete(struct radix_tre
+       if (slot == NULL)
+               goto out;
++      if (item && slot != item) {
++              slot = NULL;
++              goto out;
++      }
++
+       /*
+        * Clear all tags associated with the item to be deleted.
+        * This way of doing it would be inefficient, but seldom is any set.
+@@ -1424,6 +1432,21 @@ void *radix_tree_delete(struct radix_tre
+ out:
+       return slot;
+ }
++EXPORT_SYMBOL(radix_tree_delete_item);
++
++/**
++ *    radix_tree_delete    -    delete an item from a radix tree
++ *    @root:          radix tree root
++ *    @index:         index key
++ *
++ *    Remove the item at @index from the radix tree rooted at @root.
++ *
++ *    Returns the address of the deleted item, or NULL if it was not present.
++ */
++void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
++{
++      return radix_tree_delete_item(root, index, NULL);
++}
+ EXPORT_SYMBOL(radix_tree_delete);
+ /**
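
For illustration only: a minimal standalone C sketch (a toy userspace map, not the kernel radix tree) of the compare-and-delete semantics that radix_tree_delete_item() introduces, with the unconditional delete expressed as the NULL-item case, mirroring the backported change.

/*
 * Toy model: delete the entry at @index only if it still holds @item;
 * passing item == NULL gives the old unconditional behaviour.
 */
#include <assert.h>
#include <stddef.h>

#define SLOTS 16

static void *slots[SLOTS];              /* stand-in for a radix tree */

static void *model_delete_item(unsigned long index, void *item)
{
	void *old = slots[index];

	if (old == NULL)
		return NULL;                    /* nothing there */
	if (item && old != item)
		return NULL;                    /* slot changed under us: refuse */
	slots[index] = NULL;
	return old;                             /* return the deleted entry */
}

static void *model_delete(unsigned long index)
{
	return model_delete_item(index, NULL);  /* unconditional variant */
}

int main(void)
{
	int a, b;

	slots[3] = &a;
	assert(model_delete_item(3, &b) == NULL);   /* expected item mismatch */
	assert(slots[3] == &a);                     /* entry left untouched */
	assert(model_delete_item(3, &a) == &a);     /* matches: deleted */
	assert(model_delete(3) == NULL);            /* already gone */
	return 0;
}
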
diff --git a/queue-3.14/mm-compaction-clean-up-unused-code-lines.patch b/queue-3.14/mm-compaction-clean-up-unused-code-lines.patch
new file mode 100644 (file)
index 0000000..f6ab43b
--- /dev/null
@@ -0,0 +1,67 @@
+From 13fb44e4b0414d7e718433a49e6430d5b76bd46e Mon Sep 17 00:00:00 2001
+From: Heesub Shin <heesub.shin@samsung.com>
+Date: Wed, 4 Jun 2014 16:07:24 -0700
+Subject: mm/compaction: clean up unused code lines
+
+From: Heesub Shin <heesub.shin@samsung.com>
+
+commit 13fb44e4b0414d7e718433a49e6430d5b76bd46e upstream.
+
+Remove code lines currently not in use or never called.
+
+Signed-off-by: Heesub Shin <heesub.shin@samsung.com>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Dongjun Shin <d.j.shin@samsung.com>
+Cc: Sunghwan Yun <sunghwan.yun@samsung.com>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Cc: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
+Cc: Michal Nazarewicz <mina86@mina86.com>
+Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Cc: Christoph Lameter <cl@linux.com>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Dongjun Shin <d.j.shin@samsung.com>
+Cc: Sunghwan Yun <sunghwan.yun@samsung.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/compaction.c |   10 ----------
+ 1 file changed, 10 deletions(-)
+
+--- a/mm/compaction.c
++++ b/mm/compaction.c
+@@ -208,12 +208,6 @@ static bool compact_checklock_irqsave(sp
+       return true;
+ }
+-static inline bool compact_trylock_irqsave(spinlock_t *lock,
+-                      unsigned long *flags, struct compact_control *cc)
+-{
+-      return compact_checklock_irqsave(lock, flags, false, cc);
+-}
+-
+ /* Returns true if the page is within a block suitable for migration to */
+ static bool suitable_migration_target(struct page *page)
+ {
+@@ -736,7 +730,6 @@ static void isolate_freepages(struct zon
+                       continue;
+               /* Found a block suitable for isolating free pages from */
+-              isolated = 0;
+               /*
+                * Take care when isolating in last pageblock of a zone which
+@@ -1165,9 +1158,6 @@ static void __compact_pgdat(pg_data_t *p
+                       if (zone_watermark_ok(zone, cc->order,
+                                               low_wmark_pages(zone), 0, 0))
+                               compaction_defer_reset(zone, cc->order, false);
+-                      /* Currently async compaction is never deferred. */
+-                      else if (cc->sync)
+-                              defer_compaction(zone, cc->order);
+               }
+               VM_BUG_ON(!list_empty(&cc->freepages));
diff --git a/queue-3.14/mm-compaction-cleanup-isolate_freepages.patch b/queue-3.14/mm-compaction-cleanup-isolate_freepages.patch
new file mode 100644 (file)
index 0000000..2d6c3ee
--- /dev/null
@@ -0,0 +1,161 @@
+From c96b9e508f3d06ddb601dcc9792d62c044ab359e Mon Sep 17 00:00:00 2001
+From: Vlastimil Babka <vbabka@suse.cz>
+Date: Wed, 4 Jun 2014 16:07:26 -0700
+Subject: mm/compaction: cleanup isolate_freepages()
+
+From: Vlastimil Babka <vbabka@suse.cz>
+
+commit c96b9e508f3d06ddb601dcc9792d62c044ab359e upstream.
+
+isolate_freepages() is currently somewhat hard to follow thanks to many
+different pfn variables. Especially misleading is the name 'high_pfn', as it
+looks like it is related to the 'low_pfn' variable, but in fact it is not.
+
+This patch renames the 'high_pfn' variable to a hopefully less confusing name,
+and slightly changes its handling without a functional change. A comment made
+obsolete by recent changes is also updated.
+
+[akpm@linux-foundation.org: comment fixes, per Minchan]
+[iamjoonsoo.kim@lge.com: cleanups]
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Cc: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
+Cc: Michal Nazarewicz <mina86@mina86.com>
+Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Cc: Christoph Lameter <cl@linux.com>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Dongjun Shin <d.j.shin@samsung.com>
+Cc: Sunghwan Yun <sunghwan.yun@samsung.com>
+Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/compaction.c |   56 +++++++++++++++++++++++++++-----------------------------
+ 1 file changed, 27 insertions(+), 29 deletions(-)
+
+--- a/mm/compaction.c
++++ b/mm/compaction.c
+@@ -665,7 +665,10 @@ static void isolate_freepages(struct zon
+                               struct compact_control *cc)
+ {
+       struct page *page;
+-      unsigned long high_pfn, low_pfn, pfn, z_end_pfn;
++      unsigned long block_start_pfn;  /* start of current pageblock */
++      unsigned long block_end_pfn;    /* end of current pageblock */
++      unsigned long low_pfn;       /* lowest pfn scanner is able to scan */
++      unsigned long next_free_pfn; /* start pfn for scaning at next round */
+       int nr_freepages = cc->nr_freepages;
+       struct list_head *freelist = &cc->freepages;
+@@ -673,32 +676,33 @@ static void isolate_freepages(struct zon
+        * Initialise the free scanner. The starting point is where we last
+        * successfully isolated from, zone-cached value, or the end of the
+        * zone when isolating for the first time. We need this aligned to
+-       * the pageblock boundary, because we do pfn -= pageblock_nr_pages
+-       * in the for loop.
++       * the pageblock boundary, because we do
++       * block_start_pfn -= pageblock_nr_pages in the for loop.
++       * For ending point, take care when isolating in last pageblock of a
++       * a zone which ends in the middle of a pageblock.
+        * The low boundary is the end of the pageblock the migration scanner
+        * is using.
+        */
+-      pfn = cc->free_pfn & ~(pageblock_nr_pages-1);
++      block_start_pfn = cc->free_pfn & ~(pageblock_nr_pages-1);
++      block_end_pfn = min(block_start_pfn + pageblock_nr_pages,
++                                              zone_end_pfn(zone));
+       low_pfn = ALIGN(cc->migrate_pfn + 1, pageblock_nr_pages);
+       /*
+-       * Take care that if the migration scanner is at the end of the zone
+-       * that the free scanner does not accidentally move to the next zone
+-       * in the next isolation cycle.
++       * If no pages are isolated, the block_start_pfn < low_pfn check
++       * will kick in.
+        */
+-      high_pfn = min(low_pfn, pfn);
+-
+-      z_end_pfn = zone_end_pfn(zone);
++      next_free_pfn = 0;
+       /*
+        * Isolate free pages until enough are available to migrate the
+        * pages on cc->migratepages. We stop searching if the migrate
+        * and free page scanners meet or enough free pages are isolated.
+        */
+-      for (; pfn >= low_pfn && cc->nr_migratepages > nr_freepages;
+-                                      pfn -= pageblock_nr_pages) {
++      for (; block_start_pfn >= low_pfn && cc->nr_migratepages > nr_freepages;
++                              block_end_pfn = block_start_pfn,
++                              block_start_pfn -= pageblock_nr_pages) {
+               unsigned long isolated;
+-              unsigned long end_pfn;
+               /*
+                * This can iterate a massively long zone without finding any
+@@ -707,7 +711,7 @@ static void isolate_freepages(struct zon
+                */
+               cond_resched();
+-              if (!pfn_valid(pfn))
++              if (!pfn_valid(block_start_pfn))
+                       continue;
+               /*
+@@ -717,7 +721,7 @@ static void isolate_freepages(struct zon
+                * i.e. it's possible that all pages within a zones range of
+                * pages do not belong to a single zone.
+                */
+-              page = pfn_to_page(pfn);
++              page = pfn_to_page(block_start_pfn);
+               if (page_zone(page) != zone)
+                       continue;
+@@ -730,14 +734,8 @@ static void isolate_freepages(struct zon
+                       continue;
+               /* Found a block suitable for isolating free pages from */
+-
+-              /*
+-               * Take care when isolating in last pageblock of a zone which
+-               * ends in the middle of a pageblock.
+-               */
+-              end_pfn = min(pfn + pageblock_nr_pages, z_end_pfn);
+-              isolated = isolate_freepages_block(cc, pfn, end_pfn,
+-                                                 freelist, false);
++              isolated = isolate_freepages_block(cc, block_start_pfn,
++                                      block_end_pfn, freelist, false);
+               nr_freepages += isolated;
+               /*
+@@ -745,9 +743,9 @@ static void isolate_freepages(struct zon
+                * looking for free pages, the search will restart here as
+                * page migration may have returned some pages to the allocator
+                */
+-              if (isolated) {
++              if (isolated && next_free_pfn == 0) {
+                       cc->finished_update_free = true;
+-                      high_pfn = max(high_pfn, pfn);
++                      next_free_pfn = block_start_pfn;
+               }
+       }
+@@ -758,10 +756,10 @@ static void isolate_freepages(struct zon
+        * If we crossed the migrate scanner, we want to keep it that way
+        * so that compact_finished() may detect this
+        */
+-      if (pfn < low_pfn)
+-              cc->free_pfn = max(pfn, zone->zone_start_pfn);
+-      else
+-              cc->free_pfn = high_pfn;
++      if (block_start_pfn < low_pfn)
++              next_free_pfn = cc->migrate_pfn;
++
++      cc->free_pfn = next_free_pfn;
+       cc->nr_freepages = nr_freepages;
+ }
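
For illustration only: a minimal standalone C sketch (a toy userspace model with made-up zone numbers, not kernel code) of the loop structure this cleanup moves isolate_freepages() to: a pageblock-aligned backward walk that records next_free_pfn at the first block that yields pages and falls back to the migrate scanner's position when the scanners cross.

/*
 * Toy model of the reworked free scanner loop.  PAGEBLOCK_NR_PAGES and
 * fake_isolate_block() are invented stand-ins for illustration.
 */
#include <stdio.h>

#define PAGEBLOCK_NR_PAGES 8UL

static unsigned long fake_isolate_block(unsigned long start, unsigned long end)
{
	(void)end;                                 /* toy data ignores the block end */
	return (start / PAGEBLOCK_NR_PAGES) % 3;   /* arbitrary page counts */
}

int main(void)
{
	unsigned long zone_end_pfn = 100;
	unsigned long free_pfn = 96;               /* cached free scanner position */
	unsigned long migrate_pfn = 17;            /* migrate scanner position */
	unsigned long low_pfn, block_start_pfn, block_end_pfn;
	unsigned long next_free_pfn = 0, nr_freepages = 0, want = 4;

	/* align the starting block, clamp its end to the zone end */
	block_start_pfn = free_pfn & ~(PAGEBLOCK_NR_PAGES - 1);
	block_end_pfn = block_start_pfn + PAGEBLOCK_NR_PAGES;
	if (block_end_pfn > zone_end_pfn)
		block_end_pfn = zone_end_pfn;
	/* ALIGN(migrate_pfn + 1, pageblock_nr_pages) equivalent */
	low_pfn = (migrate_pfn + PAGEBLOCK_NR_PAGES) & ~(PAGEBLOCK_NR_PAGES - 1);

	for (; block_start_pfn >= low_pfn && nr_freepages < want;
	       block_end_pfn = block_start_pfn,
	       block_start_pfn -= PAGEBLOCK_NR_PAGES) {
		unsigned long isolated = fake_isolate_block(block_start_pfn,
							    block_end_pfn);

		nr_freepages += isolated;
		if (isolated && next_free_pfn == 0)
			next_free_pfn = block_start_pfn;   /* first productive block */
	}

	if (block_start_pfn < low_pfn)             /* scanners crossed */
		next_free_pfn = migrate_pfn;

	printf("isolated %lu pages, resume free scan at pfn %lu\n",
	       nr_freepages, next_free_pfn);
	return 0;
}
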
diff --git a/queue-3.14/mm-filemap-move-radix-tree-hole-searching-here.patch b/queue-3.14/mm-filemap-move-radix-tree-hole-searching-here.patch
new file mode 100644 (file)
index 0000000..7b1e612
--- /dev/null
@@ -0,0 +1,284 @@
+From e7b563bb2a6f4d974208da46200784b9c5b5a47e Mon Sep 17 00:00:00 2001
+From: Johannes Weiner <hannes@cmpxchg.org>
+Date: Thu, 3 Apr 2014 14:47:44 -0700
+Subject: mm: filemap: move radix tree hole searching here
+
+From: Johannes Weiner <hannes@cmpxchg.org>
+
+commit e7b563bb2a6f4d974208da46200784b9c5b5a47e upstream.
+
+The radix tree hole searching code is only used for page cache, for
+example the readahead code trying to get a picture of the area
+surrounding a fault.
+
+It sufficed to rely on the radix tree definition of holes, which is
+"empty tree slot".  But this is about to change, though, as shadow page
+descriptors will be stored in the page cache after the actual pages get
+evicted from memory.
+
+Move the functions over to mm/filemap.c and make them native page cache
+operations, where they can later be adapted to handle the new definition
+of "page cache hole".
+
+Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
+Reviewed-by: Rik van Riel <riel@redhat.com>
+Reviewed-by: Minchan Kim <minchan@kernel.org>
+Acked-by: Mel Gorman <mgorman@suse.de>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Bob Liu <bob.liu@oracle.com>
+Cc: Christoph Hellwig <hch@infradead.org>
+Cc: Dave Chinner <david@fromorbit.com>
+Cc: Greg Thelen <gthelen@google.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Jan Kara <jack@suse.cz>
+Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+Cc: Luigi Semenzato <semenzato@google.com>
+Cc: Metin Doslu <metin@citusdata.com>
+Cc: Michel Lespinasse <walken@google.com>
+Cc: Ozgun Erdogan <ozgun@citusdata.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Roman Gushchin <klamm@yandex-team.ru>
+Cc: Ryan Mallon <rmallon@gmail.com>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/nfs/blocklayout/blocklayout.c |    2 -
+ include/linux/pagemap.h          |    5 ++
+ include/linux/radix-tree.h       |    4 --
+ lib/radix-tree.c                 |   75 --------------------------------------
+ mm/filemap.c                     |   76 +++++++++++++++++++++++++++++++++++++++
+ mm/readahead.c                   |    4 +-
+ 6 files changed, 84 insertions(+), 82 deletions(-)
+
+--- a/fs/nfs/blocklayout/blocklayout.c
++++ b/fs/nfs/blocklayout/blocklayout.c
+@@ -1213,7 +1213,7 @@ static u64 pnfs_num_cont_bytes(struct in
+       end = DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE);
+       if (end != NFS_I(inode)->npages) {
+               rcu_read_lock();
+-              end = radix_tree_next_hole(&mapping->page_tree, idx + 1, ULONG_MAX);
++              end = page_cache_next_hole(mapping, idx + 1, ULONG_MAX);
+               rcu_read_unlock();
+       }
+--- a/include/linux/pagemap.h
++++ b/include/linux/pagemap.h
+@@ -243,6 +243,11 @@ static inline struct page *page_cache_al
+ typedef int filler_t(void *, struct page *);
++pgoff_t page_cache_next_hole(struct address_space *mapping,
++                           pgoff_t index, unsigned long max_scan);
++pgoff_t page_cache_prev_hole(struct address_space *mapping,
++                           pgoff_t index, unsigned long max_scan);
++
+ extern struct page * find_get_page(struct address_space *mapping,
+                               pgoff_t index);
+ extern struct page * find_lock_page(struct address_space *mapping,
+--- a/include/linux/radix-tree.h
++++ b/include/linux/radix-tree.h
+@@ -227,10 +227,6 @@ radix_tree_gang_lookup(struct radix_tree
+ unsigned int radix_tree_gang_lookup_slot(struct radix_tree_root *root,
+                       void ***results, unsigned long *indices,
+                       unsigned long first_index, unsigned int max_items);
+-unsigned long radix_tree_next_hole(struct radix_tree_root *root,
+-                              unsigned long index, unsigned long max_scan);
+-unsigned long radix_tree_prev_hole(struct radix_tree_root *root,
+-                              unsigned long index, unsigned long max_scan);
+ int radix_tree_preload(gfp_t gfp_mask);
+ int radix_tree_maybe_preload(gfp_t gfp_mask);
+ void radix_tree_init(void);
+--- a/lib/radix-tree.c
++++ b/lib/radix-tree.c
+@@ -946,81 +946,6 @@ next:
+ }
+ EXPORT_SYMBOL(radix_tree_range_tag_if_tagged);
+-
+-/**
+- *    radix_tree_next_hole    -    find the next hole (not-present entry)
+- *    @root:          tree root
+- *    @index:         index key
+- *    @max_scan:      maximum range to search
+- *
+- *    Search the set [index, min(index+max_scan-1, MAX_INDEX)] for the lowest
+- *    indexed hole.
+- *
+- *    Returns: the index of the hole if found, otherwise returns an index
+- *    outside of the set specified (in which case 'return - index >= max_scan'
+- *    will be true). In rare cases of index wrap-around, 0 will be returned.
+- *
+- *    radix_tree_next_hole may be called under rcu_read_lock. However, like
+- *    radix_tree_gang_lookup, this will not atomically search a snapshot of
+- *    the tree at a single point in time. For example, if a hole is created
+- *    at index 5, then subsequently a hole is created at index 10,
+- *    radix_tree_next_hole covering both indexes may return 10 if called
+- *    under rcu_read_lock.
+- */
+-unsigned long radix_tree_next_hole(struct radix_tree_root *root,
+-                              unsigned long index, unsigned long max_scan)
+-{
+-      unsigned long i;
+-
+-      for (i = 0; i < max_scan; i++) {
+-              if (!radix_tree_lookup(root, index))
+-                      break;
+-              index++;
+-              if (index == 0)
+-                      break;
+-      }
+-
+-      return index;
+-}
+-EXPORT_SYMBOL(radix_tree_next_hole);
+-
+-/**
+- *    radix_tree_prev_hole    -    find the prev hole (not-present entry)
+- *    @root:          tree root
+- *    @index:         index key
+- *    @max_scan:      maximum range to search
+- *
+- *    Search backwards in the range [max(index-max_scan+1, 0), index]
+- *    for the first hole.
+- *
+- *    Returns: the index of the hole if found, otherwise returns an index
+- *    outside of the set specified (in which case 'index - return >= max_scan'
+- *    will be true). In rare cases of wrap-around, ULONG_MAX will be returned.
+- *
+- *    radix_tree_next_hole may be called under rcu_read_lock. However, like
+- *    radix_tree_gang_lookup, this will not atomically search a snapshot of
+- *    the tree at a single point in time. For example, if a hole is created
+- *    at index 10, then subsequently a hole is created at index 5,
+- *    radix_tree_prev_hole covering both indexes may return 5 if called under
+- *    rcu_read_lock.
+- */
+-unsigned long radix_tree_prev_hole(struct radix_tree_root *root,
+-                                 unsigned long index, unsigned long max_scan)
+-{
+-      unsigned long i;
+-
+-      for (i = 0; i < max_scan; i++) {
+-              if (!radix_tree_lookup(root, index))
+-                      break;
+-              index--;
+-              if (index == ULONG_MAX)
+-                      break;
+-      }
+-
+-      return index;
+-}
+-EXPORT_SYMBOL(radix_tree_prev_hole);
+-
+ /**
+  *    radix_tree_gang_lookup - perform multiple lookup on a radix tree
+  *    @root:          radix tree root
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -688,6 +688,82 @@ int __lock_page_or_retry(struct page *pa
+ }
+ /**
++ * page_cache_next_hole - find the next hole (not-present entry)
++ * @mapping: mapping
++ * @index: index
++ * @max_scan: maximum range to search
++ *
++ * Search the set [index, min(index+max_scan-1, MAX_INDEX)] for the
++ * lowest indexed hole.
++ *
++ * Returns: the index of the hole if found, otherwise returns an index
++ * outside of the set specified (in which case 'return - index >=
++ * max_scan' will be true). In rare cases of index wrap-around, 0 will
++ * be returned.
++ *
++ * page_cache_next_hole may be called under rcu_read_lock. However,
++ * like radix_tree_gang_lookup, this will not atomically search a
++ * snapshot of the tree at a single point in time. For example, if a
++ * hole is created at index 5, then subsequently a hole is created at
++ * index 10, page_cache_next_hole covering both indexes may return 10
++ * if called under rcu_read_lock.
++ */
++pgoff_t page_cache_next_hole(struct address_space *mapping,
++                           pgoff_t index, unsigned long max_scan)
++{
++      unsigned long i;
++
++      for (i = 0; i < max_scan; i++) {
++              if (!radix_tree_lookup(&mapping->page_tree, index))
++                      break;
++              index++;
++              if (index == 0)
++                      break;
++      }
++
++      return index;
++}
++EXPORT_SYMBOL(page_cache_next_hole);
++
++/**
++ * page_cache_prev_hole - find the prev hole (not-present entry)
++ * @mapping: mapping
++ * @index: index
++ * @max_scan: maximum range to search
++ *
++ * Search backwards in the range [max(index-max_scan+1, 0), index] for
++ * the first hole.
++ *
++ * Returns: the index of the hole if found, otherwise returns an index
++ * outside of the set specified (in which case 'index - return >=
++ * max_scan' will be true). In rare cases of wrap-around, ULONG_MAX
++ * will be returned.
++ *
++ * page_cache_prev_hole may be called under rcu_read_lock. However,
++ * like radix_tree_gang_lookup, this will not atomically search a
++ * snapshot of the tree at a single point in time. For example, if a
++ * hole is created at index 10, then subsequently a hole is created at
++ * index 5, page_cache_prev_hole covering both indexes may return 5 if
++ * called under rcu_read_lock.
++ */
++pgoff_t page_cache_prev_hole(struct address_space *mapping,
++                           pgoff_t index, unsigned long max_scan)
++{
++      unsigned long i;
++
++      for (i = 0; i < max_scan; i++) {
++              if (!radix_tree_lookup(&mapping->page_tree, index))
++                      break;
++              index--;
++              if (index == ULONG_MAX)
++                      break;
++      }
++
++      return index;
++}
++EXPORT_SYMBOL(page_cache_prev_hole);
++
++/**
+  * find_get_page - find and get a page reference
+  * @mapping: the address_space to search
+  * @offset: the page index
+--- a/mm/readahead.c
++++ b/mm/readahead.c
+@@ -347,7 +347,7 @@ static pgoff_t count_history_pages(struc
+       pgoff_t head;
+       rcu_read_lock();
+-      head = radix_tree_prev_hole(&mapping->page_tree, offset - 1, max);
++      head = page_cache_prev_hole(mapping, offset - 1, max);
+       rcu_read_unlock();
+       return offset - 1 - head;
+@@ -427,7 +427,7 @@ ondemand_readahead(struct address_space
+               pgoff_t start;
+               rcu_read_lock();
+-              start = radix_tree_next_hole(&mapping->page_tree, offset+1,max);
++              start = page_cache_next_hole(mapping, offset + 1, max);
+               rcu_read_unlock();
+               if (!start || start - offset > max)
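
For illustration only: a minimal standalone C sketch (a toy userspace model, not kernel code) of the hole-search semantics that page_cache_next_hole() keeps after the move above: scan at most max_scan slots forward and return the index of the first empty one, or an index outside the scanned set if every slot is occupied.

/*
 * Toy model: an array of slots stands in for the page cache tree.
 */
#include <assert.h>
#include <stddef.h>

#define SLOTS 16

static void *cache[SLOTS];              /* stand-in for the page cache */

static unsigned long model_next_hole(unsigned long index, unsigned long max_scan)
{
	unsigned long i;

	for (i = 0; i < max_scan; i++) {
		if (index >= SLOTS || cache[index] == NULL)
			break;                          /* found a hole */
		index++;
		if (index == 0)
			break;                          /* wrapped around */
	}
	return index;
}

int main(void)
{
	int page;

	cache[2] = &page;
	cache[3] = &page;
	cache[4] = &page;

	assert(model_next_hole(2, 8) == 5);     /* first empty slot after the run */
	assert(model_next_hole(2, 2) == 4);     /* gave up: result - index >= max_scan */
	return 0;
}
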
diff --git a/queue-3.14/mm-fs-prepare-for-non-page-entries-in-page-cache-radix-trees.patch b/queue-3.14/mm-fs-prepare-for-non-page-entries-in-page-cache-radix-trees.patch
new file mode 100644 (file)
index 0000000..b7ee04f
--- /dev/null
@@ -0,0 +1,902 @@
+From 0cd6144aadd2afd19d1aca880153530c52957604 Mon Sep 17 00:00:00 2001
+From: Johannes Weiner <hannes@cmpxchg.org>
+Date: Thu, 3 Apr 2014 14:47:46 -0700
+Subject: mm + fs: prepare for non-page entries in page cache radix trees
+
+From: Johannes Weiner <hannes@cmpxchg.org>
+
+commit 0cd6144aadd2afd19d1aca880153530c52957604 upstream.
+
+shmem mappings already contain exceptional entries where swap slot
+information is remembered.
+
+To be able to store eviction information for regular page cache, prepare
+every site dealing with the radix trees directly to handle entries other
+than pages.
+
+The common lookup functions will filter out non-page entries and return
+NULL for page cache holes, just as before.  But provide a raw version of
+the API which returns non-page entries as well, and switch shmem over to
+use it.
+
+Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
+Reviewed-by: Rik van Riel <riel@redhat.com>
+Reviewed-by: Minchan Kim <minchan@kernel.org>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Bob Liu <bob.liu@oracle.com>
+Cc: Christoph Hellwig <hch@infradead.org>
+Cc: Dave Chinner <david@fromorbit.com>
+Cc: Greg Thelen <gthelen@google.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Jan Kara <jack@suse.cz>
+Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+Cc: Luigi Semenzato <semenzato@google.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Metin Doslu <metin@citusdata.com>
+Cc: Michel Lespinasse <walken@google.com>
+Cc: Ozgun Erdogan <ozgun@citusdata.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Roman Gushchin <klamm@yandex-team.ru>
+Cc: Ryan Mallon <rmallon@gmail.com>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/compression.c   |    2 
+ include/linux/mm.h       |    8 +
+ include/linux/pagemap.h  |   15 ++-
+ include/linux/pagevec.h  |    5 +
+ include/linux/shmem_fs.h |    1 
+ mm/filemap.c             |  202 +++++++++++++++++++++++++++++++++++++++++------
+ mm/mincore.c             |   20 +++-
+ mm/readahead.c           |    2 
+ mm/shmem.c               |   97 ++++------------------
+ mm/swap.c                |   51 +++++++++++
+ mm/truncate.c            |   74 ++++++++++++++---
+ 11 files changed, 348 insertions(+), 129 deletions(-)
+
+--- a/fs/btrfs/compression.c
++++ b/fs/btrfs/compression.c
+@@ -472,7 +472,7 @@ static noinline int add_ra_bio_pages(str
+               rcu_read_lock();
+               page = radix_tree_lookup(&mapping->page_tree, pg_index);
+               rcu_read_unlock();
+-              if (page) {
++              if (page && !radix_tree_exceptional_entry(page)) {
+                       misses++;
+                       if (misses > 4)
+                               break;
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -1041,6 +1041,14 @@ extern void show_free_areas(unsigned int
+ extern bool skip_free_areas_node(unsigned int flags, int nid);
+ int shmem_zero_setup(struct vm_area_struct *);
++#ifdef CONFIG_SHMEM
++bool shmem_mapping(struct address_space *mapping);
++#else
++static inline bool shmem_mapping(struct address_space *mapping)
++{
++      return false;
++}
++#endif
+ extern int can_do_mlock(void);
+ extern int user_shm_lock(size_t, struct user_struct *);
+--- a/include/linux/pagemap.h
++++ b/include/linux/pagemap.h
+@@ -248,12 +248,15 @@ pgoff_t page_cache_next_hole(struct addr
+ pgoff_t page_cache_prev_hole(struct address_space *mapping,
+                            pgoff_t index, unsigned long max_scan);
+-extern struct page * find_get_page(struct address_space *mapping,
+-                              pgoff_t index);
+-extern struct page * find_lock_page(struct address_space *mapping,
+-                              pgoff_t index);
+-extern struct page * find_or_create_page(struct address_space *mapping,
+-                              pgoff_t index, gfp_t gfp_mask);
++struct page *find_get_entry(struct address_space *mapping, pgoff_t offset);
++struct page *find_get_page(struct address_space *mapping, pgoff_t offset);
++struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset);
++struct page *find_lock_page(struct address_space *mapping, pgoff_t offset);
++struct page *find_or_create_page(struct address_space *mapping, pgoff_t index,
++                               gfp_t gfp_mask);
++unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
++                        unsigned int nr_entries, struct page **entries,
++                        pgoff_t *indices);
+ unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
+                       unsigned int nr_pages, struct page **pages);
+ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start,
+--- a/include/linux/pagevec.h
++++ b/include/linux/pagevec.h
+@@ -22,6 +22,11 @@ struct pagevec {
+ void __pagevec_release(struct pagevec *pvec);
+ void __pagevec_lru_add(struct pagevec *pvec);
++unsigned pagevec_lookup_entries(struct pagevec *pvec,
++                              struct address_space *mapping,
++                              pgoff_t start, unsigned nr_entries,
++                              pgoff_t *indices);
++void pagevec_remove_exceptionals(struct pagevec *pvec);
+ unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
+               pgoff_t start, unsigned nr_pages);
+ unsigned pagevec_lookup_tag(struct pagevec *pvec,
+--- a/include/linux/shmem_fs.h
++++ b/include/linux/shmem_fs.h
+@@ -51,6 +51,7 @@ extern struct file *shmem_kernel_file_se
+                                           unsigned long flags);
+ extern int shmem_zero_setup(struct vm_area_struct *);
+ extern int shmem_lock(struct file *file, int lock, struct user_struct *user);
++extern bool shmem_mapping(struct address_space *mapping);
+ extern void shmem_unlock_mapping(struct address_space *mapping);
+ extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
+                                       pgoff_t index, gfp_t gfp_mask);
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -448,6 +448,29 @@ int replace_page_cache_page(struct page
+ }
+ EXPORT_SYMBOL_GPL(replace_page_cache_page);
++static int page_cache_tree_insert(struct address_space *mapping,
++                                struct page *page)
++{
++      void **slot;
++      int error;
++
++      slot = radix_tree_lookup_slot(&mapping->page_tree, page->index);
++      if (slot) {
++              void *p;
++
++              p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
++              if (!radix_tree_exceptional_entry(p))
++                      return -EEXIST;
++              radix_tree_replace_slot(slot, page);
++              mapping->nrpages++;
++              return 0;
++      }
++      error = radix_tree_insert(&mapping->page_tree, page->index, page);
++      if (!error)
++              mapping->nrpages++;
++      return error;
++}
++
+ /**
+  * add_to_page_cache_locked - add a locked page to the pagecache
+  * @page:     page to add
+@@ -482,11 +505,10 @@ int add_to_page_cache_locked(struct page
+       page->index = offset;
+       spin_lock_irq(&mapping->tree_lock);
+-      error = radix_tree_insert(&mapping->page_tree, offset, page);
++      error = page_cache_tree_insert(mapping, page);
+       radix_tree_preload_end();
+       if (unlikely(error))
+               goto err_insert;
+-      mapping->nrpages++;
+       __inc_zone_page_state(page, NR_FILE_PAGES);
+       spin_unlock_irq(&mapping->tree_lock);
+       trace_mm_filemap_add_to_page_cache(page);
+@@ -714,7 +736,10 @@ pgoff_t page_cache_next_hole(struct addr
+       unsigned long i;
+       for (i = 0; i < max_scan; i++) {
+-              if (!radix_tree_lookup(&mapping->page_tree, index))
++              struct page *page;
++
++              page = radix_tree_lookup(&mapping->page_tree, index);
++              if (!page || radix_tree_exceptional_entry(page))
+                       break;
+               index++;
+               if (index == 0)
+@@ -752,7 +777,10 @@ pgoff_t page_cache_prev_hole(struct addr
+       unsigned long i;
+       for (i = 0; i < max_scan; i++) {
+-              if (!radix_tree_lookup(&mapping->page_tree, index))
++              struct page *page;
++
++              page = radix_tree_lookup(&mapping->page_tree, index);
++              if (!page || radix_tree_exceptional_entry(page))
+                       break;
+               index--;
+               if (index == ULONG_MAX)
+@@ -764,14 +792,19 @@ pgoff_t page_cache_prev_hole(struct addr
+ EXPORT_SYMBOL(page_cache_prev_hole);
+ /**
+- * find_get_page - find and get a page reference
++ * find_get_entry - find and get a page cache entry
+  * @mapping: the address_space to search
+- * @offset: the page index
++ * @offset: the page cache index
++ *
++ * Looks up the page cache slot at @mapping & @offset.  If there is a
++ * page cache page, it is returned with an increased refcount.
++ *
++ * If the slot holds a shadow entry of a previously evicted page, it
++ * is returned.
+  *
+- * Is there a pagecache struct page at the given (mapping, offset) tuple?
+- * If yes, increment its refcount and return it; if no, return NULL.
++ * Otherwise, %NULL is returned.
+  */
+-struct page *find_get_page(struct address_space *mapping, pgoff_t offset)
++struct page *find_get_entry(struct address_space *mapping, pgoff_t offset)
+ {
+       void **pagep;
+       struct page *page;
+@@ -812,24 +845,50 @@ out:
+       return page;
+ }
+-EXPORT_SYMBOL(find_get_page);
++EXPORT_SYMBOL(find_get_entry);
+ /**
+- * find_lock_page - locate, pin and lock a pagecache page
++ * find_get_page - find and get a page reference
+  * @mapping: the address_space to search
+  * @offset: the page index
+  *
+- * Locates the desired pagecache page, locks it, increments its reference
+- * count and returns its address.
++ * Looks up the page cache slot at @mapping & @offset.  If there is a
++ * page cache page, it is returned with an increased refcount.
+  *
+- * Returns zero if the page was not present. find_lock_page() may sleep.
++ * Otherwise, %NULL is returned.
+  */
+-struct page *find_lock_page(struct address_space *mapping, pgoff_t offset)
++struct page *find_get_page(struct address_space *mapping, pgoff_t offset)
++{
++      struct page *page = find_get_entry(mapping, offset);
++
++      if (radix_tree_exceptional_entry(page))
++              page = NULL;
++      return page;
++}
++EXPORT_SYMBOL(find_get_page);
++
++/**
++ * find_lock_entry - locate, pin and lock a page cache entry
++ * @mapping: the address_space to search
++ * @offset: the page cache index
++ *
++ * Looks up the page cache slot at @mapping & @offset.  If there is a
++ * page cache page, it is returned locked and with an increased
++ * refcount.
++ *
++ * If the slot holds a shadow entry of a previously evicted page, it
++ * is returned.
++ *
++ * Otherwise, %NULL is returned.
++ *
++ * find_lock_entry() may sleep.
++ */
++struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset)
+ {
+       struct page *page;
+ repeat:
+-      page = find_get_page(mapping, offset);
++      page = find_get_entry(mapping, offset);
+       if (page && !radix_tree_exception(page)) {
+               lock_page(page);
+               /* Has the page been truncated? */
+@@ -842,6 +901,29 @@ repeat:
+       }
+       return page;
+ }
++EXPORT_SYMBOL(find_lock_entry);
++
++/**
++ * find_lock_page - locate, pin and lock a pagecache page
++ * @mapping: the address_space to search
++ * @offset: the page index
++ *
++ * Looks up the page cache slot at @mapping & @offset.  If there is a
++ * page cache page, it is returned locked and with an increased
++ * refcount.
++ *
++ * Otherwise, %NULL is returned.
++ *
++ * find_lock_page() may sleep.
++ */
++struct page *find_lock_page(struct address_space *mapping, pgoff_t offset)
++{
++      struct page *page = find_lock_entry(mapping, offset);
++
++      if (radix_tree_exceptional_entry(page))
++              page = NULL;
++      return page;
++}
+ EXPORT_SYMBOL(find_lock_page);
+ /**
+@@ -850,16 +932,18 @@ EXPORT_SYMBOL(find_lock_page);
+  * @index: the page's index into the mapping
+  * @gfp_mask: page allocation mode
+  *
+- * Locates a page in the pagecache.  If the page is not present, a new page
+- * is allocated using @gfp_mask and is added to the pagecache and to the VM's
+- * LRU list.  The returned page is locked and has its reference count
+- * incremented.
++ * Looks up the page cache slot at @mapping & @offset.  If there is a
++ * page cache page, it is returned locked and with an increased
++ * refcount.
++ *
++ * If the page is not present, a new page is allocated using @gfp_mask
++ * and added to the page cache and the VM's LRU list.  The page is
++ * returned locked and with an increased refcount.
+  *
+- * find_or_create_page() may sleep, even if @gfp_flags specifies an atomic
+- * allocation!
++ * On memory exhaustion, %NULL is returned.
+  *
+- * find_or_create_page() returns the desired page's address, or zero on
+- * memory exhaustion.
++ * find_or_create_page() may sleep, even if @gfp_flags specifies an
++ * atomic allocation!
+  */
+ struct page *find_or_create_page(struct address_space *mapping,
+               pgoff_t index, gfp_t gfp_mask)
+@@ -892,6 +976,76 @@ repeat:
+ EXPORT_SYMBOL(find_or_create_page);
+ /**
++ * find_get_entries - gang pagecache lookup
++ * @mapping:  The address_space to search
++ * @start:    The starting page cache index
++ * @nr_entries:       The maximum number of entries
++ * @entries:  Where the resulting entries are placed
++ * @indices:  The cache indices corresponding to the entries in @entries
++ *
++ * find_get_entries() will search for and return a group of up to
++ * @nr_entries entries in the mapping.  The entries are placed at
++ * @entries.  find_get_entries() takes a reference against any actual
++ * pages it returns.
++ *
++ * The search returns a group of mapping-contiguous page cache entries
++ * with ascending indexes.  There may be holes in the indices due to
++ * not-present pages.
++ *
++ * Any shadow entries of evicted pages are included in the returned
++ * array.
++ *
++ * find_get_entries() returns the number of pages and shadow entries
++ * which were found.
++ */
++unsigned find_get_entries(struct address_space *mapping,
++                        pgoff_t start, unsigned int nr_entries,
++                        struct page **entries, pgoff_t *indices)
++{
++      void **slot;
++      unsigned int ret = 0;
++      struct radix_tree_iter iter;
++
++      if (!nr_entries)
++              return 0;
++
++      rcu_read_lock();
++restart:
++      radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
++              struct page *page;
++repeat:
++              page = radix_tree_deref_slot(slot);
++              if (unlikely(!page))
++                      continue;
++              if (radix_tree_exception(page)) {
++                      if (radix_tree_deref_retry(page))
++                              goto restart;
++                      /*
++                       * Otherwise, we must be storing a swap entry
++                       * here as an exceptional entry: so return it
++                       * without attempting to raise page count.
++                       */
++                      goto export;
++              }
++              if (!page_cache_get_speculative(page))
++                      goto repeat;
++
++              /* Has the page moved? */
++              if (unlikely(page != *slot)) {
++                      page_cache_release(page);
++                      goto repeat;
++              }
++export:
++              indices[ret] = iter.index;
++              entries[ret] = page;
++              if (++ret == nr_entries)
++                      break;
++      }
++      rcu_read_unlock();
++      return ret;
++}
++
++/**
+  * find_get_pages - gang pagecache lookup
+  * @mapping:  The address_space to search
+  * @start:    The starting page index
+--- a/mm/mincore.c
++++ b/mm/mincore.c
+@@ -70,13 +70,21 @@ static unsigned char mincore_page(struct
+        * any other file mapping (ie. marked !present and faulted in with
+        * tmpfs's .fault). So swapped out tmpfs mappings are tested here.
+        */
+-      page = find_get_page(mapping, pgoff);
+ #ifdef CONFIG_SWAP
+-      /* shmem/tmpfs may return swap: account for swapcache page too. */
+-      if (radix_tree_exceptional_entry(page)) {
+-              swp_entry_t swap = radix_to_swp_entry(page);
+-              page = find_get_page(swap_address_space(swap), swap.val);
+-      }
++      if (shmem_mapping(mapping)) {
++              page = find_get_entry(mapping, pgoff);
++              /*
++               * shmem/tmpfs may return swap: account for swapcache
++               * page too.
++               */
++              if (radix_tree_exceptional_entry(page)) {
++                      swp_entry_t swp = radix_to_swp_entry(page);
++                      page = find_get_page(swap_address_space(swp), swp.val);
++              }
++      } else
++              page = find_get_page(mapping, pgoff);
++#else
++      page = find_get_page(mapping, pgoff);
+ #endif
+       if (page) {
+               present = PageUptodate(page);
+--- a/mm/readahead.c
++++ b/mm/readahead.c
+@@ -179,7 +179,7 @@ __do_page_cache_readahead(struct address
+               rcu_read_lock();
+               page = radix_tree_lookup(&mapping->page_tree, page_offset);
+               rcu_read_unlock();
+-              if (page)
++              if (page && !radix_tree_exceptional_entry(page))
+                       continue;
+               page = page_cache_alloc_readahead(mapping);
+--- a/mm/shmem.c
++++ b/mm/shmem.c
+@@ -330,56 +330,6 @@ static void shmem_delete_from_page_cache
+ }
+ /*
+- * Like find_get_pages, but collecting swap entries as well as pages.
+- */
+-static unsigned shmem_find_get_pages_and_swap(struct address_space *mapping,
+-                                      pgoff_t start, unsigned int nr_pages,
+-                                      struct page **pages, pgoff_t *indices)
+-{
+-      void **slot;
+-      unsigned int ret = 0;
+-      struct radix_tree_iter iter;
+-
+-      if (!nr_pages)
+-              return 0;
+-
+-      rcu_read_lock();
+-restart:
+-      radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
+-              struct page *page;
+-repeat:
+-              page = radix_tree_deref_slot(slot);
+-              if (unlikely(!page))
+-                      continue;
+-              if (radix_tree_exception(page)) {
+-                      if (radix_tree_deref_retry(page))
+-                              goto restart;
+-                      /*
+-                       * Otherwise, we must be storing a swap entry
+-                       * here as an exceptional entry: so return it
+-                       * without attempting to raise page count.
+-                       */
+-                      goto export;
+-              }
+-              if (!page_cache_get_speculative(page))
+-                      goto repeat;
+-
+-              /* Has the page moved? */
+-              if (unlikely(page != *slot)) {
+-                      page_cache_release(page);
+-                      goto repeat;
+-              }
+-export:
+-              indices[ret] = iter.index;
+-              pages[ret] = page;
+-              if (++ret == nr_pages)
+-                      break;
+-      }
+-      rcu_read_unlock();
+-      return ret;
+-}
+-
+-/*
+  * Remove swap entry from radix tree, free the swap and its page cache.
+  */
+ static int shmem_free_swap(struct address_space *mapping,
+@@ -397,21 +347,6 @@ static int shmem_free_swap(struct addres
+ }
+ /*
+- * Pagevec may contain swap entries, so shuffle up pages before releasing.
+- */
+-static void shmem_deswap_pagevec(struct pagevec *pvec)
+-{
+-      int i, j;
+-
+-      for (i = 0, j = 0; i < pagevec_count(pvec); i++) {
+-              struct page *page = pvec->pages[i];
+-              if (!radix_tree_exceptional_entry(page))
+-                      pvec->pages[j++] = page;
+-      }
+-      pvec->nr = j;
+-}
+-
+-/*
+  * SysV IPC SHM_UNLOCK restore Unevictable pages to their evictable lists.
+  */
+ void shmem_unlock_mapping(struct address_space *mapping)
+@@ -429,12 +364,12 @@ void shmem_unlock_mapping(struct address
+                * Avoid pagevec_lookup(): find_get_pages() returns 0 as if it
+                * has finished, if it hits a row of PAGEVEC_SIZE swap entries.
+                */
+-              pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
+-                                      PAGEVEC_SIZE, pvec.pages, indices);
++              pvec.nr = find_get_entries(mapping, index,
++                                         PAGEVEC_SIZE, pvec.pages, indices);
+               if (!pvec.nr)
+                       break;
+               index = indices[pvec.nr - 1] + 1;
+-              shmem_deswap_pagevec(&pvec);
++              pagevec_remove_exceptionals(&pvec);
+               check_move_unevictable_pages(pvec.pages, pvec.nr);
+               pagevec_release(&pvec);
+               cond_resched();
+@@ -466,9 +401,9 @@ static void shmem_undo_range(struct inod
+       pagevec_init(&pvec, 0);
+       index = start;
+       while (index < end) {
+-              pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
+-                              min(end - index, (pgoff_t)PAGEVEC_SIZE),
+-                                                      pvec.pages, indices);
++              pvec.nr = find_get_entries(mapping, index,
++                      min(end - index, (pgoff_t)PAGEVEC_SIZE),
++                      pvec.pages, indices);
+               if (!pvec.nr)
+                       break;
+               mem_cgroup_uncharge_start();
+@@ -497,7 +432,7 @@ static void shmem_undo_range(struct inod
+                       }
+                       unlock_page(page);
+               }
+-              shmem_deswap_pagevec(&pvec);
++              pagevec_remove_exceptionals(&pvec);
+               pagevec_release(&pvec);
+               mem_cgroup_uncharge_end();
+               cond_resched();
+@@ -535,9 +470,10 @@ static void shmem_undo_range(struct inod
+       index = start;
+       while (index < end) {
+               cond_resched();
+-              pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
++
++              pvec.nr = find_get_entries(mapping, index,
+                               min(end - index, (pgoff_t)PAGEVEC_SIZE),
+-                                                      pvec.pages, indices);
++                              pvec.pages, indices);
+               if (!pvec.nr) {
+                       /* If all gone or hole-punch or unfalloc, we're done */
+                       if (index == start || end != -1)
+@@ -580,7 +516,7 @@ static void shmem_undo_range(struct inod
+                       }
+                       unlock_page(page);
+               }
+-              shmem_deswap_pagevec(&pvec);
++              pagevec_remove_exceptionals(&pvec);
+               pagevec_release(&pvec);
+               mem_cgroup_uncharge_end();
+               index++;
+@@ -1087,7 +1023,7 @@ static int shmem_getpage_gfp(struct inod
+               return -EFBIG;
+ repeat:
+       swap.val = 0;
+-      page = find_lock_page(mapping, index);
++      page = find_lock_entry(mapping, index);
+       if (radix_tree_exceptional_entry(page)) {
+               swap = radix_to_swp_entry(page);
+               page = NULL;
+@@ -1482,6 +1418,11 @@ static struct inode *shmem_get_inode(str
+       return inode;
+ }
++bool shmem_mapping(struct address_space *mapping)
++{
++      return mapping->backing_dev_info == &shmem_backing_dev_info;
++}
++
+ #ifdef CONFIG_TMPFS
+ static const struct inode_operations shmem_symlink_inode_operations;
+ static const struct inode_operations shmem_short_symlink_operations;
+@@ -1794,7 +1735,7 @@ static pgoff_t shmem_seek_hole_data(stru
+       pagevec_init(&pvec, 0);
+       pvec.nr = 1;            /* start small: we may be there already */
+       while (!done) {
+-              pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
++              pvec.nr = find_get_entries(mapping, index,
+                                       pvec.nr, pvec.pages, indices);
+               if (!pvec.nr) {
+                       if (whence == SEEK_DATA)
+@@ -1821,7 +1762,7 @@ static pgoff_t shmem_seek_hole_data(stru
+                               break;
+                       }
+               }
+-              shmem_deswap_pagevec(&pvec);
++              pagevec_remove_exceptionals(&pvec);
+               pagevec_release(&pvec);
+               pvec.nr = PAGEVEC_SIZE;
+               cond_resched();
+--- a/mm/swap.c
++++ b/mm/swap.c
+@@ -948,6 +948,57 @@ void __pagevec_lru_add(struct pagevec *p
+ EXPORT_SYMBOL(__pagevec_lru_add);
+ /**
++ * pagevec_lookup_entries - gang pagecache lookup
++ * @pvec:     Where the resulting entries are placed
++ * @mapping:  The address_space to search
++ * @start:    The starting entry index
++ * @nr_entries:       The maximum number of entries
++ * @indices:  The cache indices corresponding to the entries in @pvec
++ *
++ * pagevec_lookup_entries() will search for and return a group of up
++ * to @nr_entries pages and shadow entries in the mapping.  All
++ * entries are placed in @pvec.  pagevec_lookup_entries() takes a
++ * reference against actual pages in @pvec.
++ *
++ * The search returns a group of mapping-contiguous entries with
++ * ascending indexes.  There may be holes in the indices due to
++ * not-present entries.
++ *
++ * pagevec_lookup_entries() returns the number of entries which were
++ * found.
++ */
++unsigned pagevec_lookup_entries(struct pagevec *pvec,
++                              struct address_space *mapping,
++                              pgoff_t start, unsigned nr_pages,
++                              pgoff_t *indices)
++{
++      pvec->nr = find_get_entries(mapping, start, nr_pages,
++                                  pvec->pages, indices);
++      return pagevec_count(pvec);
++}
++
++/**
++ * pagevec_remove_exceptionals - pagevec exceptionals pruning
++ * @pvec:     The pagevec to prune
++ *
++ * pagevec_lookup_entries() fills both pages and exceptional radix
++ * tree entries into the pagevec.  This function prunes all
++ * exceptionals from @pvec without leaving holes, so that it can be
++ * passed on to page-only pagevec operations.
++ */
++void pagevec_remove_exceptionals(struct pagevec *pvec)
++{
++      int i, j;
++
++      for (i = 0, j = 0; i < pagevec_count(pvec); i++) {
++              struct page *page = pvec->pages[i];
++              if (!radix_tree_exceptional_entry(page))
++                      pvec->pages[j++] = page;
++      }
++      pvec->nr = j;
++}
++
++/**
+  * pagevec_lookup - gang pagecache lookup
+  * @pvec:     Where the resulting pages are placed
+  * @mapping:  The address_space to search
+--- a/mm/truncate.c
++++ b/mm/truncate.c
+@@ -23,6 +23,22 @@
+ #include <linux/rmap.h>
+ #include "internal.h"
++static void clear_exceptional_entry(struct address_space *mapping,
++                                  pgoff_t index, void *entry)
++{
++      /* Handled by shmem itself */
++      if (shmem_mapping(mapping))
++              return;
++
++      spin_lock_irq(&mapping->tree_lock);
++      /*
++       * Regular page slots are stabilized by the page lock even
++       * without the tree itself locked.  These unlocked entries
++       * need verification under the tree lock.
++       */
++      radix_tree_delete_item(&mapping->page_tree, index, entry);
++      spin_unlock_irq(&mapping->tree_lock);
++}
+ /**
+  * do_invalidatepage - invalidate part or all of a page
+@@ -209,6 +225,7 @@ void truncate_inode_pages_range(struct a
+       unsigned int    partial_start;  /* inclusive */
+       unsigned int    partial_end;    /* exclusive */
+       struct pagevec  pvec;
++      pgoff_t         indices[PAGEVEC_SIZE];
+       pgoff_t         index;
+       int             i;
+@@ -239,17 +256,23 @@ void truncate_inode_pages_range(struct a
+       pagevec_init(&pvec, 0);
+       index = start;
+-      while (index < end && pagevec_lookup(&pvec, mapping, index,
+-                      min(end - index, (pgoff_t)PAGEVEC_SIZE))) {
++      while (index < end && pagevec_lookup_entries(&pvec, mapping, index,
++                      min(end - index, (pgoff_t)PAGEVEC_SIZE),
++                      indices)) {
+               mem_cgroup_uncharge_start();
+               for (i = 0; i < pagevec_count(&pvec); i++) {
+                       struct page *page = pvec.pages[i];
+                       /* We rely upon deletion not changing page->index */
+-                      index = page->index;
++                      index = indices[i];
+                       if (index >= end)
+                               break;
++                      if (radix_tree_exceptional_entry(page)) {
++                              clear_exceptional_entry(mapping, index, page);
++                              continue;
++                      }
++
+                       if (!trylock_page(page))
+                               continue;
+                       WARN_ON(page->index != index);
+@@ -260,6 +283,7 @@ void truncate_inode_pages_range(struct a
+                       truncate_inode_page(mapping, page);
+                       unlock_page(page);
+               }
++              pagevec_remove_exceptionals(&pvec);
+               pagevec_release(&pvec);
+               mem_cgroup_uncharge_end();
+               cond_resched();
+@@ -308,14 +332,16 @@ void truncate_inode_pages_range(struct a
+       index = start;
+       for ( ; ; ) {
+               cond_resched();
+-              if (!pagevec_lookup(&pvec, mapping, index,
+-                      min(end - index, (pgoff_t)PAGEVEC_SIZE))) {
++              if (!pagevec_lookup_entries(&pvec, mapping, index,
++                      min(end - index, (pgoff_t)PAGEVEC_SIZE),
++                      indices)) {
+                       if (index == start)
+                               break;
+                       index = start;
+                       continue;
+               }
+-              if (index == start && pvec.pages[0]->index >= end) {
++              if (index == start && indices[0] >= end) {
++                      pagevec_remove_exceptionals(&pvec);
+                       pagevec_release(&pvec);
+                       break;
+               }
+@@ -324,16 +350,22 @@ void truncate_inode_pages_range(struct a
+                       struct page *page = pvec.pages[i];
+                       /* We rely upon deletion not changing page->index */
+-                      index = page->index;
++                      index = indices[i];
+                       if (index >= end)
+                               break;
++                      if (radix_tree_exceptional_entry(page)) {
++                              clear_exceptional_entry(mapping, index, page);
++                              continue;
++                      }
++
+                       lock_page(page);
+                       WARN_ON(page->index != index);
+                       wait_on_page_writeback(page);
+                       truncate_inode_page(mapping, page);
+                       unlock_page(page);
+               }
++              pagevec_remove_exceptionals(&pvec);
+               pagevec_release(&pvec);
+               mem_cgroup_uncharge_end();
+               index++;
+@@ -376,6 +408,7 @@ EXPORT_SYMBOL(truncate_inode_pages);
+ unsigned long invalidate_mapping_pages(struct address_space *mapping,
+               pgoff_t start, pgoff_t end)
+ {
++      pgoff_t indices[PAGEVEC_SIZE];
+       struct pagevec pvec;
+       pgoff_t index = start;
+       unsigned long ret;
+@@ -391,17 +424,23 @@ unsigned long invalidate_mapping_pages(s
+        */
+       pagevec_init(&pvec, 0);
+-      while (index <= end && pagevec_lookup(&pvec, mapping, index,
+-                      min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
++      while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
++                      min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
++                      indices)) {
+               mem_cgroup_uncharge_start();
+               for (i = 0; i < pagevec_count(&pvec); i++) {
+                       struct page *page = pvec.pages[i];
+                       /* We rely upon deletion not changing page->index */
+-                      index = page->index;
++                      index = indices[i];
+                       if (index > end)
+                               break;
++                      if (radix_tree_exceptional_entry(page)) {
++                              clear_exceptional_entry(mapping, index, page);
++                              continue;
++                      }
++
+                       if (!trylock_page(page))
+                               continue;
+                       WARN_ON(page->index != index);
+@@ -415,6 +454,7 @@ unsigned long invalidate_mapping_pages(s
+                               deactivate_page(page);
+                       count += ret;
+               }
++              pagevec_remove_exceptionals(&pvec);
+               pagevec_release(&pvec);
+               mem_cgroup_uncharge_end();
+               cond_resched();
+@@ -482,6 +522,7 @@ static int do_launder_page(struct addres
+ int invalidate_inode_pages2_range(struct address_space *mapping,
+                                 pgoff_t start, pgoff_t end)
+ {
++      pgoff_t indices[PAGEVEC_SIZE];
+       struct pagevec pvec;
+       pgoff_t index;
+       int i;
+@@ -492,17 +533,23 @@ int invalidate_inode_pages2_range(struct
+       cleancache_invalidate_inode(mapping);
+       pagevec_init(&pvec, 0);
+       index = start;
+-      while (index <= end && pagevec_lookup(&pvec, mapping, index,
+-                      min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
++      while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
++                      min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
++                      indices)) {
+               mem_cgroup_uncharge_start();
+               for (i = 0; i < pagevec_count(&pvec); i++) {
+                       struct page *page = pvec.pages[i];
+                       /* We rely upon deletion not changing page->index */
+-                      index = page->index;
++                      index = indices[i];
+                       if (index > end)
+                               break;
++                      if (radix_tree_exceptional_entry(page)) {
++                              clear_exceptional_entry(mapping, index, page);
++                              continue;
++                      }
++
+                       lock_page(page);
+                       WARN_ON(page->index != index);
+                       if (page->mapping != mapping) {
+@@ -540,6 +587,7 @@ int invalidate_inode_pages2_range(struct
+                               ret = ret2;
+                       unlock_page(page);
+               }
++              pagevec_remove_exceptionals(&pvec);
+               pagevec_release(&pvec);
+               mem_cgroup_uncharge_end();
+               cond_resched();
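
The truncate.c and shmem.c hunks above all follow one pattern: look pages up together with their radix tree indices, skip or clear the exceptional slots, and prune those slots before the pagevec is handed to page-only helpers. A minimal sketch of that loop, assuming only the 3.14 interfaces added by this patch; the helper name walk_mapping_entries() is invented for illustration and is not part of the patch.

/*
 * Sketch only, against the interfaces added above; the helper is invented
 * and the body is illustrative.
 */
#include <linux/pagevec.h>
#include <linux/pagemap.h>
#include <linux/radix-tree.h>
#include <linux/sched.h>

static void walk_mapping_entries(struct address_space *mapping)
{
        pgoff_t indices[PAGEVEC_SIZE];
        struct pagevec pvec;
        pgoff_t index = 0;
        int i;

        pagevec_init(&pvec, 0);
        while (pagevec_lookup_entries(&pvec, mapping, index,
                                      PAGEVEC_SIZE, indices)) {
                for (i = 0; i < pagevec_count(&pvec); i++) {
                        struct page *page = pvec.pages[i];

                        /* indices[] is valid even for shadow/swap entries */
                        index = indices[i];

                        /* Exceptional entry: no struct page to lock or pin */
                        if (radix_tree_exceptional_entry(page))
                                continue;

                        if (!trylock_page(page))
                                continue;
                        /* ... operate on the real, locked page here ... */
                        unlock_page(page);
                }
                /* Prune exceptional slots before page-only pagevec ops */
                pagevec_remove_exceptionals(&pvec);
                pagevec_release(&pvec);
                cond_resched();
                index++;
        }
}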
diff --git a/queue-3.14/mm-madvise-fix-madv_willneed-on-shmem-swapouts.patch b/queue-3.14/mm-madvise-fix-madv_willneed-on-shmem-swapouts.patch
new file mode 100644 (file)
index 0000000..b593627
--- /dev/null
@@ -0,0 +1,42 @@
+From 55231e5c898c5c03c14194001e349f40f59bd300 Mon Sep 17 00:00:00 2001
+From: Johannes Weiner <hannes@cmpxchg.org>
+Date: Thu, 22 May 2014 11:54:17 -0700
+Subject: mm: madvise: fix MADV_WILLNEED on shmem swapouts
+
+From: Johannes Weiner <hannes@cmpxchg.org>
+
+commit 55231e5c898c5c03c14194001e349f40f59bd300 upstream.
+
+MADV_WILLNEED currently does not read swapped out shmem pages back in.
+
+Commit 0cd6144aadd2 ("mm + fs: prepare for non-page entries in page
+cache radix trees") made find_get_page() filter exceptional radix tree
+entries but failed to convert all find_get_page() callers that WANT
+exceptional entries over to find_get_entry().  One of them is shmem swap
+readahead in madvise, which now skips over any swap-out records.
+
+Convert it to find_get_entry().
+
+Fixes: 0cd6144aadd2 ("mm + fs: prepare for non-page entries in page cache radix trees")
+Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
+Reported-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/madvise.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/madvise.c
++++ b/mm/madvise.c
+@@ -195,7 +195,7 @@ static void force_shm_swapin_readahead(s
+       for (; start < end; start += PAGE_SIZE) {
+               index = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
+-              page = find_get_page(mapping, index);
++              page = find_get_entry(mapping, index);
+               if (!radix_tree_exceptional_entry(page)) {
+                       if (page)
+                               page_cache_release(page);
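
The one-line fix above matters because find_get_page() now filters exceptional entries while find_get_entry() returns them. A hedged sketch of the calling convention for code that, like the shmem swap readahead path, wants to see swap-out records; the helper demo_inspect_slot() is invented, only the called functions come from this series.

/*
 * Sketch only, not part of the patch: tell a real page apart from an
 * exceptional entry returned by find_get_entry().
 */
#include <linux/pagemap.h>
#include <linux/radix-tree.h>
#include <linux/swap.h>
#include <linux/swapops.h>

static void demo_inspect_slot(struct address_space *mapping, pgoff_t index)
{
        struct page *page = find_get_entry(mapping, index);

        if (radix_tree_exceptional_entry(page)) {
                /* Swapped-out shmem page: decode it, never dereference it */
                swp_entry_t swap = radix_to_swp_entry(page);

                (void)swap;     /* madvise hands this to swap readahead */
                return;
        }

        if (page)
                /* find_get_entry() took a reference on the real page */
                page_cache_release(page);
}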
diff --git a/queue-3.14/mm-migration-add-destination-page-freeing-callback.patch b/queue-3.14/mm-migration-add-destination-page-freeing-callback.patch
new file mode 100644 (file)
index 0000000..4399621
--- /dev/null
@@ -0,0 +1,285 @@
+From 68711a746345c44ae00c64d8dbac6a9ce13ac54a Mon Sep 17 00:00:00 2001
+From: David Rientjes <rientjes@google.com>
+Date: Wed, 4 Jun 2014 16:08:25 -0700
+Subject: mm, migration: add destination page freeing callback
+
+From: David Rientjes <rientjes@google.com>
+
+commit 68711a746345c44ae00c64d8dbac6a9ce13ac54a upstream.
+
+Memory migration uses a callback defined by the caller to determine how to
+allocate destination pages.  When migration fails for a source page,
+however, it frees the destination page back to the system.
+
+This patch adds a memory migration callback defined by the caller to
+determine how to free destination pages.  If a caller, such as memory
+compaction, builds its own freelist for migration targets, this can reuse
+already freed memory instead of scanning additional memory.
+
+If the caller provides a function to handle freeing of destination pages,
+it is called when page migration fails.  If the caller passes NULL then
+freeing back to the system will be handled as usual.  This patch
+introduces no functional change.
+
+Signed-off-by: David Rientjes <rientjes@google.com>
+Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Acked-by: Mel Gorman <mgorman@suse.de>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Greg Thelen <gthelen@google.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+
+---
+ include/linux/migrate.h |   11 ++++++---
+ mm/compaction.c         |    2 -
+ mm/memory-failure.c     |    4 +--
+ mm/memory_hotplug.c     |    2 -
+ mm/mempolicy.c          |    4 +--
+ mm/migrate.c            |   55 ++++++++++++++++++++++++++++++++++--------------
+ mm/page_alloc.c         |    2 -
+ 7 files changed, 53 insertions(+), 27 deletions(-)
+
+--- a/include/linux/migrate.h
++++ b/include/linux/migrate.h
+@@ -5,7 +5,9 @@
+ #include <linux/mempolicy.h>
+ #include <linux/migrate_mode.h>
+-typedef struct page *new_page_t(struct page *, unsigned long private, int **);
++typedef struct page *new_page_t(struct page *page, unsigned long private,
++                              int **reason);
++typedef void free_page_t(struct page *page, unsigned long private);
+ /*
+  * Return values from addresss_space_operations.migratepage():
+@@ -38,7 +40,7 @@ enum migrate_reason {
+ extern void putback_movable_pages(struct list_head *l);
+ extern int migrate_page(struct address_space *,
+                       struct page *, struct page *, enum migrate_mode);
+-extern int migrate_pages(struct list_head *l, new_page_t x,
++extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free,
+               unsigned long private, enum migrate_mode mode, int reason);
+ extern int migrate_prep(void);
+@@ -56,8 +58,9 @@ extern int migrate_page_move_mapping(str
+ #else
+ static inline void putback_movable_pages(struct list_head *l) {}
+-static inline int migrate_pages(struct list_head *l, new_page_t x,
+-              unsigned long private, enum migrate_mode mode, int reason)
++static inline int migrate_pages(struct list_head *l, new_page_t new,
++              free_page_t free, unsigned long private, enum migrate_mode mode,
++              int reason)
+       { return -ENOSYS; }
+ static inline int migrate_prep(void) { return -ENOSYS; }
+--- a/mm/compaction.c
++++ b/mm/compaction.c
+@@ -1016,7 +1016,7 @@ static int compact_zone(struct zone *zon
+               }
+               nr_migrate = cc->nr_migratepages;
+-              err = migrate_pages(&cc->migratepages, compaction_alloc,
++              err = migrate_pages(&cc->migratepages, compaction_alloc, NULL,
+                               (unsigned long)cc,
+                               cc->sync ? MIGRATE_SYNC_LIGHT : MIGRATE_ASYNC,
+                               MR_COMPACTION);
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -1540,7 +1540,7 @@ static int soft_offline_huge_page(struct
+       /* Keep page count to indicate a given hugepage is isolated. */
+       list_move(&hpage->lru, &pagelist);
+-      ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL,
++      ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL,
+                               MIGRATE_SYNC, MR_MEMORY_FAILURE);
+       if (ret) {
+               pr_info("soft offline: %#lx: migration failed %d, type %lx\n",
+@@ -1621,7 +1621,7 @@ static int __soft_offline_page(struct pa
+               inc_zone_page_state(page, NR_ISOLATED_ANON +
+                                       page_is_file_cache(page));
+               list_add(&page->lru, &pagelist);
+-              ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL,
++              ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL,
+                                       MIGRATE_SYNC, MR_MEMORY_FAILURE);
+               if (ret) {
+                       if (!list_empty(&pagelist)) {
+--- a/mm/memory_hotplug.c
++++ b/mm/memory_hotplug.c
+@@ -1332,7 +1332,7 @@ do_migrate_range(unsigned long start_pfn
+                * alloc_migrate_target should be improooooved!!
+                * migrate_pages returns # of failed pages.
+                */
+-              ret = migrate_pages(&source, alloc_migrate_target, 0,
++              ret = migrate_pages(&source, alloc_migrate_target, NULL, 0,
+                                       MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
+               if (ret)
+                       putback_movable_pages(&source);
+--- a/mm/mempolicy.c
++++ b/mm/mempolicy.c
+@@ -1060,7 +1060,7 @@ static int migrate_to_node(struct mm_str
+                       flags | MPOL_MF_DISCONTIG_OK, &pagelist);
+       if (!list_empty(&pagelist)) {
+-              err = migrate_pages(&pagelist, new_node_page, dest,
++              err = migrate_pages(&pagelist, new_node_page, NULL, dest,
+                                       MIGRATE_SYNC, MR_SYSCALL);
+               if (err)
+                       putback_movable_pages(&pagelist);
+@@ -1306,7 +1306,7 @@ static long do_mbind(unsigned long start
+               if (!list_empty(&pagelist)) {
+                       WARN_ON_ONCE(flags & MPOL_MF_LAZY);
+-                      nr_failed = migrate_pages(&pagelist, new_page,
++                      nr_failed = migrate_pages(&pagelist, new_page, NULL,
+                               start, MIGRATE_SYNC, MR_MEMPOLICY_MBIND);
+                       if (nr_failed)
+                               putback_movable_pages(&pagelist);
+--- a/mm/migrate.c
++++ b/mm/migrate.c
+@@ -941,8 +941,9 @@ out:
+  * Obtain the lock on page, remove all ptes and migrate the page
+  * to the newly allocated page in newpage.
+  */
+-static int unmap_and_move(new_page_t get_new_page, unsigned long private,
+-                      struct page *page, int force, enum migrate_mode mode)
++static int unmap_and_move(new_page_t get_new_page, free_page_t put_new_page,
++                      unsigned long private, struct page *page, int force,
++                      enum migrate_mode mode)
+ {
+       int rc = 0;
+       int *result = NULL;
+@@ -986,11 +987,17 @@ out:
+                               page_is_file_cache(page));
+               putback_lru_page(page);
+       }
++
+       /*
+-       * Move the new page to the LRU. If migration was not successful
+-       * then this will free the page.
++       * If migration was not successful and there's a freeing callback, use
++       * it.  Otherwise, putback_lru_page() will drop the reference grabbed
++       * during isolation.
+        */
+-      putback_lru_page(newpage);
++      if (rc != MIGRATEPAGE_SUCCESS && put_new_page)
++              put_new_page(newpage, private);
++      else
++              putback_lru_page(newpage);
++
+       if (result) {
+               if (rc)
+                       *result = rc;
+@@ -1019,8 +1026,9 @@ out:
+  * will wait in the page fault for migration to complete.
+  */
+ static int unmap_and_move_huge_page(new_page_t get_new_page,
+-                              unsigned long private, struct page *hpage,
+-                              int force, enum migrate_mode mode)
++                              free_page_t put_new_page, unsigned long private,
++                              struct page *hpage, int force,
++                              enum migrate_mode mode)
+ {
+       int rc = 0;
+       int *result = NULL;
+@@ -1059,20 +1067,30 @@ static int unmap_and_move_huge_page(new_
+       if (!page_mapped(hpage))
+               rc = move_to_new_page(new_hpage, hpage, 1, mode);
+-      if (rc)
++      if (rc != MIGRATEPAGE_SUCCESS)
+               remove_migration_ptes(hpage, hpage);
+       if (anon_vma)
+               put_anon_vma(anon_vma);
+-      if (!rc)
++      if (rc == MIGRATEPAGE_SUCCESS)
+               hugetlb_cgroup_migrate(hpage, new_hpage);
+       unlock_page(hpage);
+ out:
+       if (rc != -EAGAIN)
+               putback_active_hugepage(hpage);
+-      put_page(new_hpage);
++
++      /*
++       * If migration was not successful and there's a freeing callback, use
++       * it.  Otherwise, put_page() will drop the reference grabbed during
++       * isolation.
++       */
++      if (rc != MIGRATEPAGE_SUCCESS && put_new_page)
++              put_new_page(new_hpage, private);
++      else
++              put_page(new_hpage);
++
+       if (result) {
+               if (rc)
+                       *result = rc;
+@@ -1089,6 +1107,8 @@ out:
+  * @from:             The list of pages to be migrated.
+  * @get_new_page:     The function used to allocate free pages to be used
+  *                    as the target of the page migration.
++ * @put_new_page:     The function used to free target pages if migration
++ *                    fails, or NULL if no special handling is necessary.
+  * @private:          Private data to be passed on to get_new_page()
+  * @mode:             The migration mode that specifies the constraints for
+  *                    page migration, if any.
+@@ -1102,7 +1122,8 @@ out:
+  * Returns the number of pages that were not migrated, or an error code.
+  */
+ int migrate_pages(struct list_head *from, new_page_t get_new_page,
+-              unsigned long private, enum migrate_mode mode, int reason)
++              free_page_t put_new_page, unsigned long private,
++              enum migrate_mode mode, int reason)
+ {
+       int retry = 1;
+       int nr_failed = 0;
+@@ -1124,10 +1145,11 @@ int migrate_pages(struct list_head *from
+                       if (PageHuge(page))
+                               rc = unmap_and_move_huge_page(get_new_page,
+-                                              private, page, pass > 2, mode);
++                                              put_new_page, private, page,
++                                              pass > 2, mode);
+                       else
+-                              rc = unmap_and_move(get_new_page, private,
+-                                              page, pass > 2, mode);
++                              rc = unmap_and_move(get_new_page, put_new_page,
++                                              private, page, pass > 2, mode);
+                       switch(rc) {
+                       case -ENOMEM:
+@@ -1276,7 +1298,7 @@ set_status:
+       err = 0;
+       if (!list_empty(&pagelist)) {
+-              err = migrate_pages(&pagelist, new_page_node,
++              err = migrate_pages(&pagelist, new_page_node, NULL,
+                               (unsigned long)pm, MIGRATE_SYNC, MR_SYSCALL);
+               if (err)
+                       putback_movable_pages(&pagelist);
+@@ -1732,7 +1754,8 @@ int migrate_misplaced_page(struct page *
+       list_add(&page->lru, &migratepages);
+       nr_remaining = migrate_pages(&migratepages, alloc_misplaced_dst_page,
+-                                   node, MIGRATE_ASYNC, MR_NUMA_MISPLACED);
++                                   NULL, node, MIGRATE_ASYNC,
++                                   MR_NUMA_MISPLACED);
+       if (nr_remaining) {
+               if (!list_empty(&migratepages)) {
+                       list_del(&page->lru);
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -6261,7 +6261,7 @@ static int __alloc_contig_migrate_range(
+               cc->nr_migratepages -= nr_reclaimed;
+               ret = migrate_pages(&cc->migratepages, alloc_migrate_target,
+-                                  0, MIGRATE_SYNC, MR_CMA);
++                                  NULL, 0, MIGRATE_SYNC, MR_CMA);
+       }
+       if (ret < 0) {
+               putback_movable_pages(&cc->migratepages);
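
A hedged sketch of the resulting calling convention: an allocation callback paired with a freeing callback, or NULL to keep the old behaviour. The two callbacks, the helper, and the choice of MR_SYSCALL are invented for illustration and do not appear in the patch.

/* Illustrative caller of the new migrate_pages() signature. */
#include <linux/migrate.h>
#include <linux/gfp.h>
#include <linux/mm.h>

/* new_page_t: supply a destination page for @page */
static struct page *demo_alloc_dst(struct page *page, unsigned long private,
                                   int **reason)
{
        /* A caller like compaction would pull from its private freelist */
        return alloc_page(GFP_HIGHUSER_MOVABLE);
}

/* free_page_t: invoked only when migration into the destination failed */
static void demo_free_dst(struct page *page, unsigned long private)
{
        /* Return the unused destination to wherever demo_alloc_dst() got it */
        __free_page(page);
}

static int demo_migrate(struct list_head *pages)
{
        /*
         * Passing NULL instead of demo_free_dst keeps the old behaviour:
         * an unused destination page is put back and freed to the system.
         */
        return migrate_pages(pages, demo_alloc_dst, demo_free_dst,
                             0, MIGRATE_SYNC, MR_SYSCALL);
}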
diff --git a/queue-3.14/mm-readahead.c-inline-ra_submit.patch b/queue-3.14/mm-readahead.c-inline-ra_submit.patch
new file mode 100644 (file)
index 0000000..4e81e59
--- /dev/null
@@ -0,0 +1,123 @@
+From 29f175d125f0f3a9503af8a5596f93d714cceb08 Mon Sep 17 00:00:00 2001
+From: Fabian Frederick <fabf@skynet.be>
+Date: Mon, 7 Apr 2014 15:37:55 -0700
+Subject: mm/readahead.c: inline ra_submit
+
+From: Fabian Frederick <fabf@skynet.be>
+
+commit 29f175d125f0f3a9503af8a5596f93d714cceb08 upstream.
+
+Commit f9acc8c7b35a ("readahead: sanify file_ra_state names") left
+ra_submit with a single function call.
+
+Move ra_submit to internal.h and inline it to save some stack.  Thanks
+to Andrew Morton for commenting on different versions.
+
+Signed-off-by: Fabian Frederick <fabf@skynet.be>
+Suggested-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/mm.h |    3 ---
+ mm/internal.h      |   15 +++++++++++++++
+ mm/readahead.c     |   21 +++------------------
+ 3 files changed, 18 insertions(+), 21 deletions(-)
+
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -1856,9 +1856,6 @@ void page_cache_async_readahead(struct a
+                               unsigned long size);
+ unsigned long max_sane_readahead(unsigned long nr);
+-unsigned long ra_submit(struct file_ra_state *ra,
+-                      struct address_space *mapping,
+-                      struct file *filp);
+ /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */
+ extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
+--- a/mm/internal.h
++++ b/mm/internal.h
+@@ -11,6 +11,7 @@
+ #ifndef __MM_INTERNAL_H
+ #define __MM_INTERNAL_H
++#include <linux/fs.h>
+ #include <linux/mm.h>
+ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
+@@ -21,6 +22,20 @@ static inline void set_page_count(struct
+       atomic_set(&page->_count, v);
+ }
++extern int __do_page_cache_readahead(struct address_space *mapping,
++              struct file *filp, pgoff_t offset, unsigned long nr_to_read,
++              unsigned long lookahead_size);
++
++/*
++ * Submit IO for the read-ahead request in file_ra_state.
++ */
++static inline unsigned long ra_submit(struct file_ra_state *ra,
++              struct address_space *mapping, struct file *filp)
++{
++      return __do_page_cache_readahead(mapping, filp,
++                                      ra->start, ra->size, ra->async_size);
++}
++
+ /*
+  * Turn a non-refcounted page (->_count == 0) into refcounted with
+  * a count of one.
+--- a/mm/readahead.c
++++ b/mm/readahead.c
+@@ -8,9 +8,7 @@
+  */
+ #include <linux/kernel.h>
+-#include <linux/fs.h>
+ #include <linux/gfp.h>
+-#include <linux/mm.h>
+ #include <linux/export.h>
+ #include <linux/blkdev.h>
+ #include <linux/backing-dev.h>
+@@ -20,6 +18,8 @@
+ #include <linux/syscalls.h>
+ #include <linux/file.h>
++#include "internal.h"
++
+ /*
+  * Initialise a struct file's readahead state.  Assumes that the caller has
+  * memset *ra to zero.
+@@ -149,8 +149,7 @@ out:
+  *
+  * Returns the number of pages requested, or the maximum amount of I/O allowed.
+  */
+-static int
+-__do_page_cache_readahead(struct address_space *mapping, struct file *filp,
++int __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
+                       pgoff_t offset, unsigned long nr_to_read,
+                       unsigned long lookahead_size)
+ {
+@@ -244,20 +243,6 @@ unsigned long max_sane_readahead(unsigne
+ }
+ /*
+- * Submit IO for the read-ahead request in file_ra_state.
+- */
+-unsigned long ra_submit(struct file_ra_state *ra,
+-                     struct address_space *mapping, struct file *filp)
+-{
+-      int actual;
+-
+-      actual = __do_page_cache_readahead(mapping, filp,
+-                                      ra->start, ra->size, ra->async_size);
+-
+-      return actual;
+-}
+-
+-/*
+  * Set the initial window size, round to next power of 2 and square
+  * for small size, x 4 for medium, and x 2 for large
+  * for 128k (32 page) max ra
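
For context, ra_submit() is only ever called after the readahead window in file_ra_state has been filled in. A small sketch of that pattern, assuming the inline definition added above; the wrapper and the fifty-percent lookahead split are invented, and the sketch assumes it is built inside mm/ so that "internal.h" is visible.

#include <linux/fs.h>
#include "internal.h"           /* ra_submit(), __do_page_cache_readahead() */

static unsigned long demo_submit_window(struct file_ra_state *ra,
                                        struct address_space *mapping,
                                        struct file *filp, pgoff_t offset,
                                        unsigned long req_size)
{
        ra->start = offset;             /* first page of the window */
        ra->size = req_size;            /* number of pages to read */
        ra->async_size = req_size / 2;  /* lookahead part (made-up split) */

        /*
         * After this patch the call below expands directly to
         * __do_page_cache_readahead(mapping, filp, ra->start, ra->size,
         *                           ra->async_size).
         */
        return ra_submit(ra, mapping, filp);
}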
diff --git a/queue-3.14/mm-remove-read_cache_page_async.patch b/queue-3.14/mm-remove-read_cache_page_async.patch
new file mode 100644 (file)
index 0000000..8accf4c
--- /dev/null
@@ -0,0 +1,222 @@
+From 67f9fd91f93c582b7de2ab9325b6e179db77e4d5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sasha.levin@oracle.com>
+Date: Thu, 3 Apr 2014 14:48:18 -0700
+Subject: mm: remove read_cache_page_async()
+
+From: Sasha Levin <sasha.levin@oracle.com>
+
+commit 67f9fd91f93c582b7de2ab9325b6e179db77e4d5 upstream.
+
+This patch removes read_cache_page_async() which wasn't really needed
+anywhere and simplifies the code around it a bit.
+
+read_cache_page_async() is useful when we want to read a page into the
+cache without waiting for it to complete.  This happens when the
+appropriate callback 'filler' doesn't complete its read operation and
+releases the page lock immediately, and instead queues a different
+completion routine to do that.  This never actually happened anywhere in
+the code.
+
+read_cache_page_async() had 3 different callers:
+
+- read_cache_page() which is the sync version, it would just wait for
+  the requested read to complete using wait_on_page_read().
+
+- JFFS2 would call it from jffs2_gc_fetch_page(), but the filler
+  function it supplied doesn't do any async reads, and would complete
+  before the filler function returns - making it actually a sync read.
+
+- CRAMFS would call it using the read_mapping_page_async() wrapper, with
+  a similar story to JFFS2 - the filler function doesn't do anything that
+  resembles async reads and would always complete before the filler function
+  returns.
+
+To sum it up, the code in mm/filemap.c never took advantage of having
+read_cache_page_async().  While there are filler callbacks that do async
+reads (such as the block one), we always called it with the
+read_cache_page().
+
+This patch adds a mandatory wait for read to complete when adding a new
+page to the cache, and removes read_cache_page_async() and its wrappers.
+
+Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/cramfs/inode.c       |    3 --
+ fs/jffs2/fs.c           |    2 -
+ include/linux/pagemap.h |   10 -------
+ mm/filemap.c            |   64 +++++++++++++++++-------------------------------
+ 4 files changed, 25 insertions(+), 54 deletions(-)
+
+--- a/fs/cramfs/inode.c
++++ b/fs/cramfs/inode.c
+@@ -195,8 +195,7 @@ static void *cramfs_read(struct super_bl
+               struct page *page = NULL;
+               if (blocknr + i < devsize) {
+-                      page = read_mapping_page_async(mapping, blocknr + i,
+-                                                                      NULL);
++                      page = read_mapping_page(mapping, blocknr + i, NULL);
+                       /* synchronous error? */
+                       if (IS_ERR(page))
+                               page = NULL;
+--- a/fs/jffs2/fs.c
++++ b/fs/jffs2/fs.c
+@@ -687,7 +687,7 @@ unsigned char *jffs2_gc_fetch_page(struc
+       struct inode *inode = OFNI_EDONI_2SFFJ(f);
+       struct page *pg;
+-      pg = read_cache_page_async(inode->i_mapping, offset >> PAGE_CACHE_SHIFT,
++      pg = read_cache_page(inode->i_mapping, offset >> PAGE_CACHE_SHIFT,
+                            (void *)jffs2_do_readpage_unlock, inode);
+       if (IS_ERR(pg))
+               return (void *)pg;
+--- a/include/linux/pagemap.h
++++ b/include/linux/pagemap.h
+@@ -278,8 +278,6 @@ static inline struct page *grab_cache_pa
+ extern struct page * grab_cache_page_nowait(struct address_space *mapping,
+                               pgoff_t index);
+-extern struct page * read_cache_page_async(struct address_space *mapping,
+-                              pgoff_t index, filler_t *filler, void *data);
+ extern struct page * read_cache_page(struct address_space *mapping,
+                               pgoff_t index, filler_t *filler, void *data);
+ extern struct page * read_cache_page_gfp(struct address_space *mapping,
+@@ -287,14 +285,6 @@ extern struct page * read_cache_page_gfp
+ extern int read_cache_pages(struct address_space *mapping,
+               struct list_head *pages, filler_t *filler, void *data);
+-static inline struct page *read_mapping_page_async(
+-                              struct address_space *mapping,
+-                              pgoff_t index, void *data)
+-{
+-      filler_t *filler = (filler_t *)mapping->a_ops->readpage;
+-      return read_cache_page_async(mapping, index, filler, data);
+-}
+-
+ static inline struct page *read_mapping_page(struct address_space *mapping,
+                               pgoff_t index, void *data)
+ {
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -2027,6 +2027,18 @@ int generic_file_readonly_mmap(struct fi
+ EXPORT_SYMBOL(generic_file_mmap);
+ EXPORT_SYMBOL(generic_file_readonly_mmap);
++static struct page *wait_on_page_read(struct page *page)
++{
++      if (!IS_ERR(page)) {
++              wait_on_page_locked(page);
++              if (!PageUptodate(page)) {
++                      page_cache_release(page);
++                      page = ERR_PTR(-EIO);
++              }
++      }
++      return page;
++}
++
+ static struct page *__read_cache_page(struct address_space *mapping,
+                               pgoff_t index,
+                               int (*filler)(void *, struct page *),
+@@ -2053,6 +2065,8 @@ repeat:
+               if (err < 0) {
+                       page_cache_release(page);
+                       page = ERR_PTR(err);
++              } else {
++                      page = wait_on_page_read(page);
+               }
+       }
+       return page;
+@@ -2089,6 +2103,10 @@ retry:
+       if (err < 0) {
+               page_cache_release(page);
+               return ERR_PTR(err);
++      } else {
++              page = wait_on_page_read(page);
++              if (IS_ERR(page))
++                      return page;
+       }
+ out:
+       mark_page_accessed(page);
+@@ -2096,40 +2114,25 @@ out:
+ }
+ /**
+- * read_cache_page_async - read into page cache, fill it if needed
++ * read_cache_page - read into page cache, fill it if needed
+  * @mapping:  the page's address_space
+  * @index:    the page index
+  * @filler:   function to perform the read
+  * @data:     first arg to filler(data, page) function, often left as NULL
+  *
+- * Same as read_cache_page, but don't wait for page to become unlocked
+- * after submitting it to the filler.
+- *
+  * Read into the page cache. If a page already exists, and PageUptodate() is
+- * not set, try to fill the page but don't wait for it to become unlocked.
++ * not set, try to fill the page and wait for it to become unlocked.
+  *
+  * If the page does not get brought uptodate, return -EIO.
+  */
+-struct page *read_cache_page_async(struct address_space *mapping,
++struct page *read_cache_page(struct address_space *mapping,
+                               pgoff_t index,
+                               int (*filler)(void *, struct page *),
+                               void *data)
+ {
+       return do_read_cache_page(mapping, index, filler, data, mapping_gfp_mask(mapping));
+ }
+-EXPORT_SYMBOL(read_cache_page_async);
+-
+-static struct page *wait_on_page_read(struct page *page)
+-{
+-      if (!IS_ERR(page)) {
+-              wait_on_page_locked(page);
+-              if (!PageUptodate(page)) {
+-                      page_cache_release(page);
+-                      page = ERR_PTR(-EIO);
+-              }
+-      }
+-      return page;
+-}
++EXPORT_SYMBOL(read_cache_page);
+ /**
+  * read_cache_page_gfp - read into page cache, using specified page allocation flags.
+@@ -2148,31 +2151,10 @@ struct page *read_cache_page_gfp(struct
+ {
+       filler_t *filler = (filler_t *)mapping->a_ops->readpage;
+-      return wait_on_page_read(do_read_cache_page(mapping, index, filler, NULL, gfp));
++      return do_read_cache_page(mapping, index, filler, NULL, gfp);
+ }
+ EXPORT_SYMBOL(read_cache_page_gfp);
+-/**
+- * read_cache_page - read into page cache, fill it if needed
+- * @mapping:  the page's address_space
+- * @index:    the page index
+- * @filler:   function to perform the read
+- * @data:     first arg to filler(data, page) function, often left as NULL
+- *
+- * Read into the page cache. If a page already exists, and PageUptodate() is
+- * not set, try to fill the page then wait for it to become unlocked.
+- *
+- * If the page does not get brought uptodate, return -EIO.
+- */
+-struct page *read_cache_page(struct address_space *mapping,
+-                              pgoff_t index,
+-                              int (*filler)(void *, struct page *),
+-                              void *data)
+-{
+-      return wait_on_page_read(read_cache_page_async(mapping, index, filler, data));
+-}
+-EXPORT_SYMBOL(read_cache_page);
+-
+ static size_t __iovec_copy_from_user_inatomic(char *vaddr,
+                       const struct iovec *iov, size_t base, size_t bytes)
+ {
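
What remains after this patch is a single, synchronous interface: read_cache_page() (and the read_mapping_page() wrapper) always waits and either returns an uptodate, referenced page or an ERR_PTR. A hedged example of a caller, mirroring the cramfs conversion above; the helper demo_read_one() is invented.

#include <linux/pagemap.h>
#include <linux/err.h>

static struct page *demo_read_one(struct address_space *mapping, pgoff_t index)
{
        /* NULL data + the mapping's ->readpage is what read_mapping_page()
         * passes down to read_cache_page(). */
        struct page *page = read_mapping_page(mapping, index, NULL);

        /* With read_cache_page_async() gone, an error return already means
         * the wait happened and the page never became uptodate. */
        if (IS_ERR(page))
                return page;            /* typically ERR_PTR(-EIO) */

        /* Uptodate and referenced; caller drops it with page_cache_release() */
        return page;
}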
diff --git a/queue-3.14/mm-shmem-save-one-radix-tree-lookup-when-truncating-swapped-pages.patch b/queue-3.14/mm-shmem-save-one-radix-tree-lookup-when-truncating-swapped-pages.patch
new file mode 100644 (file)
index 0000000..23e360f
--- /dev/null
@@ -0,0 +1,97 @@
+From 6dbaf22ce1f1dfba33313198eb5bd989ae76dd87 Mon Sep 17 00:00:00 2001
+From: Johannes Weiner <hannes@cmpxchg.org>
+Date: Thu, 3 Apr 2014 14:47:41 -0700
+Subject: mm: shmem: save one radix tree lookup when truncating swapped pages
+
+From: Johannes Weiner <hannes@cmpxchg.org>
+
+commit 6dbaf22ce1f1dfba33313198eb5bd989ae76dd87 upstream.
+
+Page cache radix tree slots are usually stabilized by the page lock, but
+shmem's swap cookies have no such thing.  Because the overall truncation
+loop is lockless, the swap entry is currently confirmed by a tree lookup
+and then deleted by another tree lookup under the same tree lock region.
+
+Use radix_tree_delete_item() instead, which does the verification and
+deletion with only one lookup.  This also allows removing the
+delete-only special case from shmem_radix_tree_replace().
+
+Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
+Reviewed-by: Minchan Kim <minchan@kernel.org>
+Reviewed-by: Rik van Riel <riel@redhat.com>
+Acked-by: Mel Gorman <mgorman@suse.de>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Bob Liu <bob.liu@oracle.com>
+Cc: Christoph Hellwig <hch@infradead.org>
+Cc: Dave Chinner <david@fromorbit.com>
+Cc: Greg Thelen <gthelen@google.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Jan Kara <jack@suse.cz>
+Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+Cc: Luigi Semenzato <semenzato@google.com>
+Cc: Metin Doslu <metin@citusdata.com>
+Cc: Michel Lespinasse <walken@google.com>
+Cc: Ozgun Erdogan <ozgun@citusdata.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Roman Gushchin <klamm@yandex-team.ru>
+Cc: Ryan Mallon <rmallon@gmail.com>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/shmem.c |   25 ++++++++++++-------------
+ 1 file changed, 12 insertions(+), 13 deletions(-)
+
+--- a/mm/shmem.c
++++ b/mm/shmem.c
+@@ -243,19 +243,17 @@ static int shmem_radix_tree_replace(stru
+                       pgoff_t index, void *expected, void *replacement)
+ {
+       void **pslot;
+-      void *item = NULL;
++      void *item;
+       VM_BUG_ON(!expected);
++      VM_BUG_ON(!replacement);
+       pslot = radix_tree_lookup_slot(&mapping->page_tree, index);
+-      if (pslot)
+-              item = radix_tree_deref_slot_protected(pslot,
+-                                                      &mapping->tree_lock);
++      if (!pslot)
++              return -ENOENT;
++      item = radix_tree_deref_slot_protected(pslot, &mapping->tree_lock);
+       if (item != expected)
+               return -ENOENT;
+-      if (replacement)
+-              radix_tree_replace_slot(pslot, replacement);
+-      else
+-              radix_tree_delete(&mapping->page_tree, index);
++      radix_tree_replace_slot(pslot, replacement);
+       return 0;
+ }
+@@ -387,14 +385,15 @@ export:
+ static int shmem_free_swap(struct address_space *mapping,
+                          pgoff_t index, void *radswap)
+ {
+-      int error;
++      void *old;
+       spin_lock_irq(&mapping->tree_lock);
+-      error = shmem_radix_tree_replace(mapping, index, radswap, NULL);
++      old = radix_tree_delete_item(&mapping->page_tree, index, radswap);
+       spin_unlock_irq(&mapping->tree_lock);
+-      if (!error)
+-              free_swap_and_cache(radix_to_swp_entry(radswap));
+-      return error;
++      if (old != radswap)
++              return -ENOENT;
++      free_swap_and_cache(radix_to_swp_entry(radswap));
++      return 0;
+ }
+ /*
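
The new shmem_free_swap() relies on radix_tree_delete_item() verifying and deleting in a single lookup, as the commit message explains. A sketch of that contract in isolation, assuming the primitive added by lib-radix-tree-add-radix_tree_delete_item.patch earlier in this series; the wrapper demo_delete_if_unchanged() is invented.

#include <linux/fs.h>
#include <linux/radix-tree.h>
#include <linux/spinlock.h>
#include <linux/errno.h>

static int demo_delete_if_unchanged(struct address_space *mapping,
                                    pgoff_t index, void *expected)
{
        void *old;

        spin_lock_irq(&mapping->tree_lock);
        /*
         * One lookup does both jobs: the slot is deleted only if it still
         * holds @expected, and whatever was found is returned so the
         * caller can tell a race from a successful delete.
         */
        old = radix_tree_delete_item(&mapping->page_tree, index, expected);
        spin_unlock_irq(&mapping->tree_lock);

        return old == expected ? 0 : -ENOENT;
}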
index e2966a66c5d981604baaa7ac4da63ac1247b19e0..d6cc0dd85b1ad236955f3fc20c4d1ae3b2319ef1 100644 (file)
@@ -96,3 +96,14 @@ net-sctp-fix-panic-on-duplicate-asconf-chunks.patch
 net-sctp-fix-skb_over_panic-when-receiving-malformed-asconf-chunks.patch
 iwlwifi-configure-the-ltr.patch
 regmap-fix-kernel-hang-on-regmap_bulk_write-with-zero-val_count.patch
+lib-radix-tree-add-radix_tree_delete_item.patch
+mm-shmem-save-one-radix-tree-lookup-when-truncating-swapped-pages.patch
+mm-filemap-move-radix-tree-hole-searching-here.patch
+mm-fs-prepare-for-non-page-entries-in-page-cache-radix-trees.patch
+mm-madvise-fix-madv_willneed-on-shmem-swapouts.patch
+mm-remove-read_cache_page_async.patch
+callers-of-iov_copy_from_user_atomic-don-t-need.patch
+mm-readahead.c-inline-ra_submit.patch
+mm-compaction-clean-up-unused-code-lines.patch
+mm-compaction-cleanup-isolate_freepages.patch
+mm-migration-add-destination-page-freeing-callback.patch