--- /dev/null
+From 9e8c2af96e0d2d5fe298dd796fb6bc16e888a48d Mon Sep 17 00:00:00 2001
+From: Al Viro <viro@zeniv.linux.org.uk>
+Date: Sun, 2 Feb 2014 22:10:25 -0500
+Subject: callers of iov_copy_from_user_atomic() don't need pagecache_disable()
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+commit 9e8c2af96e0d2d5fe298dd796fb6bc16e888a48d upstream.
+
+... it does that itself (via kmap_atomic())
+
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/file.c | 5 -----
+ fs/fuse/file.c | 2 --
+ mm/filemap.c | 3 ---
+ 3 files changed, 10 deletions(-)
+
+--- a/fs/btrfs/file.c
++++ b/fs/btrfs/file.c
+@@ -425,13 +425,8 @@ static noinline int btrfs_copy_from_user
+ struct page *page = prepared_pages[pg];
+ /*
+ * Copy data from userspace to the current page
+- *
+- * Disable pagefault to avoid recursive lock since
+- * the pages are already locked
+ */
+- pagefault_disable();
+ copied = iov_iter_copy_from_user_atomic(page, i, offset, count);
+- pagefault_enable();
+
+ /* Flush processor's dcache for this page */
+ flush_dcache_page(page);
+--- a/fs/fuse/file.c
++++ b/fs/fuse/file.c
+@@ -1003,9 +1003,7 @@ static ssize_t fuse_fill_write_pages(str
+ if (mapping_writably_mapped(mapping))
+ flush_dcache_page(page);
+
+- pagefault_disable();
+ tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes);
+- pagefault_enable();
+ flush_dcache_page(page);
+
+ mark_page_accessed(page);
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -2188,7 +2188,6 @@ size_t iov_iter_copy_from_user_atomic(st
+ char *kaddr;
+ size_t copied;
+
+- BUG_ON(!in_atomic());
+ kaddr = kmap_atomic(page);
+ if (likely(i->nr_segs == 1)) {
+ int left;
+@@ -2562,9 +2561,7 @@ again:
+ if (mapping_writably_mapped(mapping))
+ flush_dcache_page(page);
+
+- pagefault_disable();
+ copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
+- pagefault_enable();
+ flush_dcache_page(page);
+
+ mark_page_accessed(page);
--- /dev/null
+From 53c59f262d747ea82e7414774c59a489501186a0 Mon Sep 17 00:00:00 2001
+From: Johannes Weiner <hannes@cmpxchg.org>
+Date: Thu, 3 Apr 2014 14:47:39 -0700
+Subject: lib: radix-tree: add radix_tree_delete_item()
+
+From: Johannes Weiner <hannes@cmpxchg.org>
+
+commit 53c59f262d747ea82e7414774c59a489501186a0 upstream.
+
+Provide a function that does not just delete an entry at a given index,
+but also allows passing in an expected item. Delete only if that item
+is still located at the specified index.
+
+This is handy when lockless tree traversals want to delete entries as
+well because they don't have to do a second, locked lookup to verify
+the slot has not changed under them before deleting the entry.
+
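+A minimal usage sketch (hypothetical, not part of this patch): a caller
+that looked the entry up locklessly passes the value it saw back in, so
+the delete only happens if the slot still holds exactly that entry.
+Locking against concurrent modifiers (e.g. the mapping's tree_lock) is
+omitted here:
+
+	/*
+	 * Sketch only: remove the entry at @index if the slot still
+	 * contains @seen, the (non-NULL) value observed during a
+	 * lockless lookup.  Returns true if we removed it.
+	 */
+	static bool example_delete_if_unchanged(struct radix_tree_root *root,
+						unsigned long index, void *seen)
+	{
+		return radix_tree_delete_item(root, index, seen) != NULL;
+	}
+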
+Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
+Reviewed-by: Minchan Kim <minchan@kernel.org>
+Reviewed-by: Rik van Riel <riel@redhat.com>
+Acked-by: Mel Gorman <mgorman@suse.de>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Bob Liu <bob.liu@oracle.com>
+Cc: Christoph Hellwig <hch@infradead.org>
+Cc: Dave Chinner <david@fromorbit.com>
+Cc: Greg Thelen <gthelen@google.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Jan Kara <jack@suse.cz>
+Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+Cc: Luigi Semenzato <semenzato@google.com>
+Cc: Metin Doslu <metin@citusdata.com>
+Cc: Michel Lespinasse <walken@google.com>
+Cc: Ozgun Erdogan <ozgun@citusdata.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Roman Gushchin <klamm@yandex-team.ru>
+Cc: Ryan Mallon <rmallon@gmail.com>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/radix-tree.h | 1 +
+ lib/radix-tree.c | 31 +++++++++++++++++++++++++++----
+ 2 files changed, 28 insertions(+), 4 deletions(-)
+
+--- a/include/linux/radix-tree.h
++++ b/include/linux/radix-tree.h
+@@ -219,6 +219,7 @@ static inline void radix_tree_replace_sl
+ int radix_tree_insert(struct radix_tree_root *, unsigned long, void *);
+ void *radix_tree_lookup(struct radix_tree_root *, unsigned long);
+ void **radix_tree_lookup_slot(struct radix_tree_root *, unsigned long);
++void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *);
+ void *radix_tree_delete(struct radix_tree_root *, unsigned long);
+ unsigned int
+ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
+--- a/lib/radix-tree.c
++++ b/lib/radix-tree.c
+@@ -1337,15 +1337,18 @@ static inline void radix_tree_shrink(str
+ }
+
+ /**
+- * radix_tree_delete - delete an item from a radix tree
++ * radix_tree_delete_item - delete an item from a radix tree
+ * @root: radix tree root
+ * @index: index key
++ * @item: expected item
+ *
+- * Remove the item at @index from the radix tree rooted at @root.
++ * Remove @item at @index from the radix tree rooted at @root.
+ *
+- * Returns the address of the deleted item, or NULL if it was not present.
++ * Returns the address of the deleted item, or NULL if it was not present
++ * or the entry at the given @index was not @item.
+ */
+-void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
++void *radix_tree_delete_item(struct radix_tree_root *root,
++ unsigned long index, void *item)
+ {
+ struct radix_tree_node *node = NULL;
+ struct radix_tree_node *slot = NULL;
+@@ -1380,6 +1383,11 @@ void *radix_tree_delete(struct radix_tre
+ if (slot == NULL)
+ goto out;
+
++ if (item && slot != item) {
++ slot = NULL;
++ goto out;
++ }
++
+ /*
+ * Clear all tags associated with the item to be deleted.
+ * This way of doing it would be inefficient, but seldom is any set.
+@@ -1424,6 +1432,21 @@ void *radix_tree_delete(struct radix_tre
+ out:
+ return slot;
+ }
++EXPORT_SYMBOL(radix_tree_delete_item);
++
++/**
++ * radix_tree_delete - delete an item from a radix tree
++ * @root: radix tree root
++ * @index: index key
++ *
++ * Remove the item at @index from the radix tree rooted at @root.
++ *
++ * Returns the address of the deleted item, or NULL if it was not present.
++ */
++void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
++{
++ return radix_tree_delete_item(root, index, NULL);
++}
+ EXPORT_SYMBOL(radix_tree_delete);
+
+ /**
--- /dev/null
+From 13fb44e4b0414d7e718433a49e6430d5b76bd46e Mon Sep 17 00:00:00 2001
+From: Heesub Shin <heesub.shin@samsung.com>
+Date: Wed, 4 Jun 2014 16:07:24 -0700
+Subject: mm/compaction: clean up unused code lines
+
+From: Heesub Shin <heesub.shin@samsung.com>
+
+commit 13fb44e4b0414d7e718433a49e6430d5b76bd46e upstream.
+
+Remove code lines currently not in use or never called.
+
+Signed-off-by: Heesub Shin <heesub.shin@samsung.com>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Dongjun Shin <d.j.shin@samsung.com>
+Cc: Sunghwan Yun <sunghwan.yun@samsung.com>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Cc: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
+Cc: Michal Nazarewicz <mina86@mina86.com>
+Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Cc: Christoph Lameter <cl@linux.com>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Dongjun Shin <d.j.shin@samsung.com>
+Cc: Sunghwan Yun <sunghwan.yun@samsung.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/compaction.c | 10 ----------
+ 1 file changed, 10 deletions(-)
+
+--- a/mm/compaction.c
++++ b/mm/compaction.c
+@@ -208,12 +208,6 @@ static bool compact_checklock_irqsave(sp
+ return true;
+ }
+
+-static inline bool compact_trylock_irqsave(spinlock_t *lock,
+- unsigned long *flags, struct compact_control *cc)
+-{
+- return compact_checklock_irqsave(lock, flags, false, cc);
+-}
+-
+ /* Returns true if the page is within a block suitable for migration to */
+ static bool suitable_migration_target(struct page *page)
+ {
+@@ -736,7 +730,6 @@ static void isolate_freepages(struct zon
+ continue;
+
+ /* Found a block suitable for isolating free pages from */
+- isolated = 0;
+
+ /*
+ * Take care when isolating in last pageblock of a zone which
+@@ -1165,9 +1158,6 @@ static void __compact_pgdat(pg_data_t *p
+ if (zone_watermark_ok(zone, cc->order,
+ low_wmark_pages(zone), 0, 0))
+ compaction_defer_reset(zone, cc->order, false);
+- /* Currently async compaction is never deferred. */
+- else if (cc->sync)
+- defer_compaction(zone, cc->order);
+ }
+
+ VM_BUG_ON(!list_empty(&cc->freepages));
--- /dev/null
+From c96b9e508f3d06ddb601dcc9792d62c044ab359e Mon Sep 17 00:00:00 2001
+From: Vlastimil Babka <vbabka@suse.cz>
+Date: Wed, 4 Jun 2014 16:07:26 -0700
+Subject: mm/compaction: cleanup isolate_freepages()
+
+From: Vlastimil Babka <vbabka@suse.cz>
+
+commit c96b9e508f3d06ddb601dcc9792d62c044ab359e upstream.
+
+isolate_freepages() is currently somewhat hard to follow thanks to many
+confusingly named variables; in particular, the name of the 'high_pfn'
+variable looks like it is related to the 'low_pfn' variable, but in fact
+it is not.
+
+This patch renames the 'high_pfn' variable to a hopefully less confusing name,
+and slightly changes its handling without a functional change. A comment made
+obsolete by recent changes is also updated.
+
+[akpm@linux-foundation.org: comment fixes, per Minchan]
+[iamjoonsoo.kim@lge.com: cleanups]
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Cc: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
+Cc: Michal Nazarewicz <mina86@mina86.com>
+Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Cc: Christoph Lameter <cl@linux.com>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Dongjun Shin <d.j.shin@samsung.com>
+Cc: Sunghwan Yun <sunghwan.yun@samsung.com>
+Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/compaction.c | 56 +++++++++++++++++++++++++++-----------------------------
+ 1 file changed, 27 insertions(+), 29 deletions(-)
+
+--- a/mm/compaction.c
++++ b/mm/compaction.c
+@@ -665,7 +665,10 @@ static void isolate_freepages(struct zon
+ struct compact_control *cc)
+ {
+ struct page *page;
+- unsigned long high_pfn, low_pfn, pfn, z_end_pfn;
++ unsigned long block_start_pfn; /* start of current pageblock */
++ unsigned long block_end_pfn; /* end of current pageblock */
++ unsigned long low_pfn; /* lowest pfn scanner is able to scan */
++ unsigned long next_free_pfn; /* start pfn for scanning at next round */
+ int nr_freepages = cc->nr_freepages;
+ struct list_head *freelist = &cc->freepages;
+
+@@ -673,32 +676,33 @@ static void isolate_freepages(struct zon
+ * Initialise the free scanner. The starting point is where we last
+ * successfully isolated from, zone-cached value, or the end of the
+ * zone when isolating for the first time. We need this aligned to
+- * the pageblock boundary, because we do pfn -= pageblock_nr_pages
+- * in the for loop.
++ * the pageblock boundary, because we do
++ * block_start_pfn -= pageblock_nr_pages in the for loop.
++ * For ending point, take care when isolating in last pageblock of a
++ * zone which ends in the middle of a pageblock.
+ * The low boundary is the end of the pageblock the migration scanner
+ * is using.
+ */
+- pfn = cc->free_pfn & ~(pageblock_nr_pages-1);
++ block_start_pfn = cc->free_pfn & ~(pageblock_nr_pages-1);
++ block_end_pfn = min(block_start_pfn + pageblock_nr_pages,
++ zone_end_pfn(zone));
+ low_pfn = ALIGN(cc->migrate_pfn + 1, pageblock_nr_pages);
+
+ /*
+- * Take care that if the migration scanner is at the end of the zone
+- * that the free scanner does not accidentally move to the next zone
+- * in the next isolation cycle.
++ * If no pages are isolated, the block_start_pfn < low_pfn check
++ * will kick in.
+ */
+- high_pfn = min(low_pfn, pfn);
+-
+- z_end_pfn = zone_end_pfn(zone);
++ next_free_pfn = 0;
+
+ /*
+ * Isolate free pages until enough are available to migrate the
+ * pages on cc->migratepages. We stop searching if the migrate
+ * and free page scanners meet or enough free pages are isolated.
+ */
+- for (; pfn >= low_pfn && cc->nr_migratepages > nr_freepages;
+- pfn -= pageblock_nr_pages) {
++ for (; block_start_pfn >= low_pfn && cc->nr_migratepages > nr_freepages;
++ block_end_pfn = block_start_pfn,
++ block_start_pfn -= pageblock_nr_pages) {
+ unsigned long isolated;
+- unsigned long end_pfn;
+
+ /*
+ * This can iterate a massively long zone without finding any
+@@ -707,7 +711,7 @@ static void isolate_freepages(struct zon
+ */
+ cond_resched();
+
+- if (!pfn_valid(pfn))
++ if (!pfn_valid(block_start_pfn))
+ continue;
+
+ /*
+@@ -717,7 +721,7 @@ static void isolate_freepages(struct zon
+ * i.e. it's possible that all pages within a zones range of
+ * pages do not belong to a single zone.
+ */
+- page = pfn_to_page(pfn);
++ page = pfn_to_page(block_start_pfn);
+ if (page_zone(page) != zone)
+ continue;
+
+@@ -730,14 +734,8 @@ static void isolate_freepages(struct zon
+ continue;
+
+ /* Found a block suitable for isolating free pages from */
+-
+- /*
+- * Take care when isolating in last pageblock of a zone which
+- * ends in the middle of a pageblock.
+- */
+- end_pfn = min(pfn + pageblock_nr_pages, z_end_pfn);
+- isolated = isolate_freepages_block(cc, pfn, end_pfn,
+- freelist, false);
++ isolated = isolate_freepages_block(cc, block_start_pfn,
++ block_end_pfn, freelist, false);
+ nr_freepages += isolated;
+
+ /*
+@@ -745,9 +743,9 @@ static void isolate_freepages(struct zon
+ * looking for free pages, the search will restart here as
+ * page migration may have returned some pages to the allocator
+ */
+- if (isolated) {
++ if (isolated && next_free_pfn == 0) {
+ cc->finished_update_free = true;
+- high_pfn = max(high_pfn, pfn);
++ next_free_pfn = block_start_pfn;
+ }
+ }
+
+@@ -758,10 +756,10 @@ static void isolate_freepages(struct zon
+ * If we crossed the migrate scanner, we want to keep it that way
+ * so that compact_finished() may detect this
+ */
+- if (pfn < low_pfn)
+- cc->free_pfn = max(pfn, zone->zone_start_pfn);
+- else
+- cc->free_pfn = high_pfn;
++ if (block_start_pfn < low_pfn)
++ next_free_pfn = cc->migrate_pfn;
++
++ cc->free_pfn = next_free_pfn;
+ cc->nr_freepages = nr_freepages;
+ }
+
--- /dev/null
+From e7b563bb2a6f4d974208da46200784b9c5b5a47e Mon Sep 17 00:00:00 2001
+From: Johannes Weiner <hannes@cmpxchg.org>
+Date: Thu, 3 Apr 2014 14:47:44 -0700
+Subject: mm: filemap: move radix tree hole searching here
+
+From: Johannes Weiner <hannes@cmpxchg.org>
+
+commit e7b563bb2a6f4d974208da46200784b9c5b5a47e upstream.
+
+The radix tree hole searching code is only used for page cache, for
+example the readahead code trying to get a picture of the area
+surrounding a fault.
+
+It sufficed to rely on the radix tree definition of holes, which is
+"empty tree slot". But this is about to change, though, as shadow page
+descriptors will be stored in the page cache after the actual pages get
+evicted from memory.
+
+Move the functions over to mm/filemap.c and make them native page cache
+operations, where they can later be adapted to handle the new definition
+of "page cache hole".
+
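+For illustration, a typical calling pattern after the move (the wrapper
+name below is hypothetical; the converted call sites in readahead do the
+same thing inline):
+
+	/*
+	 * Sketch only: first hole at or after @index in @mapping,
+	 * scanning at most @max_scan slots.  When no hole is found the
+	 * return value lies outside the scanned range, i.e.
+	 * return - index >= max_scan (see the kerneldoc for the rare
+	 * wrap-around case).
+	 */
+	static pgoff_t example_next_hole(struct address_space *mapping,
+					 pgoff_t index, unsigned long max_scan)
+	{
+		pgoff_t hole;
+
+		rcu_read_lock();
+		hole = page_cache_next_hole(mapping, index, max_scan);
+		rcu_read_unlock();
+
+		return hole;
+	}
+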
+Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
+Reviewed-by: Rik van Riel <riel@redhat.com>
+Reviewed-by: Minchan Kim <minchan@kernel.org>
+Acked-by: Mel Gorman <mgorman@suse.de>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Bob Liu <bob.liu@oracle.com>
+Cc: Christoph Hellwig <hch@infradead.org>
+Cc: Dave Chinner <david@fromorbit.com>
+Cc: Greg Thelen <gthelen@google.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Jan Kara <jack@suse.cz>
+Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+Cc: Luigi Semenzato <semenzato@google.com>
+Cc: Metin Doslu <metin@citusdata.com>
+Cc: Michel Lespinasse <walken@google.com>
+Cc: Ozgun Erdogan <ozgun@citusdata.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Roman Gushchin <klamm@yandex-team.ru>
+Cc: Ryan Mallon <rmallon@gmail.com>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/nfs/blocklayout/blocklayout.c | 2 -
+ include/linux/pagemap.h | 5 ++
+ include/linux/radix-tree.h | 4 --
+ lib/radix-tree.c | 75 --------------------------------------
+ mm/filemap.c | 76 +++++++++++++++++++++++++++++++++++++++
+ mm/readahead.c | 4 +-
+ 6 files changed, 84 insertions(+), 82 deletions(-)
+
+--- a/fs/nfs/blocklayout/blocklayout.c
++++ b/fs/nfs/blocklayout/blocklayout.c
+@@ -1213,7 +1213,7 @@ static u64 pnfs_num_cont_bytes(struct in
+ end = DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE);
+ if (end != NFS_I(inode)->npages) {
+ rcu_read_lock();
+- end = radix_tree_next_hole(&mapping->page_tree, idx + 1, ULONG_MAX);
++ end = page_cache_next_hole(mapping, idx + 1, ULONG_MAX);
+ rcu_read_unlock();
+ }
+
+--- a/include/linux/pagemap.h
++++ b/include/linux/pagemap.h
+@@ -243,6 +243,11 @@ static inline struct page *page_cache_al
+
+ typedef int filler_t(void *, struct page *);
+
++pgoff_t page_cache_next_hole(struct address_space *mapping,
++ pgoff_t index, unsigned long max_scan);
++pgoff_t page_cache_prev_hole(struct address_space *mapping,
++ pgoff_t index, unsigned long max_scan);
++
+ extern struct page * find_get_page(struct address_space *mapping,
+ pgoff_t index);
+ extern struct page * find_lock_page(struct address_space *mapping,
+--- a/include/linux/radix-tree.h
++++ b/include/linux/radix-tree.h
+@@ -227,10 +227,6 @@ radix_tree_gang_lookup(struct radix_tree
+ unsigned int radix_tree_gang_lookup_slot(struct radix_tree_root *root,
+ void ***results, unsigned long *indices,
+ unsigned long first_index, unsigned int max_items);
+-unsigned long radix_tree_next_hole(struct radix_tree_root *root,
+- unsigned long index, unsigned long max_scan);
+-unsigned long radix_tree_prev_hole(struct radix_tree_root *root,
+- unsigned long index, unsigned long max_scan);
+ int radix_tree_preload(gfp_t gfp_mask);
+ int radix_tree_maybe_preload(gfp_t gfp_mask);
+ void radix_tree_init(void);
+--- a/lib/radix-tree.c
++++ b/lib/radix-tree.c
+@@ -946,81 +946,6 @@ next:
+ }
+ EXPORT_SYMBOL(radix_tree_range_tag_if_tagged);
+
+-
+-/**
+- * radix_tree_next_hole - find the next hole (not-present entry)
+- * @root: tree root
+- * @index: index key
+- * @max_scan: maximum range to search
+- *
+- * Search the set [index, min(index+max_scan-1, MAX_INDEX)] for the lowest
+- * indexed hole.
+- *
+- * Returns: the index of the hole if found, otherwise returns an index
+- * outside of the set specified (in which case 'return - index >= max_scan'
+- * will be true). In rare cases of index wrap-around, 0 will be returned.
+- *
+- * radix_tree_next_hole may be called under rcu_read_lock. However, like
+- * radix_tree_gang_lookup, this will not atomically search a snapshot of
+- * the tree at a single point in time. For example, if a hole is created
+- * at index 5, then subsequently a hole is created at index 10,
+- * radix_tree_next_hole covering both indexes may return 10 if called
+- * under rcu_read_lock.
+- */
+-unsigned long radix_tree_next_hole(struct radix_tree_root *root,
+- unsigned long index, unsigned long max_scan)
+-{
+- unsigned long i;
+-
+- for (i = 0; i < max_scan; i++) {
+- if (!radix_tree_lookup(root, index))
+- break;
+- index++;
+- if (index == 0)
+- break;
+- }
+-
+- return index;
+-}
+-EXPORT_SYMBOL(radix_tree_next_hole);
+-
+-/**
+- * radix_tree_prev_hole - find the prev hole (not-present entry)
+- * @root: tree root
+- * @index: index key
+- * @max_scan: maximum range to search
+- *
+- * Search backwards in the range [max(index-max_scan+1, 0), index]
+- * for the first hole.
+- *
+- * Returns: the index of the hole if found, otherwise returns an index
+- * outside of the set specified (in which case 'index - return >= max_scan'
+- * will be true). In rare cases of wrap-around, ULONG_MAX will be returned.
+- *
+- * radix_tree_next_hole may be called under rcu_read_lock. However, like
+- * radix_tree_gang_lookup, this will not atomically search a snapshot of
+- * the tree at a single point in time. For example, if a hole is created
+- * at index 10, then subsequently a hole is created at index 5,
+- * radix_tree_prev_hole covering both indexes may return 5 if called under
+- * rcu_read_lock.
+- */
+-unsigned long radix_tree_prev_hole(struct radix_tree_root *root,
+- unsigned long index, unsigned long max_scan)
+-{
+- unsigned long i;
+-
+- for (i = 0; i < max_scan; i++) {
+- if (!radix_tree_lookup(root, index))
+- break;
+- index--;
+- if (index == ULONG_MAX)
+- break;
+- }
+-
+- return index;
+-}
+-EXPORT_SYMBOL(radix_tree_prev_hole);
+-
+ /**
+ * radix_tree_gang_lookup - perform multiple lookup on a radix tree
+ * @root: radix tree root
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -688,6 +688,82 @@ int __lock_page_or_retry(struct page *pa
+ }
+
+ /**
++ * page_cache_next_hole - find the next hole (not-present entry)
++ * @mapping: mapping
++ * @index: index
++ * @max_scan: maximum range to search
++ *
++ * Search the set [index, min(index+max_scan-1, MAX_INDEX)] for the
++ * lowest indexed hole.
++ *
++ * Returns: the index of the hole if found, otherwise returns an index
++ * outside of the set specified (in which case 'return - index >=
++ * max_scan' will be true). In rare cases of index wrap-around, 0 will
++ * be returned.
++ *
++ * page_cache_next_hole may be called under rcu_read_lock. However,
++ * like radix_tree_gang_lookup, this will not atomically search a
++ * snapshot of the tree at a single point in time. For example, if a
++ * hole is created at index 5, then subsequently a hole is created at
++ * index 10, page_cache_next_hole covering both indexes may return 10
++ * if called under rcu_read_lock.
++ */
++pgoff_t page_cache_next_hole(struct address_space *mapping,
++ pgoff_t index, unsigned long max_scan)
++{
++ unsigned long i;
++
++ for (i = 0; i < max_scan; i++) {
++ if (!radix_tree_lookup(&mapping->page_tree, index))
++ break;
++ index++;
++ if (index == 0)
++ break;
++ }
++
++ return index;
++}
++EXPORT_SYMBOL(page_cache_next_hole);
++
++/**
++ * page_cache_prev_hole - find the prev hole (not-present entry)
++ * @mapping: mapping
++ * @index: index
++ * @max_scan: maximum range to search
++ *
++ * Search backwards in the range [max(index-max_scan+1, 0), index] for
++ * the first hole.
++ *
++ * Returns: the index of the hole if found, otherwise returns an index
++ * outside of the set specified (in which case 'index - return >=
++ * max_scan' will be true). In rare cases of wrap-around, ULONG_MAX
++ * will be returned.
++ *
++ * page_cache_prev_hole may be called under rcu_read_lock. However,
++ * like radix_tree_gang_lookup, this will not atomically search a
++ * snapshot of the tree at a single point in time. For example, if a
++ * hole is created at index 10, then subsequently a hole is created at
++ * index 5, page_cache_prev_hole covering both indexes may return 5 if
++ * called under rcu_read_lock.
++ */
++pgoff_t page_cache_prev_hole(struct address_space *mapping,
++ pgoff_t index, unsigned long max_scan)
++{
++ unsigned long i;
++
++ for (i = 0; i < max_scan; i++) {
++ if (!radix_tree_lookup(&mapping->page_tree, index))
++ break;
++ index--;
++ if (index == ULONG_MAX)
++ break;
++ }
++
++ return index;
++}
++EXPORT_SYMBOL(page_cache_prev_hole);
++
++/**
+ * find_get_page - find and get a page reference
+ * @mapping: the address_space to search
+ * @offset: the page index
+--- a/mm/readahead.c
++++ b/mm/readahead.c
+@@ -347,7 +347,7 @@ static pgoff_t count_history_pages(struc
+ pgoff_t head;
+
+ rcu_read_lock();
+- head = radix_tree_prev_hole(&mapping->page_tree, offset - 1, max);
++ head = page_cache_prev_hole(mapping, offset - 1, max);
+ rcu_read_unlock();
+
+ return offset - 1 - head;
+@@ -427,7 +427,7 @@ ondemand_readahead(struct address_space
+ pgoff_t start;
+
+ rcu_read_lock();
+- start = radix_tree_next_hole(&mapping->page_tree, offset+1,max);
++ start = page_cache_next_hole(mapping, offset + 1, max);
+ rcu_read_unlock();
+
+ if (!start || start - offset > max)
--- /dev/null
+From 0cd6144aadd2afd19d1aca880153530c52957604 Mon Sep 17 00:00:00 2001
+From: Johannes Weiner <hannes@cmpxchg.org>
+Date: Thu, 3 Apr 2014 14:47:46 -0700
+Subject: mm + fs: prepare for non-page entries in page cache radix trees
+
+From: Johannes Weiner <hannes@cmpxchg.org>
+
+commit 0cd6144aadd2afd19d1aca880153530c52957604 upstream.
+
+shmem mappings already contain exceptional entries where swap slot
+information is remembered.
+
+To be able to store eviction information for regular page cache, prepare
+every site dealing with the radix trees directly to handle entries other
+than pages.
+
+The common lookup functions will filter out non-page entries and return
+NULL for page cache holes, just as before. But provide a raw version of
+the API which returns non-page entries as well, and switch shmem over to
+use it.
+
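+To illustrate the split (the helper below is hypothetical, not part of
+this patch): find_get_page() keeps returning either NULL or a real page
+with its refcount raised, while the raw find_get_entry() may additionally
+return an exceptional (non-page) entry, for which no reference is taken:
+
+	static void example_classify(struct address_space *mapping,
+				     pgoff_t index)
+	{
+		struct page *page = find_get_entry(mapping, index);
+
+		if (!page)
+			return;		/* page cache hole */
+		if (radix_tree_exceptional_entry(page))
+			return;		/* shadow/swap entry, no reference held */
+		/* a real page; find_get_entry() took a reference */
+		page_cache_release(page);
+	}
+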
+Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
+Reviewed-by: Rik van Riel <riel@redhat.com>
+Reviewed-by: Minchan Kim <minchan@kernel.org>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Bob Liu <bob.liu@oracle.com>
+Cc: Christoph Hellwig <hch@infradead.org>
+Cc: Dave Chinner <david@fromorbit.com>
+Cc: Greg Thelen <gthelen@google.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Jan Kara <jack@suse.cz>
+Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+Cc: Luigi Semenzato <semenzato@google.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Metin Doslu <metin@citusdata.com>
+Cc: Michel Lespinasse <walken@google.com>
+Cc: Ozgun Erdogan <ozgun@citusdata.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Roman Gushchin <klamm@yandex-team.ru>
+Cc: Ryan Mallon <rmallon@gmail.com>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/compression.c | 2
+ include/linux/mm.h | 8 +
+ include/linux/pagemap.h | 15 ++-
+ include/linux/pagevec.h | 5 +
+ include/linux/shmem_fs.h | 1
+ mm/filemap.c | 202 +++++++++++++++++++++++++++++++++++++++++------
+ mm/mincore.c | 20 +++-
+ mm/readahead.c | 2
+ mm/shmem.c | 97 ++++------------------
+ mm/swap.c | 51 +++++++++++
+ mm/truncate.c | 74 ++++++++++++++---
+ 11 files changed, 348 insertions(+), 129 deletions(-)
+
+--- a/fs/btrfs/compression.c
++++ b/fs/btrfs/compression.c
+@@ -472,7 +472,7 @@ static noinline int add_ra_bio_pages(str
+ rcu_read_lock();
+ page = radix_tree_lookup(&mapping->page_tree, pg_index);
+ rcu_read_unlock();
+- if (page) {
++ if (page && !radix_tree_exceptional_entry(page)) {
+ misses++;
+ if (misses > 4)
+ break;
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -1041,6 +1041,14 @@ extern void show_free_areas(unsigned int
+ extern bool skip_free_areas_node(unsigned int flags, int nid);
+
+ int shmem_zero_setup(struct vm_area_struct *);
++#ifdef CONFIG_SHMEM
++bool shmem_mapping(struct address_space *mapping);
++#else
++static inline bool shmem_mapping(struct address_space *mapping)
++{
++ return false;
++}
++#endif
+
+ extern int can_do_mlock(void);
+ extern int user_shm_lock(size_t, struct user_struct *);
+--- a/include/linux/pagemap.h
++++ b/include/linux/pagemap.h
+@@ -248,12 +248,15 @@ pgoff_t page_cache_next_hole(struct addr
+ pgoff_t page_cache_prev_hole(struct address_space *mapping,
+ pgoff_t index, unsigned long max_scan);
+
+-extern struct page * find_get_page(struct address_space *mapping,
+- pgoff_t index);
+-extern struct page * find_lock_page(struct address_space *mapping,
+- pgoff_t index);
+-extern struct page * find_or_create_page(struct address_space *mapping,
+- pgoff_t index, gfp_t gfp_mask);
++struct page *find_get_entry(struct address_space *mapping, pgoff_t offset);
++struct page *find_get_page(struct address_space *mapping, pgoff_t offset);
++struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset);
++struct page *find_lock_page(struct address_space *mapping, pgoff_t offset);
++struct page *find_or_create_page(struct address_space *mapping, pgoff_t index,
++ gfp_t gfp_mask);
++unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
++ unsigned int nr_entries, struct page **entries,
++ pgoff_t *indices);
+ unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
+ unsigned int nr_pages, struct page **pages);
+ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start,
+--- a/include/linux/pagevec.h
++++ b/include/linux/pagevec.h
+@@ -22,6 +22,11 @@ struct pagevec {
+
+ void __pagevec_release(struct pagevec *pvec);
+ void __pagevec_lru_add(struct pagevec *pvec);
++unsigned pagevec_lookup_entries(struct pagevec *pvec,
++ struct address_space *mapping,
++ pgoff_t start, unsigned nr_entries,
++ pgoff_t *indices);
++void pagevec_remove_exceptionals(struct pagevec *pvec);
+ unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
+ pgoff_t start, unsigned nr_pages);
+ unsigned pagevec_lookup_tag(struct pagevec *pvec,
+--- a/include/linux/shmem_fs.h
++++ b/include/linux/shmem_fs.h
+@@ -51,6 +51,7 @@ extern struct file *shmem_kernel_file_se
+ unsigned long flags);
+ extern int shmem_zero_setup(struct vm_area_struct *);
+ extern int shmem_lock(struct file *file, int lock, struct user_struct *user);
++extern bool shmem_mapping(struct address_space *mapping);
+ extern void shmem_unlock_mapping(struct address_space *mapping);
+ extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
+ pgoff_t index, gfp_t gfp_mask);
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -448,6 +448,29 @@ int replace_page_cache_page(struct page
+ }
+ EXPORT_SYMBOL_GPL(replace_page_cache_page);
+
++static int page_cache_tree_insert(struct address_space *mapping,
++ struct page *page)
++{
++ void **slot;
++ int error;
++
++ slot = radix_tree_lookup_slot(&mapping->page_tree, page->index);
++ if (slot) {
++ void *p;
++
++ p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
++ if (!radix_tree_exceptional_entry(p))
++ return -EEXIST;
++ radix_tree_replace_slot(slot, page);
++ mapping->nrpages++;
++ return 0;
++ }
++ error = radix_tree_insert(&mapping->page_tree, page->index, page);
++ if (!error)
++ mapping->nrpages++;
++ return error;
++}
++
+ /**
+ * add_to_page_cache_locked - add a locked page to the pagecache
+ * @page: page to add
+@@ -482,11 +505,10 @@ int add_to_page_cache_locked(struct page
+ page->index = offset;
+
+ spin_lock_irq(&mapping->tree_lock);
+- error = radix_tree_insert(&mapping->page_tree, offset, page);
++ error = page_cache_tree_insert(mapping, page);
+ radix_tree_preload_end();
+ if (unlikely(error))
+ goto err_insert;
+- mapping->nrpages++;
+ __inc_zone_page_state(page, NR_FILE_PAGES);
+ spin_unlock_irq(&mapping->tree_lock);
+ trace_mm_filemap_add_to_page_cache(page);
+@@ -714,7 +736,10 @@ pgoff_t page_cache_next_hole(struct addr
+ unsigned long i;
+
+ for (i = 0; i < max_scan; i++) {
+- if (!radix_tree_lookup(&mapping->page_tree, index))
++ struct page *page;
++
++ page = radix_tree_lookup(&mapping->page_tree, index);
++ if (!page || radix_tree_exceptional_entry(page))
+ break;
+ index++;
+ if (index == 0)
+@@ -752,7 +777,10 @@ pgoff_t page_cache_prev_hole(struct addr
+ unsigned long i;
+
+ for (i = 0; i < max_scan; i++) {
+- if (!radix_tree_lookup(&mapping->page_tree, index))
++ struct page *page;
++
++ page = radix_tree_lookup(&mapping->page_tree, index);
++ if (!page || radix_tree_exceptional_entry(page))
+ break;
+ index--;
+ if (index == ULONG_MAX)
+@@ -764,14 +792,19 @@ pgoff_t page_cache_prev_hole(struct addr
+ EXPORT_SYMBOL(page_cache_prev_hole);
+
+ /**
+- * find_get_page - find and get a page reference
++ * find_get_entry - find and get a page cache entry
+ * @mapping: the address_space to search
+- * @offset: the page index
++ * @offset: the page cache index
++ *
++ * Looks up the page cache slot at @mapping & @offset. If there is a
++ * page cache page, it is returned with an increased refcount.
++ *
++ * If the slot holds a shadow entry of a previously evicted page, it
++ * is returned.
+ *
+- * Is there a pagecache struct page at the given (mapping, offset) tuple?
+- * If yes, increment its refcount and return it; if no, return NULL.
++ * Otherwise, %NULL is returned.
+ */
+-struct page *find_get_page(struct address_space *mapping, pgoff_t offset)
++struct page *find_get_entry(struct address_space *mapping, pgoff_t offset)
+ {
+ void **pagep;
+ struct page *page;
+@@ -812,24 +845,50 @@ out:
+
+ return page;
+ }
+-EXPORT_SYMBOL(find_get_page);
++EXPORT_SYMBOL(find_get_entry);
+
+ /**
+- * find_lock_page - locate, pin and lock a pagecache page
++ * find_get_page - find and get a page reference
+ * @mapping: the address_space to search
+ * @offset: the page index
+ *
+- * Locates the desired pagecache page, locks it, increments its reference
+- * count and returns its address.
++ * Looks up the page cache slot at @mapping & @offset. If there is a
++ * page cache page, it is returned with an increased refcount.
+ *
+- * Returns zero if the page was not present. find_lock_page() may sleep.
++ * Otherwise, %NULL is returned.
+ */
+-struct page *find_lock_page(struct address_space *mapping, pgoff_t offset)
++struct page *find_get_page(struct address_space *mapping, pgoff_t offset)
++{
++ struct page *page = find_get_entry(mapping, offset);
++
++ if (radix_tree_exceptional_entry(page))
++ page = NULL;
++ return page;
++}
++EXPORT_SYMBOL(find_get_page);
++
++/**
++ * find_lock_entry - locate, pin and lock a page cache entry
++ * @mapping: the address_space to search
++ * @offset: the page cache index
++ *
++ * Looks up the page cache slot at @mapping & @offset. If there is a
++ * page cache page, it is returned locked and with an increased
++ * refcount.
++ *
++ * If the slot holds a shadow entry of a previously evicted page, it
++ * is returned.
++ *
++ * Otherwise, %NULL is returned.
++ *
++ * find_lock_entry() may sleep.
++ */
++struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset)
+ {
+ struct page *page;
+
+ repeat:
+- page = find_get_page(mapping, offset);
++ page = find_get_entry(mapping, offset);
+ if (page && !radix_tree_exception(page)) {
+ lock_page(page);
+ /* Has the page been truncated? */
+@@ -842,6 +901,29 @@ repeat:
+ }
+ return page;
+ }
++EXPORT_SYMBOL(find_lock_entry);
++
++/**
++ * find_lock_page - locate, pin and lock a pagecache page
++ * @mapping: the address_space to search
++ * @offset: the page index
++ *
++ * Looks up the page cache slot at @mapping & @offset. If there is a
++ * page cache page, it is returned locked and with an increased
++ * refcount.
++ *
++ * Otherwise, %NULL is returned.
++ *
++ * find_lock_page() may sleep.
++ */
++struct page *find_lock_page(struct address_space *mapping, pgoff_t offset)
++{
++ struct page *page = find_lock_entry(mapping, offset);
++
++ if (radix_tree_exceptional_entry(page))
++ page = NULL;
++ return page;
++}
+ EXPORT_SYMBOL(find_lock_page);
+
+ /**
+@@ -850,16 +932,18 @@ EXPORT_SYMBOL(find_lock_page);
+ * @index: the page's index into the mapping
+ * @gfp_mask: page allocation mode
+ *
+- * Locates a page in the pagecache. If the page is not present, a new page
+- * is allocated using @gfp_mask and is added to the pagecache and to the VM's
+- * LRU list. The returned page is locked and has its reference count
+- * incremented.
++ * Looks up the page cache slot at @mapping & @offset. If there is a
++ * page cache page, it is returned locked and with an increased
++ * refcount.
++ *
++ * If the page is not present, a new page is allocated using @gfp_mask
++ * and added to the page cache and the VM's LRU list. The page is
++ * returned locked and with an increased refcount.
+ *
+- * find_or_create_page() may sleep, even if @gfp_flags specifies an atomic
+- * allocation!
++ * On memory exhaustion, %NULL is returned.
+ *
+- * find_or_create_page() returns the desired page's address, or zero on
+- * memory exhaustion.
++ * find_or_create_page() may sleep, even if @gfp_flags specifies an
++ * atomic allocation!
+ */
+ struct page *find_or_create_page(struct address_space *mapping,
+ pgoff_t index, gfp_t gfp_mask)
+@@ -892,6 +976,76 @@ repeat:
+ EXPORT_SYMBOL(find_or_create_page);
+
+ /**
++ * find_get_entries - gang pagecache lookup
++ * @mapping: The address_space to search
++ * @start: The starting page cache index
++ * @nr_entries: The maximum number of entries
++ * @entries: Where the resulting entries are placed
++ * @indices: The cache indices corresponding to the entries in @entries
++ *
++ * find_get_entries() will search for and return a group of up to
++ * @nr_entries entries in the mapping. The entries are placed at
++ * @entries. find_get_entries() takes a reference against any actual
++ * pages it returns.
++ *
++ * The search returns a group of mapping-contiguous page cache entries
++ * with ascending indexes. There may be holes in the indices due to
++ * not-present pages.
++ *
++ * Any shadow entries of evicted pages are included in the returned
++ * array.
++ *
++ * find_get_entries() returns the number of pages and shadow entries
++ * which were found.
++ */
++unsigned find_get_entries(struct address_space *mapping,
++ pgoff_t start, unsigned int nr_entries,
++ struct page **entries, pgoff_t *indices)
++{
++ void **slot;
++ unsigned int ret = 0;
++ struct radix_tree_iter iter;
++
++ if (!nr_entries)
++ return 0;
++
++ rcu_read_lock();
++restart:
++ radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
++ struct page *page;
++repeat:
++ page = radix_tree_deref_slot(slot);
++ if (unlikely(!page))
++ continue;
++ if (radix_tree_exception(page)) {
++ if (radix_tree_deref_retry(page))
++ goto restart;
++ /*
++ * Otherwise, we must be storing a swap entry
++ * here as an exceptional entry: so return it
++ * without attempting to raise page count.
++ */
++ goto export;
++ }
++ if (!page_cache_get_speculative(page))
++ goto repeat;
++
++ /* Has the page moved? */
++ if (unlikely(page != *slot)) {
++ page_cache_release(page);
++ goto repeat;
++ }
++export:
++ indices[ret] = iter.index;
++ entries[ret] = page;
++ if (++ret == nr_entries)
++ break;
++ }
++ rcu_read_unlock();
++ return ret;
++}
++
++/**
+ * find_get_pages - gang pagecache lookup
+ * @mapping: The address_space to search
+ * @start: The starting page index
+--- a/mm/mincore.c
++++ b/mm/mincore.c
+@@ -70,13 +70,21 @@ static unsigned char mincore_page(struct
+ * any other file mapping (ie. marked !present and faulted in with
+ * tmpfs's .fault). So swapped out tmpfs mappings are tested here.
+ */
+- page = find_get_page(mapping, pgoff);
+ #ifdef CONFIG_SWAP
+- /* shmem/tmpfs may return swap: account for swapcache page too. */
+- if (radix_tree_exceptional_entry(page)) {
+- swp_entry_t swap = radix_to_swp_entry(page);
+- page = find_get_page(swap_address_space(swap), swap.val);
+- }
++ if (shmem_mapping(mapping)) {
++ page = find_get_entry(mapping, pgoff);
++ /*
++ * shmem/tmpfs may return swap: account for swapcache
++ * page too.
++ */
++ if (radix_tree_exceptional_entry(page)) {
++ swp_entry_t swp = radix_to_swp_entry(page);
++ page = find_get_page(swap_address_space(swp), swp.val);
++ }
++ } else
++ page = find_get_page(mapping, pgoff);
++#else
++ page = find_get_page(mapping, pgoff);
+ #endif
+ if (page) {
+ present = PageUptodate(page);
+--- a/mm/readahead.c
++++ b/mm/readahead.c
+@@ -179,7 +179,7 @@ __do_page_cache_readahead(struct address
+ rcu_read_lock();
+ page = radix_tree_lookup(&mapping->page_tree, page_offset);
+ rcu_read_unlock();
+- if (page)
++ if (page && !radix_tree_exceptional_entry(page))
+ continue;
+
+ page = page_cache_alloc_readahead(mapping);
+--- a/mm/shmem.c
++++ b/mm/shmem.c
+@@ -330,56 +330,6 @@ static void shmem_delete_from_page_cache
+ }
+
+ /*
+- * Like find_get_pages, but collecting swap entries as well as pages.
+- */
+-static unsigned shmem_find_get_pages_and_swap(struct address_space *mapping,
+- pgoff_t start, unsigned int nr_pages,
+- struct page **pages, pgoff_t *indices)
+-{
+- void **slot;
+- unsigned int ret = 0;
+- struct radix_tree_iter iter;
+-
+- if (!nr_pages)
+- return 0;
+-
+- rcu_read_lock();
+-restart:
+- radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
+- struct page *page;
+-repeat:
+- page = radix_tree_deref_slot(slot);
+- if (unlikely(!page))
+- continue;
+- if (radix_tree_exception(page)) {
+- if (radix_tree_deref_retry(page))
+- goto restart;
+- /*
+- * Otherwise, we must be storing a swap entry
+- * here as an exceptional entry: so return it
+- * without attempting to raise page count.
+- */
+- goto export;
+- }
+- if (!page_cache_get_speculative(page))
+- goto repeat;
+-
+- /* Has the page moved? */
+- if (unlikely(page != *slot)) {
+- page_cache_release(page);
+- goto repeat;
+- }
+-export:
+- indices[ret] = iter.index;
+- pages[ret] = page;
+- if (++ret == nr_pages)
+- break;
+- }
+- rcu_read_unlock();
+- return ret;
+-}
+-
+-/*
+ * Remove swap entry from radix tree, free the swap and its page cache.
+ */
+ static int shmem_free_swap(struct address_space *mapping,
+@@ -397,21 +347,6 @@ static int shmem_free_swap(struct addres
+ }
+
+ /*
+- * Pagevec may contain swap entries, so shuffle up pages before releasing.
+- */
+-static void shmem_deswap_pagevec(struct pagevec *pvec)
+-{
+- int i, j;
+-
+- for (i = 0, j = 0; i < pagevec_count(pvec); i++) {
+- struct page *page = pvec->pages[i];
+- if (!radix_tree_exceptional_entry(page))
+- pvec->pages[j++] = page;
+- }
+- pvec->nr = j;
+-}
+-
+-/*
+ * SysV IPC SHM_UNLOCK restore Unevictable pages to their evictable lists.
+ */
+ void shmem_unlock_mapping(struct address_space *mapping)
+@@ -429,12 +364,12 @@ void shmem_unlock_mapping(struct address
+ * Avoid pagevec_lookup(): find_get_pages() returns 0 as if it
+ * has finished, if it hits a row of PAGEVEC_SIZE swap entries.
+ */
+- pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
+- PAGEVEC_SIZE, pvec.pages, indices);
++ pvec.nr = find_get_entries(mapping, index,
++ PAGEVEC_SIZE, pvec.pages, indices);
+ if (!pvec.nr)
+ break;
+ index = indices[pvec.nr - 1] + 1;
+- shmem_deswap_pagevec(&pvec);
++ pagevec_remove_exceptionals(&pvec);
+ check_move_unevictable_pages(pvec.pages, pvec.nr);
+ pagevec_release(&pvec);
+ cond_resched();
+@@ -466,9 +401,9 @@ static void shmem_undo_range(struct inod
+ pagevec_init(&pvec, 0);
+ index = start;
+ while (index < end) {
+- pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
+- min(end - index, (pgoff_t)PAGEVEC_SIZE),
+- pvec.pages, indices);
++ pvec.nr = find_get_entries(mapping, index,
++ min(end - index, (pgoff_t)PAGEVEC_SIZE),
++ pvec.pages, indices);
+ if (!pvec.nr)
+ break;
+ mem_cgroup_uncharge_start();
+@@ -497,7 +432,7 @@ static void shmem_undo_range(struct inod
+ }
+ unlock_page(page);
+ }
+- shmem_deswap_pagevec(&pvec);
++ pagevec_remove_exceptionals(&pvec);
+ pagevec_release(&pvec);
+ mem_cgroup_uncharge_end();
+ cond_resched();
+@@ -535,9 +470,10 @@ static void shmem_undo_range(struct inod
+ index = start;
+ while (index < end) {
+ cond_resched();
+- pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
++
++ pvec.nr = find_get_entries(mapping, index,
+ min(end - index, (pgoff_t)PAGEVEC_SIZE),
+- pvec.pages, indices);
++ pvec.pages, indices);
+ if (!pvec.nr) {
+ /* If all gone or hole-punch or unfalloc, we're done */
+ if (index == start || end != -1)
+@@ -580,7 +516,7 @@ static void shmem_undo_range(struct inod
+ }
+ unlock_page(page);
+ }
+- shmem_deswap_pagevec(&pvec);
++ pagevec_remove_exceptionals(&pvec);
+ pagevec_release(&pvec);
+ mem_cgroup_uncharge_end();
+ index++;
+@@ -1087,7 +1023,7 @@ static int shmem_getpage_gfp(struct inod
+ return -EFBIG;
+ repeat:
+ swap.val = 0;
+- page = find_lock_page(mapping, index);
++ page = find_lock_entry(mapping, index);
+ if (radix_tree_exceptional_entry(page)) {
+ swap = radix_to_swp_entry(page);
+ page = NULL;
+@@ -1482,6 +1418,11 @@ static struct inode *shmem_get_inode(str
+ return inode;
+ }
+
++bool shmem_mapping(struct address_space *mapping)
++{
++ return mapping->backing_dev_info == &shmem_backing_dev_info;
++}
++
+ #ifdef CONFIG_TMPFS
+ static const struct inode_operations shmem_symlink_inode_operations;
+ static const struct inode_operations shmem_short_symlink_operations;
+@@ -1794,7 +1735,7 @@ static pgoff_t shmem_seek_hole_data(stru
+ pagevec_init(&pvec, 0);
+ pvec.nr = 1; /* start small: we may be there already */
+ while (!done) {
+- pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
++ pvec.nr = find_get_entries(mapping, index,
+ pvec.nr, pvec.pages, indices);
+ if (!pvec.nr) {
+ if (whence == SEEK_DATA)
+@@ -1821,7 +1762,7 @@ static pgoff_t shmem_seek_hole_data(stru
+ break;
+ }
+ }
+- shmem_deswap_pagevec(&pvec);
++ pagevec_remove_exceptionals(&pvec);
+ pagevec_release(&pvec);
+ pvec.nr = PAGEVEC_SIZE;
+ cond_resched();
+--- a/mm/swap.c
++++ b/mm/swap.c
+@@ -948,6 +948,57 @@ void __pagevec_lru_add(struct pagevec *p
+ EXPORT_SYMBOL(__pagevec_lru_add);
+
+ /**
++ * pagevec_lookup_entries - gang pagecache lookup
++ * @pvec: Where the resulting entries are placed
++ * @mapping: The address_space to search
++ * @start: The starting entry index
++ * @nr_entries: The maximum number of entries
++ * @indices: The cache indices corresponding to the entries in @pvec
++ *
++ * pagevec_lookup_entries() will search for and return a group of up
++ * to @nr_entries pages and shadow entries in the mapping. All
++ * entries are placed in @pvec. pagevec_lookup_entries() takes a
++ * reference against actual pages in @pvec.
++ *
++ * The search returns a group of mapping-contiguous entries with
++ * ascending indexes. There may be holes in the indices due to
++ * not-present entries.
++ *
++ * pagevec_lookup_entries() returns the number of entries which were
++ * found.
++ */
++unsigned pagevec_lookup_entries(struct pagevec *pvec,
++ struct address_space *mapping,
++ pgoff_t start, unsigned nr_pages,
++ pgoff_t *indices)
++{
++ pvec->nr = find_get_entries(mapping, start, nr_pages,
++ pvec->pages, indices);
++ return pagevec_count(pvec);
++}
++
++/**
++ * pagevec_remove_exceptionals - pagevec exceptionals pruning
++ * @pvec: The pagevec to prune
++ *
++ * pagevec_lookup_entries() fills both pages and exceptional radix
++ * tree entries into the pagevec. This function prunes all
++ * exceptionals from @pvec without leaving holes, so that it can be
++ * passed on to page-only pagevec operations.
++ */
++void pagevec_remove_exceptionals(struct pagevec *pvec)
++{
++ int i, j;
++
++ for (i = 0, j = 0; i < pagevec_count(pvec); i++) {
++ struct page *page = pvec->pages[i];
++ if (!radix_tree_exceptional_entry(page))
++ pvec->pages[j++] = page;
++ }
++ pvec->nr = j;
++}
++
++/**
+ * pagevec_lookup - gang pagecache lookup
+ * @pvec: Where the resulting pages are placed
+ * @mapping: The address_space to search
+--- a/mm/truncate.c
++++ b/mm/truncate.c
+@@ -23,6 +23,22 @@
+ #include <linux/rmap.h>
+ #include "internal.h"
+
++static void clear_exceptional_entry(struct address_space *mapping,
++ pgoff_t index, void *entry)
++{
++ /* Handled by shmem itself */
++ if (shmem_mapping(mapping))
++ return;
++
++ spin_lock_irq(&mapping->tree_lock);
++ /*
++ * Regular page slots are stabilized by the page lock even
++ * without the tree itself locked. These unlocked entries
++ * need verification under the tree lock.
++ */
++ radix_tree_delete_item(&mapping->page_tree, index, entry);
++ spin_unlock_irq(&mapping->tree_lock);
++}
+
+ /**
+ * do_invalidatepage - invalidate part or all of a page
+@@ -209,6 +225,7 @@ void truncate_inode_pages_range(struct a
+ unsigned int partial_start; /* inclusive */
+ unsigned int partial_end; /* exclusive */
+ struct pagevec pvec;
++ pgoff_t indices[PAGEVEC_SIZE];
+ pgoff_t index;
+ int i;
+
+@@ -239,17 +256,23 @@ void truncate_inode_pages_range(struct a
+
+ pagevec_init(&pvec, 0);
+ index = start;
+- while (index < end && pagevec_lookup(&pvec, mapping, index,
+- min(end - index, (pgoff_t)PAGEVEC_SIZE))) {
++ while (index < end && pagevec_lookup_entries(&pvec, mapping, index,
++ min(end - index, (pgoff_t)PAGEVEC_SIZE),
++ indices)) {
+ mem_cgroup_uncharge_start();
+ for (i = 0; i < pagevec_count(&pvec); i++) {
+ struct page *page = pvec.pages[i];
+
+ /* We rely upon deletion not changing page->index */
+- index = page->index;
++ index = indices[i];
+ if (index >= end)
+ break;
+
++ if (radix_tree_exceptional_entry(page)) {
++ clear_exceptional_entry(mapping, index, page);
++ continue;
++ }
++
+ if (!trylock_page(page))
+ continue;
+ WARN_ON(page->index != index);
+@@ -260,6 +283,7 @@ void truncate_inode_pages_range(struct a
+ truncate_inode_page(mapping, page);
+ unlock_page(page);
+ }
++ pagevec_remove_exceptionals(&pvec);
+ pagevec_release(&pvec);
+ mem_cgroup_uncharge_end();
+ cond_resched();
+@@ -308,14 +332,16 @@ void truncate_inode_pages_range(struct a
+ index = start;
+ for ( ; ; ) {
+ cond_resched();
+- if (!pagevec_lookup(&pvec, mapping, index,
+- min(end - index, (pgoff_t)PAGEVEC_SIZE))) {
++ if (!pagevec_lookup_entries(&pvec, mapping, index,
++ min(end - index, (pgoff_t)PAGEVEC_SIZE),
++ indices)) {
+ if (index == start)
+ break;
+ index = start;
+ continue;
+ }
+- if (index == start && pvec.pages[0]->index >= end) {
++ if (index == start && indices[0] >= end) {
++ pagevec_remove_exceptionals(&pvec);
+ pagevec_release(&pvec);
+ break;
+ }
+@@ -324,16 +350,22 @@ void truncate_inode_pages_range(struct a
+ struct page *page = pvec.pages[i];
+
+ /* We rely upon deletion not changing page->index */
+- index = page->index;
++ index = indices[i];
+ if (index >= end)
+ break;
+
++ if (radix_tree_exceptional_entry(page)) {
++ clear_exceptional_entry(mapping, index, page);
++ continue;
++ }
++
+ lock_page(page);
+ WARN_ON(page->index != index);
+ wait_on_page_writeback(page);
+ truncate_inode_page(mapping, page);
+ unlock_page(page);
+ }
++ pagevec_remove_exceptionals(&pvec);
+ pagevec_release(&pvec);
+ mem_cgroup_uncharge_end();
+ index++;
+@@ -376,6 +408,7 @@ EXPORT_SYMBOL(truncate_inode_pages);
+ unsigned long invalidate_mapping_pages(struct address_space *mapping,
+ pgoff_t start, pgoff_t end)
+ {
++ pgoff_t indices[PAGEVEC_SIZE];
+ struct pagevec pvec;
+ pgoff_t index = start;
+ unsigned long ret;
+@@ -391,17 +424,23 @@ unsigned long invalidate_mapping_pages(s
+ */
+
+ pagevec_init(&pvec, 0);
+- while (index <= end && pagevec_lookup(&pvec, mapping, index,
+- min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
++ while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
++ min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
++ indices)) {
+ mem_cgroup_uncharge_start();
+ for (i = 0; i < pagevec_count(&pvec); i++) {
+ struct page *page = pvec.pages[i];
+
+ /* We rely upon deletion not changing page->index */
+- index = page->index;
++ index = indices[i];
+ if (index > end)
+ break;
+
++ if (radix_tree_exceptional_entry(page)) {
++ clear_exceptional_entry(mapping, index, page);
++ continue;
++ }
++
+ if (!trylock_page(page))
+ continue;
+ WARN_ON(page->index != index);
+@@ -415,6 +454,7 @@ unsigned long invalidate_mapping_pages(s
+ deactivate_page(page);
+ count += ret;
+ }
++ pagevec_remove_exceptionals(&pvec);
+ pagevec_release(&pvec);
+ mem_cgroup_uncharge_end();
+ cond_resched();
+@@ -482,6 +522,7 @@ static int do_launder_page(struct addres
+ int invalidate_inode_pages2_range(struct address_space *mapping,
+ pgoff_t start, pgoff_t end)
+ {
++ pgoff_t indices[PAGEVEC_SIZE];
+ struct pagevec pvec;
+ pgoff_t index;
+ int i;
+@@ -492,17 +533,23 @@ int invalidate_inode_pages2_range(struct
+ cleancache_invalidate_inode(mapping);
+ pagevec_init(&pvec, 0);
+ index = start;
+- while (index <= end && pagevec_lookup(&pvec, mapping, index,
+- min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
++ while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
++ min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
++ indices)) {
+ mem_cgroup_uncharge_start();
+ for (i = 0; i < pagevec_count(&pvec); i++) {
+ struct page *page = pvec.pages[i];
+
+ /* We rely upon deletion not changing page->index */
+- index = page->index;
++ index = indices[i];
+ if (index > end)
+ break;
+
++ if (radix_tree_exceptional_entry(page)) {
++ clear_exceptional_entry(mapping, index, page);
++ continue;
++ }
++
+ lock_page(page);
+ WARN_ON(page->index != index);
+ if (page->mapping != mapping) {
+@@ -540,6 +587,7 @@ int invalidate_inode_pages2_range(struct
+ ret = ret2;
+ unlock_page(page);
+ }
++ pagevec_remove_exceptionals(&pvec);
+ pagevec_release(&pvec);
+ mem_cgroup_uncharge_end();
+ cond_resched();
--- /dev/null
+From 55231e5c898c5c03c14194001e349f40f59bd300 Mon Sep 17 00:00:00 2001
+From: Johannes Weiner <hannes@cmpxchg.org>
+Date: Thu, 22 May 2014 11:54:17 -0700
+Subject: mm: madvise: fix MADV_WILLNEED on shmem swapouts
+
+From: Johannes Weiner <hannes@cmpxchg.org>
+
+commit 55231e5c898c5c03c14194001e349f40f59bd300 upstream.
+
+MADV_WILLNEED currently does not read swapped out shmem pages back in.
+
+Commit 0cd6144aadd2 ("mm + fs: prepare for non-page entries in page
+cache radix trees") made find_get_page() filter exceptional radix tree
+entries but failed to convert all find_get_page() callers that WANT
+exceptional entries over to find_get_entry(). One of them is shmem swap
+readahead in madvise, which now skips over any swap-out records.
+
+Convert it to find_get_entry().
+
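+For illustration, a fragment along the lines of what
+force_shm_swapin_readahead() needs (variable declarations omitted): a
+caller that wants the swap records must use the raw lookup, because the
+filtered one now hides them:
+
+	page = find_get_entry(mapping, index);	/* not find_get_page() */
+	if (radix_tree_exceptional_entry(page)) {
+		swp_entry_t swap = radix_to_swp_entry(page);
+
+		page = read_swap_cache_async(swap, GFP_HIGHUSER_MOVABLE,
+					     NULL, 0);
+	}
+	if (page)
+		page_cache_release(page);
+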
+Fixes: 0cd6144aadd2 ("mm + fs: prepare for non-page entries in page cache radix trees")
+Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
+Reported-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/madvise.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/madvise.c
++++ b/mm/madvise.c
+@@ -195,7 +195,7 @@ static void force_shm_swapin_readahead(s
+ for (; start < end; start += PAGE_SIZE) {
+ index = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
+
+- page = find_get_page(mapping, index);
++ page = find_get_entry(mapping, index);
+ if (!radix_tree_exceptional_entry(page)) {
+ if (page)
+ page_cache_release(page);
--- /dev/null
+From 68711a746345c44ae00c64d8dbac6a9ce13ac54a Mon Sep 17 00:00:00 2001
+From: David Rientjes <rientjes@google.com>
+Date: Wed, 4 Jun 2014 16:08:25 -0700
+Subject: mm, migration: add destination page freeing callback
+
+From: David Rientjes <rientjes@google.com>
+
+commit 68711a746345c44ae00c64d8dbac6a9ce13ac54a upstream.
+
+Memory migration uses a callback defined by the caller to determine how to
+allocate destination pages. When migration fails for a source page,
+however, it frees the destination page back to the system.
+
+This patch adds a memory migration callback defined by the caller to
+determine how to free destination pages. If a caller, such as memory
+compaction, builds its own freelist for migration targets, this can reuse
+already freed memory instead of scanning additional memory.
+
+If the caller provides a function to handle freeing of destination pages,
+it is called when page migration fails. If the caller passes NULL then
+freeing back to the system will be handled as usual. This patch
+introduces no functional change.
+
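+Illustrative only (the pool type and the *_from_pool()/*_to_pool()
+helpers are hypothetical): a caller that manages its own pool of target
+pages can get failed targets handed back through the new callback instead
+of losing them to the page allocator:
+
+	static struct page *example_alloc(struct page *page,
+					  unsigned long private, int **result)
+	{
+		return take_page_from_pool((struct example_pool *)private);
+	}
+
+	static void example_free(struct page *page, unsigned long private)
+	{
+		return_page_to_pool((struct example_pool *)private, page);
+	}
+
+	/* in the caller, with @pool and @pagelist set up: */
+	err = migrate_pages(&pagelist, example_alloc, example_free,
+			    (unsigned long)pool, MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
+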
+Signed-off-by: David Rientjes <rientjes@google.com>
+Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Acked-by: Mel Gorman <mgorman@suse.de>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Greg Thelen <gthelen@google.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+
+---
+ include/linux/migrate.h | 11 ++++++---
+ mm/compaction.c | 2 -
+ mm/memory-failure.c | 4 +--
+ mm/memory_hotplug.c | 2 -
+ mm/mempolicy.c | 4 +--
+ mm/migrate.c | 55 ++++++++++++++++++++++++++++++++++--------------
+ mm/page_alloc.c | 2 -
+ 7 files changed, 53 insertions(+), 27 deletions(-)
+
+--- a/include/linux/migrate.h
++++ b/include/linux/migrate.h
+@@ -5,7 +5,9 @@
+ #include <linux/mempolicy.h>
+ #include <linux/migrate_mode.h>
+
+-typedef struct page *new_page_t(struct page *, unsigned long private, int **);
++typedef struct page *new_page_t(struct page *page, unsigned long private,
++ int **reason);
++typedef void free_page_t(struct page *page, unsigned long private);
+
+ /*
+ * Return values from addresss_space_operations.migratepage():
+@@ -38,7 +40,7 @@ enum migrate_reason {
+ extern void putback_movable_pages(struct list_head *l);
+ extern int migrate_page(struct address_space *,
+ struct page *, struct page *, enum migrate_mode);
+-extern int migrate_pages(struct list_head *l, new_page_t x,
++extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free,
+ unsigned long private, enum migrate_mode mode, int reason);
+
+ extern int migrate_prep(void);
+@@ -56,8 +58,9 @@ extern int migrate_page_move_mapping(str
+ #else
+
+ static inline void putback_movable_pages(struct list_head *l) {}
+-static inline int migrate_pages(struct list_head *l, new_page_t x,
+- unsigned long private, enum migrate_mode mode, int reason)
++static inline int migrate_pages(struct list_head *l, new_page_t new,
++ free_page_t free, unsigned long private, enum migrate_mode mode,
++ int reason)
+ { return -ENOSYS; }
+
+ static inline int migrate_prep(void) { return -ENOSYS; }
+--- a/mm/compaction.c
++++ b/mm/compaction.c
+@@ -1016,7 +1016,7 @@ static int compact_zone(struct zone *zon
+ }
+
+ nr_migrate = cc->nr_migratepages;
+- err = migrate_pages(&cc->migratepages, compaction_alloc,
++ err = migrate_pages(&cc->migratepages, compaction_alloc, NULL,
+ (unsigned long)cc,
+ cc->sync ? MIGRATE_SYNC_LIGHT : MIGRATE_ASYNC,
+ MR_COMPACTION);
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -1540,7 +1540,7 @@ static int soft_offline_huge_page(struct
+
+ /* Keep page count to indicate a given hugepage is isolated. */
+ list_move(&hpage->lru, &pagelist);
+- ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL,
++ ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL,
+ MIGRATE_SYNC, MR_MEMORY_FAILURE);
+ if (ret) {
+ pr_info("soft offline: %#lx: migration failed %d, type %lx\n",
+@@ -1621,7 +1621,7 @@ static int __soft_offline_page(struct pa
+ inc_zone_page_state(page, NR_ISOLATED_ANON +
+ page_is_file_cache(page));
+ list_add(&page->lru, &pagelist);
+- ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL,
++ ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL,
+ MIGRATE_SYNC, MR_MEMORY_FAILURE);
+ if (ret) {
+ if (!list_empty(&pagelist)) {
+--- a/mm/memory_hotplug.c
++++ b/mm/memory_hotplug.c
+@@ -1332,7 +1332,7 @@ do_migrate_range(unsigned long start_pfn
+ * alloc_migrate_target should be improooooved!!
+ * migrate_pages returns # of failed pages.
+ */
+- ret = migrate_pages(&source, alloc_migrate_target, 0,
++ ret = migrate_pages(&source, alloc_migrate_target, NULL, 0,
+ MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
+ if (ret)
+ putback_movable_pages(&source);
+--- a/mm/mempolicy.c
++++ b/mm/mempolicy.c
+@@ -1060,7 +1060,7 @@ static int migrate_to_node(struct mm_str
+ flags | MPOL_MF_DISCONTIG_OK, &pagelist);
+
+ if (!list_empty(&pagelist)) {
+- err = migrate_pages(&pagelist, new_node_page, dest,
++ err = migrate_pages(&pagelist, new_node_page, NULL, dest,
+ MIGRATE_SYNC, MR_SYSCALL);
+ if (err)
+ putback_movable_pages(&pagelist);
+@@ -1306,7 +1306,7 @@ static long do_mbind(unsigned long start
+
+ if (!list_empty(&pagelist)) {
+ WARN_ON_ONCE(flags & MPOL_MF_LAZY);
+- nr_failed = migrate_pages(&pagelist, new_page,
++ nr_failed = migrate_pages(&pagelist, new_page, NULL,
+ start, MIGRATE_SYNC, MR_MEMPOLICY_MBIND);
+ if (nr_failed)
+ putback_movable_pages(&pagelist);
+--- a/mm/migrate.c
++++ b/mm/migrate.c
+@@ -941,8 +941,9 @@ out:
+ * Obtain the lock on page, remove all ptes and migrate the page
+ * to the newly allocated page in newpage.
+ */
+-static int unmap_and_move(new_page_t get_new_page, unsigned long private,
+- struct page *page, int force, enum migrate_mode mode)
++static int unmap_and_move(new_page_t get_new_page, free_page_t put_new_page,
++ unsigned long private, struct page *page, int force,
++ enum migrate_mode mode)
+ {
+ int rc = 0;
+ int *result = NULL;
+@@ -986,11 +987,17 @@ out:
+ page_is_file_cache(page));
+ putback_lru_page(page);
+ }
++
+ /*
+- * Move the new page to the LRU. If migration was not successful
+- * then this will free the page.
++ * If migration was not successful and there's a freeing callback, use
++ * it. Otherwise, putback_lru_page() will drop the reference grabbed
++ * during isolation.
+ */
+- putback_lru_page(newpage);
++ if (rc != MIGRATEPAGE_SUCCESS && put_new_page)
++ put_new_page(newpage, private);
++ else
++ putback_lru_page(newpage);
++
+ if (result) {
+ if (rc)
+ *result = rc;
+@@ -1019,8 +1026,9 @@ out:
+ * will wait in the page fault for migration to complete.
+ */
+ static int unmap_and_move_huge_page(new_page_t get_new_page,
+- unsigned long private, struct page *hpage,
+- int force, enum migrate_mode mode)
++ free_page_t put_new_page, unsigned long private,
++ struct page *hpage, int force,
++ enum migrate_mode mode)
+ {
+ int rc = 0;
+ int *result = NULL;
+@@ -1059,20 +1067,30 @@ static int unmap_and_move_huge_page(new_
+ if (!page_mapped(hpage))
+ rc = move_to_new_page(new_hpage, hpage, 1, mode);
+
+- if (rc)
++ if (rc != MIGRATEPAGE_SUCCESS)
+ remove_migration_ptes(hpage, hpage);
+
+ if (anon_vma)
+ put_anon_vma(anon_vma);
+
+- if (!rc)
++ if (rc == MIGRATEPAGE_SUCCESS)
+ hugetlb_cgroup_migrate(hpage, new_hpage);
+
+ unlock_page(hpage);
+ out:
+ if (rc != -EAGAIN)
+ putback_active_hugepage(hpage);
+- put_page(new_hpage);
++
++ /*
++ * If migration was not successful and there's a freeing callback, use
++ * it. Otherwise, put_page() will drop the reference grabbed during
++ * isolation.
++ */
++ if (rc != MIGRATEPAGE_SUCCESS && put_new_page)
++ put_new_page(new_hpage, private);
++ else
++ put_page(new_hpage);
++
+ if (result) {
+ if (rc)
+ *result = rc;
+@@ -1089,6 +1107,8 @@ out:
+ * @from: The list of pages to be migrated.
+ * @get_new_page: The function used to allocate free pages to be used
+ * as the target of the page migration.
++ * @put_new_page: The function used to free target pages if migration
++ * fails, or NULL if no special handling is necessary.
+ * @private: Private data to be passed on to get_new_page()
+ * @mode: The migration mode that specifies the constraints for
+ * page migration, if any.
+@@ -1102,7 +1122,8 @@ out:
+ * Returns the number of pages that were not migrated, or an error code.
+ */
+ int migrate_pages(struct list_head *from, new_page_t get_new_page,
+- unsigned long private, enum migrate_mode mode, int reason)
++ free_page_t put_new_page, unsigned long private,
++ enum migrate_mode mode, int reason)
+ {
+ int retry = 1;
+ int nr_failed = 0;
+@@ -1124,10 +1145,11 @@ int migrate_pages(struct list_head *from
+
+ if (PageHuge(page))
+ rc = unmap_and_move_huge_page(get_new_page,
+- private, page, pass > 2, mode);
++ put_new_page, private, page,
++ pass > 2, mode);
+ else
+- rc = unmap_and_move(get_new_page, private,
+- page, pass > 2, mode);
++ rc = unmap_and_move(get_new_page, put_new_page,
++ private, page, pass > 2, mode);
+
+ switch(rc) {
+ case -ENOMEM:
+@@ -1276,7 +1298,7 @@ set_status:
+
+ err = 0;
+ if (!list_empty(&pagelist)) {
+- err = migrate_pages(&pagelist, new_page_node,
++ err = migrate_pages(&pagelist, new_page_node, NULL,
+ (unsigned long)pm, MIGRATE_SYNC, MR_SYSCALL);
+ if (err)
+ putback_movable_pages(&pagelist);
+@@ -1732,7 +1754,8 @@ int migrate_misplaced_page(struct page *
+
+ list_add(&page->lru, &migratepages);
+ nr_remaining = migrate_pages(&migratepages, alloc_misplaced_dst_page,
+- node, MIGRATE_ASYNC, MR_NUMA_MISPLACED);
++ NULL, node, MIGRATE_ASYNC,
++ MR_NUMA_MISPLACED);
+ if (nr_remaining) {
+ if (!list_empty(&migratepages)) {
+ list_del(&page->lru);
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -6261,7 +6261,7 @@ static int __alloc_contig_migrate_range(
+ cc->nr_migratepages -= nr_reclaimed;
+
+ ret = migrate_pages(&cc->migratepages, alloc_migrate_target,
+- 0, MIGRATE_SYNC, MR_CMA);
++ NULL, 0, MIGRATE_SYNC, MR_CMA);
+ }
+ if (ret < 0) {
+ putback_movable_pages(&cc->migratepages);
--- /dev/null
+From 29f175d125f0f3a9503af8a5596f93d714cceb08 Mon Sep 17 00:00:00 2001
+From: Fabian Frederick <fabf@skynet.be>
+Date: Mon, 7 Apr 2014 15:37:55 -0700
+Subject: mm/readahead.c: inline ra_submit
+
+From: Fabian Frederick <fabf@skynet.be>
+
+commit 29f175d125f0f3a9503af8a5596f93d714cceb08 upstream.
+
+Commit f9acc8c7b35a ("readahead: sanify file_ra_state names") left
+ra_submit with a single function call.
+
+Move ra_submit to internal.h and inline it to save some stack. Thanks
+to Andrew Morton for commenting on different versions.
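+
+The caller-side pattern is unchanged; a sketch based on the ondemand
+readahead path (get_init_ra_size() is a helper local to mm/readahead.c):
+
+    /* describe the readahead window in file_ra_state */
+    ra->start = offset;
+    ra->size = get_init_ra_size(req_size, max);
+    ra->async_size = ra->size > req_size ? ra->size - req_size : ra->size;
+
+    /* then submit it: ra_submit() is now an inline wrapper
+       around __do_page_cache_readahead() */
+    return ra_submit(ra, mapping, filp);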
+
+Signed-off-by: Fabian Frederick <fabf@skynet.be>
+Suggested-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/mm.h | 3 ---
+ mm/internal.h | 15 +++++++++++++++
+ mm/readahead.c | 21 +++------------------
+ 3 files changed, 18 insertions(+), 21 deletions(-)
+
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -1856,9 +1856,6 @@ void page_cache_async_readahead(struct a
+ unsigned long size);
+
+ unsigned long max_sane_readahead(unsigned long nr);
+-unsigned long ra_submit(struct file_ra_state *ra,
+- struct address_space *mapping,
+- struct file *filp);
+
+ /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */
+ extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
+--- a/mm/internal.h
++++ b/mm/internal.h
+@@ -11,6 +11,7 @@
+ #ifndef __MM_INTERNAL_H
+ #define __MM_INTERNAL_H
+
++#include <linux/fs.h>
+ #include <linux/mm.h>
+
+ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
+@@ -21,6 +22,20 @@ static inline void set_page_count(struct
+ atomic_set(&page->_count, v);
+ }
+
++extern int __do_page_cache_readahead(struct address_space *mapping,
++ struct file *filp, pgoff_t offset, unsigned long nr_to_read,
++ unsigned long lookahead_size);
++
++/*
++ * Submit IO for the read-ahead request in file_ra_state.
++ */
++static inline unsigned long ra_submit(struct file_ra_state *ra,
++ struct address_space *mapping, struct file *filp)
++{
++ return __do_page_cache_readahead(mapping, filp,
++ ra->start, ra->size, ra->async_size);
++}
++
+ /*
+ * Turn a non-refcounted page (->_count == 0) into refcounted with
+ * a count of one.
+--- a/mm/readahead.c
++++ b/mm/readahead.c
+@@ -8,9 +8,7 @@
+ */
+
+ #include <linux/kernel.h>
+-#include <linux/fs.h>
+ #include <linux/gfp.h>
+-#include <linux/mm.h>
+ #include <linux/export.h>
+ #include <linux/blkdev.h>
+ #include <linux/backing-dev.h>
+@@ -20,6 +18,8 @@
+ #include <linux/syscalls.h>
+ #include <linux/file.h>
+
++#include "internal.h"
++
+ /*
+ * Initialise a struct file's readahead state. Assumes that the caller has
+ * memset *ra to zero.
+@@ -149,8 +149,7 @@ out:
+ *
+ * Returns the number of pages requested, or the maximum amount of I/O allowed.
+ */
+-static int
+-__do_page_cache_readahead(struct address_space *mapping, struct file *filp,
++int __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
+ pgoff_t offset, unsigned long nr_to_read,
+ unsigned long lookahead_size)
+ {
+@@ -244,20 +243,6 @@ unsigned long max_sane_readahead(unsigne
+ }
+
+ /*
+- * Submit IO for the read-ahead request in file_ra_state.
+- */
+-unsigned long ra_submit(struct file_ra_state *ra,
+- struct address_space *mapping, struct file *filp)
+-{
+- int actual;
+-
+- actual = __do_page_cache_readahead(mapping, filp,
+- ra->start, ra->size, ra->async_size);
+-
+- return actual;
+-}
+-
+-/*
+ * Set the initial window size, round to next power of 2 and square
+ * for small size, x 4 for medium, and x 2 for large
+ * for 128k (32 page) max ra
--- /dev/null
+From 67f9fd91f93c582b7de2ab9325b6e179db77e4d5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sasha.levin@oracle.com>
+Date: Thu, 3 Apr 2014 14:48:18 -0700
+Subject: mm: remove read_cache_page_async()
+
+From: Sasha Levin <sasha.levin@oracle.com>
+
+commit 67f9fd91f93c582b7de2ab9325b6e179db77e4d5 upstream.
+
+This patch removes read_cache_page_async(), which wasn't really needed
+anywhere, and simplifies the code around it a bit.
+
+read_cache_page_async() is useful when we want to read a page into the
+cache without waiting for it to complete. This happens when the
+appropriate callback 'filler' doesn't complete its read operation and
+release the page lock immediately, but instead queues a different
+completion routine to do that. This never actually happened anywhere in
+the code.
+
+read_cache_page_async() had 3 different callers:
+
+- read_cache_page(), the sync version, which would just wait for the
+  requested read to complete using wait_on_page_read().
+
+- JFFS2 would call it from jffs2_gc_fetch_page(), but the filler
+  function it supplied doesn't do any async reads, and the read completes
+  before the filler function returns - making it actually a sync read.
+
+- CRAMFS would call it using the read_mapping_page_async() wrapper, with
+  a similar story to JFFS2 - the filler function doesn't do anything
+  resembling an async read, and the read always completes before the
+  filler function returns.
+
+To sum it up, the code in mm/filemap.c never took advantage of having
+read_cache_page_async(). While there are filler callbacks that do async
+reads (such as the block one), we always called them through the
+synchronous read_cache_page().
+
+This patch adds a mandatory wait for read to complete when adding a new
+page to the cache, and removes read_cache_page_async() and its wrappers.
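+
+A caller that needs a page brought uptodate now simply uses the synchronous
+entry points; a minimal sketch (hypothetical helper; passing NULL uses
+mapping->a_ops->readpage as the filler):
+
+    static int bring_page_uptodate(struct address_space *mapping, pgoff_t index)
+    {
+        struct page *page;
+
+        /* fills the page if needed and now always waits for the read */
+        page = read_mapping_page(mapping, index, NULL);
+        if (IS_ERR(page))
+            return PTR_ERR(page);
+
+        /* page is uptodate and unlocked here */
+        page_cache_release(page);
+        return 0;
+    }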
+
+Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/cramfs/inode.c | 3 --
+ fs/jffs2/fs.c | 2 -
+ include/linux/pagemap.h | 10 -------
+ mm/filemap.c | 64 +++++++++++++++++-------------------------------
+ 4 files changed, 25 insertions(+), 54 deletions(-)
+
+--- a/fs/cramfs/inode.c
++++ b/fs/cramfs/inode.c
+@@ -195,8 +195,7 @@ static void *cramfs_read(struct super_bl
+ struct page *page = NULL;
+
+ if (blocknr + i < devsize) {
+- page = read_mapping_page_async(mapping, blocknr + i,
+- NULL);
++ page = read_mapping_page(mapping, blocknr + i, NULL);
+ /* synchronous error? */
+ if (IS_ERR(page))
+ page = NULL;
+--- a/fs/jffs2/fs.c
++++ b/fs/jffs2/fs.c
+@@ -687,7 +687,7 @@ unsigned char *jffs2_gc_fetch_page(struc
+ struct inode *inode = OFNI_EDONI_2SFFJ(f);
+ struct page *pg;
+
+- pg = read_cache_page_async(inode->i_mapping, offset >> PAGE_CACHE_SHIFT,
++ pg = read_cache_page(inode->i_mapping, offset >> PAGE_CACHE_SHIFT,
+ (void *)jffs2_do_readpage_unlock, inode);
+ if (IS_ERR(pg))
+ return (void *)pg;
+--- a/include/linux/pagemap.h
++++ b/include/linux/pagemap.h
+@@ -278,8 +278,6 @@ static inline struct page *grab_cache_pa
+
+ extern struct page * grab_cache_page_nowait(struct address_space *mapping,
+ pgoff_t index);
+-extern struct page * read_cache_page_async(struct address_space *mapping,
+- pgoff_t index, filler_t *filler, void *data);
+ extern struct page * read_cache_page(struct address_space *mapping,
+ pgoff_t index, filler_t *filler, void *data);
+ extern struct page * read_cache_page_gfp(struct address_space *mapping,
+@@ -287,14 +285,6 @@ extern struct page * read_cache_page_gfp
+ extern int read_cache_pages(struct address_space *mapping,
+ struct list_head *pages, filler_t *filler, void *data);
+
+-static inline struct page *read_mapping_page_async(
+- struct address_space *mapping,
+- pgoff_t index, void *data)
+-{
+- filler_t *filler = (filler_t *)mapping->a_ops->readpage;
+- return read_cache_page_async(mapping, index, filler, data);
+-}
+-
+ static inline struct page *read_mapping_page(struct address_space *mapping,
+ pgoff_t index, void *data)
+ {
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -2027,6 +2027,18 @@ int generic_file_readonly_mmap(struct fi
+ EXPORT_SYMBOL(generic_file_mmap);
+ EXPORT_SYMBOL(generic_file_readonly_mmap);
+
++static struct page *wait_on_page_read(struct page *page)
++{
++ if (!IS_ERR(page)) {
++ wait_on_page_locked(page);
++ if (!PageUptodate(page)) {
++ page_cache_release(page);
++ page = ERR_PTR(-EIO);
++ }
++ }
++ return page;
++}
++
+ static struct page *__read_cache_page(struct address_space *mapping,
+ pgoff_t index,
+ int (*filler)(void *, struct page *),
+@@ -2053,6 +2065,8 @@ repeat:
+ if (err < 0) {
+ page_cache_release(page);
+ page = ERR_PTR(err);
++ } else {
++ page = wait_on_page_read(page);
+ }
+ }
+ return page;
+@@ -2089,6 +2103,10 @@ retry:
+ if (err < 0) {
+ page_cache_release(page);
+ return ERR_PTR(err);
++ } else {
++ page = wait_on_page_read(page);
++ if (IS_ERR(page))
++ return page;
+ }
+ out:
+ mark_page_accessed(page);
+@@ -2096,40 +2114,25 @@ out:
+ }
+
+ /**
+- * read_cache_page_async - read into page cache, fill it if needed
++ * read_cache_page - read into page cache, fill it if needed
+ * @mapping: the page's address_space
+ * @index: the page index
+ * @filler: function to perform the read
+ * @data: first arg to filler(data, page) function, often left as NULL
+ *
+- * Same as read_cache_page, but don't wait for page to become unlocked
+- * after submitting it to the filler.
+- *
+ * Read into the page cache. If a page already exists, and PageUptodate() is
+- * not set, try to fill the page but don't wait for it to become unlocked.
++ * not set, try to fill the page and wait for it to become unlocked.
+ *
+ * If the page does not get brought uptodate, return -EIO.
+ */
+-struct page *read_cache_page_async(struct address_space *mapping,
++struct page *read_cache_page(struct address_space *mapping,
+ pgoff_t index,
+ int (*filler)(void *, struct page *),
+ void *data)
+ {
+ return do_read_cache_page(mapping, index, filler, data, mapping_gfp_mask(mapping));
+ }
+-EXPORT_SYMBOL(read_cache_page_async);
+-
+-static struct page *wait_on_page_read(struct page *page)
+-{
+- if (!IS_ERR(page)) {
+- wait_on_page_locked(page);
+- if (!PageUptodate(page)) {
+- page_cache_release(page);
+- page = ERR_PTR(-EIO);
+- }
+- }
+- return page;
+-}
++EXPORT_SYMBOL(read_cache_page);
+
+ /**
+ * read_cache_page_gfp - read into page cache, using specified page allocation flags.
+@@ -2148,31 +2151,10 @@ struct page *read_cache_page_gfp(struct
+ {
+ filler_t *filler = (filler_t *)mapping->a_ops->readpage;
+
+- return wait_on_page_read(do_read_cache_page(mapping, index, filler, NULL, gfp));
++ return do_read_cache_page(mapping, index, filler, NULL, gfp);
+ }
+ EXPORT_SYMBOL(read_cache_page_gfp);
+
+-/**
+- * read_cache_page - read into page cache, fill it if needed
+- * @mapping: the page's address_space
+- * @index: the page index
+- * @filler: function to perform the read
+- * @data: first arg to filler(data, page) function, often left as NULL
+- *
+- * Read into the page cache. If a page already exists, and PageUptodate() is
+- * not set, try to fill the page then wait for it to become unlocked.
+- *
+- * If the page does not get brought uptodate, return -EIO.
+- */
+-struct page *read_cache_page(struct address_space *mapping,
+- pgoff_t index,
+- int (*filler)(void *, struct page *),
+- void *data)
+-{
+- return wait_on_page_read(read_cache_page_async(mapping, index, filler, data));
+-}
+-EXPORT_SYMBOL(read_cache_page);
+-
+ static size_t __iovec_copy_from_user_inatomic(char *vaddr,
+ const struct iovec *iov, size_t base, size_t bytes)
+ {
--- /dev/null
+From 6dbaf22ce1f1dfba33313198eb5bd989ae76dd87 Mon Sep 17 00:00:00 2001
+From: Johannes Weiner <hannes@cmpxchg.org>
+Date: Thu, 3 Apr 2014 14:47:41 -0700
+Subject: mm: shmem: save one radix tree lookup when truncating swapped pages
+
+From: Johannes Weiner <hannes@cmpxchg.org>
+
+commit 6dbaf22ce1f1dfba33313198eb5bd989ae76dd87 upstream.
+
+Page cache radix tree slots are usually stabilized by the page lock, but
+shmem's swap cookies have no such thing. Because the overall truncation
+loop is lockless, the swap entry is currently confirmed by a tree lookup
+and then deleted by another tree lookup under the same tree lock region.
+
+Use radix_tree_delete_item() instead, which does the verification and
+deletion with only one lookup. This also allows removing the
+delete-only special case from shmem_radix_tree_replace().
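+
+The resulting pattern under the tree lock is a single verify-and-delete, as
+in the new shmem_free_swap() below (sketch; 'expected' stands for the swap
+cookie the caller looked up earlier):
+
+    void *old;
+
+    spin_lock_irq(&mapping->tree_lock);
+    /* delete the slot only if it still holds the expected entry */
+    old = radix_tree_delete_item(&mapping->page_tree, index, expected);
+    spin_unlock_irq(&mapping->tree_lock);
+
+    if (old != expected)
+        return -ENOENT;     /* the slot changed under us; nothing was deleted */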
+
+Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
+Reviewed-by: Minchan Kim <minchan@kernel.org>
+Reviewed-by: Rik van Riel <riel@redhat.com>
+Acked-by: Mel Gorman <mgorman@suse.de>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Bob Liu <bob.liu@oracle.com>
+Cc: Christoph Hellwig <hch@infradead.org>
+Cc: Dave Chinner <david@fromorbit.com>
+Cc: Greg Thelen <gthelen@google.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Jan Kara <jack@suse.cz>
+Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+Cc: Luigi Semenzato <semenzato@google.com>
+Cc: Metin Doslu <metin@citusdata.com>
+Cc: Michel Lespinasse <walken@google.com>
+Cc: Ozgun Erdogan <ozgun@citusdata.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Roman Gushchin <klamm@yandex-team.ru>
+Cc: Ryan Mallon <rmallon@gmail.com>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/shmem.c | 25 ++++++++++++-------------
+ 1 file changed, 12 insertions(+), 13 deletions(-)
+
+--- a/mm/shmem.c
++++ b/mm/shmem.c
+@@ -243,19 +243,17 @@ static int shmem_radix_tree_replace(stru
+ pgoff_t index, void *expected, void *replacement)
+ {
+ void **pslot;
+- void *item = NULL;
++ void *item;
+
+ VM_BUG_ON(!expected);
++ VM_BUG_ON(!replacement);
+ pslot = radix_tree_lookup_slot(&mapping->page_tree, index);
+- if (pslot)
+- item = radix_tree_deref_slot_protected(pslot,
+- &mapping->tree_lock);
++ if (!pslot)
++ return -ENOENT;
++ item = radix_tree_deref_slot_protected(pslot, &mapping->tree_lock);
+ if (item != expected)
+ return -ENOENT;
+- if (replacement)
+- radix_tree_replace_slot(pslot, replacement);
+- else
+- radix_tree_delete(&mapping->page_tree, index);
++ radix_tree_replace_slot(pslot, replacement);
+ return 0;
+ }
+
+@@ -387,14 +385,15 @@ export:
+ static int shmem_free_swap(struct address_space *mapping,
+ pgoff_t index, void *radswap)
+ {
+- int error;
++ void *old;
+
+ spin_lock_irq(&mapping->tree_lock);
+- error = shmem_radix_tree_replace(mapping, index, radswap, NULL);
++ old = radix_tree_delete_item(&mapping->page_tree, index, radswap);
+ spin_unlock_irq(&mapping->tree_lock);
+- if (!error)
+- free_swap_and_cache(radix_to_swp_entry(radswap));
+- return error;
++ if (old != radswap)
++ return -ENOENT;
++ free_swap_and_cache(radix_to_swp_entry(radswap));
++ return 0;
+ }
+
+ /*
net-sctp-fix-skb_over_panic-when-receiving-malformed-asconf-chunks.patch
iwlwifi-configure-the-ltr.patch
regmap-fix-kernel-hang-on-regmap_bulk_write-with-zero-val_count.patch
+lib-radix-tree-add-radix_tree_delete_item.patch
+mm-shmem-save-one-radix-tree-lookup-when-truncating-swapped-pages.patch
+mm-filemap-move-radix-tree-hole-searching-here.patch
+mm-fs-prepare-for-non-page-entries-in-page-cache-radix-trees.patch
+mm-madvise-fix-madv_willneed-on-shmem-swapouts.patch
+mm-remove-read_cache_page_async.patch
+callers-of-iov_copy_from_user_atomic-don-t-need.patch
+mm-readahead.c-inline-ra_submit.patch
+mm-compaction-clean-up-unused-code-lines.patch
+mm-compaction-cleanup-isolate_freepages.patch
+mm-migration-add-destination-page-freeing-callback.patch