From: Greg Kroah-Hartman Date: Wed, 19 Nov 2014 20:31:34 +0000 (-0800) Subject: 3.14-stable patches X-Git-Tag: v3.10.61~3 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=373f6a07bf31dd0519c9b178e5303363932256c4;p=thirdparty%2Fkernel%2Fstable-queue.git 3.14-stable patches added patches: callers-of-iov_copy_from_user_atomic-don-t-need.patch lib-radix-tree-add-radix_tree_delete_item.patch mm-compaction-clean-up-unused-code-lines.patch mm-compaction-cleanup-isolate_freepages.patch mm-filemap-move-radix-tree-hole-searching-here.patch mm-fs-prepare-for-non-page-entries-in-page-cache-radix-trees.patch mm-madvise-fix-madv_willneed-on-shmem-swapouts.patch mm-migration-add-destination-page-freeing-callback.patch mm-readahead.c-inline-ra_submit.patch mm-remove-read_cache_page_async.patch mm-shmem-save-one-radix-tree-lookup-when-truncating-swapped-pages.patch --- diff --git a/queue-3.14/callers-of-iov_copy_from_user_atomic-don-t-need.patch b/queue-3.14/callers-of-iov_copy_from_user_atomic-don-t-need.patch new file mode 100644 index 00000000000..c87aec91d72 --- /dev/null +++ b/queue-3.14/callers-of-iov_copy_from_user_atomic-don-t-need.patch @@ -0,0 +1,69 @@ +From 9e8c2af96e0d2d5fe298dd796fb6bc16e888a48d Mon Sep 17 00:00:00 2001 +From: Al Viro +Date: Sun, 2 Feb 2014 22:10:25 -0500 +Subject: callers of iov_copy_from_user_atomic() don't need pagecache_disable() + +From: Al Viro + +commit 9e8c2af96e0d2d5fe298dd796fb6bc16e888a48d upstream. + +... it does that itself (via kmap_atomic()) + +Signed-off-by: Al Viro +Signed-off-by: Mel Gorman +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/file.c | 5 ----- + fs/fuse/file.c | 2 -- + mm/filemap.c | 3 --- + 3 files changed, 10 deletions(-) + +--- a/fs/btrfs/file.c ++++ b/fs/btrfs/file.c +@@ -425,13 +425,8 @@ static noinline int btrfs_copy_from_user + struct page *page = prepared_pages[pg]; + /* + * Copy data from userspace to the current page +- * +- * Disable pagefault to avoid recursive lock since +- * the pages are already locked + */ +- pagefault_disable(); + copied = iov_iter_copy_from_user_atomic(page, i, offset, count); +- pagefault_enable(); + + /* Flush processor's dcache for this page */ + flush_dcache_page(page); +--- a/fs/fuse/file.c ++++ b/fs/fuse/file.c +@@ -1003,9 +1003,7 @@ static ssize_t fuse_fill_write_pages(str + if (mapping_writably_mapped(mapping)) + flush_dcache_page(page); + +- pagefault_disable(); + tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes); +- pagefault_enable(); + flush_dcache_page(page); + + mark_page_accessed(page); +--- a/mm/filemap.c ++++ b/mm/filemap.c +@@ -2188,7 +2188,6 @@ size_t iov_iter_copy_from_user_atomic(st + char *kaddr; + size_t copied; + +- BUG_ON(!in_atomic()); + kaddr = kmap_atomic(page); + if (likely(i->nr_segs == 1)) { + int left; +@@ -2562,9 +2561,7 @@ again: + if (mapping_writably_mapped(mapping)) + flush_dcache_page(page); + +- pagefault_disable(); + copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes); +- pagefault_enable(); + flush_dcache_page(page); + + mark_page_accessed(page); diff --git a/queue-3.14/lib-radix-tree-add-radix_tree_delete_item.patch b/queue-3.14/lib-radix-tree-add-radix_tree_delete_item.patch new file mode 100644 index 00000000000..ce8d71dfe39 --- /dev/null +++ b/queue-3.14/lib-radix-tree-add-radix_tree_delete_item.patch @@ -0,0 +1,117 @@ +From 53c59f262d747ea82e7414774c59a489501186a0 Mon Sep 17 00:00:00 2001 +From: Johannes Weiner +Date: Thu, 3 Apr 2014 14:47:39 -0700 +Subject: lib: radix-tree: add radix_tree_delete_item() + +From: Johannes Weiner + 
+commit 53c59f262d747ea82e7414774c59a489501186a0 upstream. + +Provide a function that does not just delete an entry at a given index, +but also allows passing in an expected item. Delete only if that item +is still located at the specified index. + +This is handy when lockless tree traversals want to delete entries as +well because they don't have to do an second, locked lookup to verify +the slot has not changed under them before deleting the entry. + +Signed-off-by: Johannes Weiner +Reviewed-by: Minchan Kim +Reviewed-by: Rik van Riel +Acked-by: Mel Gorman +Cc: Andrea Arcangeli +Cc: Bob Liu +Cc: Christoph Hellwig +Cc: Dave Chinner +Cc: Greg Thelen +Cc: Hugh Dickins +Cc: Jan Kara +Cc: KOSAKI Motohiro +Cc: Luigi Semenzato +Cc: Metin Doslu +Cc: Michel Lespinasse +Cc: Ozgun Erdogan +Cc: Peter Zijlstra +Cc: Roman Gushchin +Cc: Ryan Mallon +Cc: Tejun Heo +Cc: Vlastimil Babka +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Mel Gorman +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/radix-tree.h | 1 + + lib/radix-tree.c | 31 +++++++++++++++++++++++++++---- + 2 files changed, 28 insertions(+), 4 deletions(-) + +--- a/include/linux/radix-tree.h ++++ b/include/linux/radix-tree.h +@@ -219,6 +219,7 @@ static inline void radix_tree_replace_sl + int radix_tree_insert(struct radix_tree_root *, unsigned long, void *); + void *radix_tree_lookup(struct radix_tree_root *, unsigned long); + void **radix_tree_lookup_slot(struct radix_tree_root *, unsigned long); ++void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *); + void *radix_tree_delete(struct radix_tree_root *, unsigned long); + unsigned int + radix_tree_gang_lookup(struct radix_tree_root *root, void **results, +--- a/lib/radix-tree.c ++++ b/lib/radix-tree.c +@@ -1337,15 +1337,18 @@ static inline void radix_tree_shrink(str + } + + /** +- * radix_tree_delete - delete an item from a radix tree ++ * radix_tree_delete_item - delete an item from a radix tree + * @root: radix tree root + * @index: index key ++ * @item: expected item + * +- * Remove the item at @index from the radix tree rooted at @root. ++ * Remove @item at @index from the radix tree rooted at @root. + * +- * Returns the address of the deleted item, or NULL if it was not present. ++ * Returns the address of the deleted item, or NULL if it was not present ++ * or the entry at the given @index was not @item. + */ +-void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) ++void *radix_tree_delete_item(struct radix_tree_root *root, ++ unsigned long index, void *item) + { + struct radix_tree_node *node = NULL; + struct radix_tree_node *slot = NULL; +@@ -1380,6 +1383,11 @@ void *radix_tree_delete(struct radix_tre + if (slot == NULL) + goto out; + ++ if (item && slot != item) { ++ slot = NULL; ++ goto out; ++ } ++ + /* + * Clear all tags associated with the item to be deleted. + * This way of doing it would be inefficient, but seldom is any set. +@@ -1424,6 +1432,21 @@ void *radix_tree_delete(struct radix_tre + out: + return slot; + } ++EXPORT_SYMBOL(radix_tree_delete_item); ++ ++/** ++ * radix_tree_delete - delete an item from a radix tree ++ * @root: radix tree root ++ * @index: index key ++ * ++ * Remove the item at @index from the radix tree rooted at @root. ++ * ++ * Returns the address of the deleted item, or NULL if it was not present. 
++ */ ++void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) ++{ ++ return radix_tree_delete_item(root, index, NULL); ++} + EXPORT_SYMBOL(radix_tree_delete); + + /** diff --git a/queue-3.14/mm-compaction-clean-up-unused-code-lines.patch b/queue-3.14/mm-compaction-clean-up-unused-code-lines.patch new file mode 100644 index 00000000000..f6ab43b4f9f --- /dev/null +++ b/queue-3.14/mm-compaction-clean-up-unused-code-lines.patch @@ -0,0 +1,67 @@ +From 13fb44e4b0414d7e718433a49e6430d5b76bd46e Mon Sep 17 00:00:00 2001 +From: Heesub Shin +Date: Wed, 4 Jun 2014 16:07:24 -0700 +Subject: mm/compaction: clean up unused code lines + +From: Heesub Shin + +commit 13fb44e4b0414d7e718433a49e6430d5b76bd46e upstream. + +Remove code lines currently not in use or never called. + +Signed-off-by: Heesub Shin +Acked-by: Vlastimil Babka +Cc: Dongjun Shin +Cc: Sunghwan Yun +Cc: Minchan Kim +Cc: Mel Gorman +Cc: Joonsoo Kim +Cc: Bartlomiej Zolnierkiewicz +Cc: Michal Nazarewicz +Cc: Naoya Horiguchi +Cc: Christoph Lameter +Cc: Rik van Riel +Cc: Dongjun Shin +Cc: Sunghwan Yun +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Mel Gorman +Signed-off-by: Greg Kroah-Hartman + +--- + mm/compaction.c | 10 ---------- + 1 file changed, 10 deletions(-) + +--- a/mm/compaction.c ++++ b/mm/compaction.c +@@ -208,12 +208,6 @@ static bool compact_checklock_irqsave(sp + return true; + } + +-static inline bool compact_trylock_irqsave(spinlock_t *lock, +- unsigned long *flags, struct compact_control *cc) +-{ +- return compact_checklock_irqsave(lock, flags, false, cc); +-} +- + /* Returns true if the page is within a block suitable for migration to */ + static bool suitable_migration_target(struct page *page) + { +@@ -736,7 +730,6 @@ static void isolate_freepages(struct zon + continue; + + /* Found a block suitable for isolating free pages from */ +- isolated = 0; + + /* + * Take care when isolating in last pageblock of a zone which +@@ -1165,9 +1158,6 @@ static void __compact_pgdat(pg_data_t *p + if (zone_watermark_ok(zone, cc->order, + low_wmark_pages(zone), 0, 0)) + compaction_defer_reset(zone, cc->order, false); +- /* Currently async compaction is never deferred. */ +- else if (cc->sync) +- defer_compaction(zone, cc->order); + } + + VM_BUG_ON(!list_empty(&cc->freepages)); diff --git a/queue-3.14/mm-compaction-cleanup-isolate_freepages.patch b/queue-3.14/mm-compaction-cleanup-isolate_freepages.patch new file mode 100644 index 00000000000..2d6c3eea00d --- /dev/null +++ b/queue-3.14/mm-compaction-cleanup-isolate_freepages.patch @@ -0,0 +1,161 @@ +From c96b9e508f3d06ddb601dcc9792d62c044ab359e Mon Sep 17 00:00:00 2001 +From: Vlastimil Babka +Date: Wed, 4 Jun 2014 16:07:26 -0700 +Subject: mm/compaction: cleanup isolate_freepages() + +From: Vlastimil Babka + +commit c96b9e508f3d06ddb601dcc9792d62c044ab359e upstream. + +isolate_freepages() is currently somewhat hard to follow thanks to many +looks like it is related to the 'low_pfn' variable, but in fact it is not. + +This patch renames the 'high_pfn' variable to a hopefully less confusing name, +and slightly changes its handling without a functional change. A comment made +obsolete by recent changes is also updated. 
+ +[akpm@linux-foundation.org: comment fixes, per Minchan] +[iamjoonsoo.kim@lge.com: cleanups] +Signed-off-by: Vlastimil Babka +Cc: Minchan Kim +Cc: Mel Gorman +Cc: Joonsoo Kim +Cc: Bartlomiej Zolnierkiewicz +Cc: Michal Nazarewicz +Cc: Naoya Horiguchi +Cc: Christoph Lameter +Cc: Rik van Riel +Cc: Dongjun Shin +Cc: Sunghwan Yun +Signed-off-by: Joonsoo Kim +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Mel Gorman +Signed-off-by: Greg Kroah-Hartman + +--- + mm/compaction.c | 56 +++++++++++++++++++++++++++----------------------------- + 1 file changed, 27 insertions(+), 29 deletions(-) + +--- a/mm/compaction.c ++++ b/mm/compaction.c +@@ -665,7 +665,10 @@ static void isolate_freepages(struct zon + struct compact_control *cc) + { + struct page *page; +- unsigned long high_pfn, low_pfn, pfn, z_end_pfn; ++ unsigned long block_start_pfn; /* start of current pageblock */ ++ unsigned long block_end_pfn; /* end of current pageblock */ ++ unsigned long low_pfn; /* lowest pfn scanner is able to scan */ ++ unsigned long next_free_pfn; /* start pfn for scaning at next round */ + int nr_freepages = cc->nr_freepages; + struct list_head *freelist = &cc->freepages; + +@@ -673,32 +676,33 @@ static void isolate_freepages(struct zon + * Initialise the free scanner. The starting point is where we last + * successfully isolated from, zone-cached value, or the end of the + * zone when isolating for the first time. We need this aligned to +- * the pageblock boundary, because we do pfn -= pageblock_nr_pages +- * in the for loop. ++ * the pageblock boundary, because we do ++ * block_start_pfn -= pageblock_nr_pages in the for loop. ++ * For ending point, take care when isolating in last pageblock of a ++ * a zone which ends in the middle of a pageblock. + * The low boundary is the end of the pageblock the migration scanner + * is using. + */ +- pfn = cc->free_pfn & ~(pageblock_nr_pages-1); ++ block_start_pfn = cc->free_pfn & ~(pageblock_nr_pages-1); ++ block_end_pfn = min(block_start_pfn + pageblock_nr_pages, ++ zone_end_pfn(zone)); + low_pfn = ALIGN(cc->migrate_pfn + 1, pageblock_nr_pages); + + /* +- * Take care that if the migration scanner is at the end of the zone +- * that the free scanner does not accidentally move to the next zone +- * in the next isolation cycle. ++ * If no pages are isolated, the block_start_pfn < low_pfn check ++ * will kick in. + */ +- high_pfn = min(low_pfn, pfn); +- +- z_end_pfn = zone_end_pfn(zone); ++ next_free_pfn = 0; + + /* + * Isolate free pages until enough are available to migrate the + * pages on cc->migratepages. We stop searching if the migrate + * and free page scanners meet or enough free pages are isolated. + */ +- for (; pfn >= low_pfn && cc->nr_migratepages > nr_freepages; +- pfn -= pageblock_nr_pages) { ++ for (; block_start_pfn >= low_pfn && cc->nr_migratepages > nr_freepages; ++ block_end_pfn = block_start_pfn, ++ block_start_pfn -= pageblock_nr_pages) { + unsigned long isolated; +- unsigned long end_pfn; + + /* + * This can iterate a massively long zone without finding any +@@ -707,7 +711,7 @@ static void isolate_freepages(struct zon + */ + cond_resched(); + +- if (!pfn_valid(pfn)) ++ if (!pfn_valid(block_start_pfn)) + continue; + + /* +@@ -717,7 +721,7 @@ static void isolate_freepages(struct zon + * i.e. it's possible that all pages within a zones range of + * pages do not belong to a single zone. 
+ */ +- page = pfn_to_page(pfn); ++ page = pfn_to_page(block_start_pfn); + if (page_zone(page) != zone) + continue; + +@@ -730,14 +734,8 @@ static void isolate_freepages(struct zon + continue; + + /* Found a block suitable for isolating free pages from */ +- +- /* +- * Take care when isolating in last pageblock of a zone which +- * ends in the middle of a pageblock. +- */ +- end_pfn = min(pfn + pageblock_nr_pages, z_end_pfn); +- isolated = isolate_freepages_block(cc, pfn, end_pfn, +- freelist, false); ++ isolated = isolate_freepages_block(cc, block_start_pfn, ++ block_end_pfn, freelist, false); + nr_freepages += isolated; + + /* +@@ -745,9 +743,9 @@ static void isolate_freepages(struct zon + * looking for free pages, the search will restart here as + * page migration may have returned some pages to the allocator + */ +- if (isolated) { ++ if (isolated && next_free_pfn == 0) { + cc->finished_update_free = true; +- high_pfn = max(high_pfn, pfn); ++ next_free_pfn = block_start_pfn; + } + } + +@@ -758,10 +756,10 @@ static void isolate_freepages(struct zon + * If we crossed the migrate scanner, we want to keep it that way + * so that compact_finished() may detect this + */ +- if (pfn < low_pfn) +- cc->free_pfn = max(pfn, zone->zone_start_pfn); +- else +- cc->free_pfn = high_pfn; ++ if (block_start_pfn < low_pfn) ++ next_free_pfn = cc->migrate_pfn; ++ ++ cc->free_pfn = next_free_pfn; + cc->nr_freepages = nr_freepages; + } + diff --git a/queue-3.14/mm-filemap-move-radix-tree-hole-searching-here.patch b/queue-3.14/mm-filemap-move-radix-tree-hole-searching-here.patch new file mode 100644 index 00000000000..7b1e61287d2 --- /dev/null +++ b/queue-3.14/mm-filemap-move-radix-tree-hole-searching-here.patch @@ -0,0 +1,284 @@ +From e7b563bb2a6f4d974208da46200784b9c5b5a47e Mon Sep 17 00:00:00 2001 +From: Johannes Weiner +Date: Thu, 3 Apr 2014 14:47:44 -0700 +Subject: mm: filemap: move radix tree hole searching here + +From: Johannes Weiner + +commit e7b563bb2a6f4d974208da46200784b9c5b5a47e upstream. + +The radix tree hole searching code is only used for page cache, for +example the readahead code trying to get a a picture of the area +surrounding a fault. + +It sufficed to rely on the radix tree definition of holes, which is +"empty tree slot". But this is about to change, though, as shadow page +descriptors will be stored in the page cache after the actual pages get +evicted from memory. + +Move the functions over to mm/filemap.c and make them native page cache +operations, where they can later be adapted to handle the new definition +of "page cache hole". 
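+
+As a rough usage sketch of the new helpers (only page_cache_next_hole()
+itself comes from this patch; the wrapper and its locals are illustrative,
+modelled on the readahead conversion further down), a page cache consumer
+scans for the nearest hole under RCU like this:
+
+	/* Sketch only, not part of the patch. */
+	static pgoff_t first_hole_from(struct address_space *mapping,
+				       pgoff_t index, unsigned long max)
+	{
+		pgoff_t hole;
+
+		rcu_read_lock();
+		/* lowest not-present index in [index, index + max - 1] */
+		hole = page_cache_next_hole(mapping, index, max);
+		rcu_read_unlock();
+
+		/* hole - index >= max means no hole in the scanned range */
+		return hole;
+	}
+
+Like radix_tree_gang_lookup(), the lookup is RCU-safe but is not an atomic
+snapshot of the tree.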
+ +Signed-off-by: Johannes Weiner +Reviewed-by: Rik van Riel +Reviewed-by: Minchan Kim +Acked-by: Mel Gorman +Cc: Andrea Arcangeli +Cc: Bob Liu +Cc: Christoph Hellwig +Cc: Dave Chinner +Cc: Greg Thelen +Cc: Hugh Dickins +Cc: Jan Kara +Cc: KOSAKI Motohiro +Cc: Luigi Semenzato +Cc: Metin Doslu +Cc: Michel Lespinasse +Cc: Ozgun Erdogan +Cc: Peter Zijlstra +Cc: Roman Gushchin +Cc: Ryan Mallon +Cc: Tejun Heo +Cc: Vlastimil Babka +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Mel Gorman +Signed-off-by: Greg Kroah-Hartman + +--- + fs/nfs/blocklayout/blocklayout.c | 2 - + include/linux/pagemap.h | 5 ++ + include/linux/radix-tree.h | 4 -- + lib/radix-tree.c | 75 -------------------------------------- + mm/filemap.c | 76 +++++++++++++++++++++++++++++++++++++++ + mm/readahead.c | 4 +- + 6 files changed, 84 insertions(+), 82 deletions(-) + +--- a/fs/nfs/blocklayout/blocklayout.c ++++ b/fs/nfs/blocklayout/blocklayout.c +@@ -1213,7 +1213,7 @@ static u64 pnfs_num_cont_bytes(struct in + end = DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE); + if (end != NFS_I(inode)->npages) { + rcu_read_lock(); +- end = radix_tree_next_hole(&mapping->page_tree, idx + 1, ULONG_MAX); ++ end = page_cache_next_hole(mapping, idx + 1, ULONG_MAX); + rcu_read_unlock(); + } + +--- a/include/linux/pagemap.h ++++ b/include/linux/pagemap.h +@@ -243,6 +243,11 @@ static inline struct page *page_cache_al + + typedef int filler_t(void *, struct page *); + ++pgoff_t page_cache_next_hole(struct address_space *mapping, ++ pgoff_t index, unsigned long max_scan); ++pgoff_t page_cache_prev_hole(struct address_space *mapping, ++ pgoff_t index, unsigned long max_scan); ++ + extern struct page * find_get_page(struct address_space *mapping, + pgoff_t index); + extern struct page * find_lock_page(struct address_space *mapping, +--- a/include/linux/radix-tree.h ++++ b/include/linux/radix-tree.h +@@ -227,10 +227,6 @@ radix_tree_gang_lookup(struct radix_tree + unsigned int radix_tree_gang_lookup_slot(struct radix_tree_root *root, + void ***results, unsigned long *indices, + unsigned long first_index, unsigned int max_items); +-unsigned long radix_tree_next_hole(struct radix_tree_root *root, +- unsigned long index, unsigned long max_scan); +-unsigned long radix_tree_prev_hole(struct radix_tree_root *root, +- unsigned long index, unsigned long max_scan); + int radix_tree_preload(gfp_t gfp_mask); + int radix_tree_maybe_preload(gfp_t gfp_mask); + void radix_tree_init(void); +--- a/lib/radix-tree.c ++++ b/lib/radix-tree.c +@@ -946,81 +946,6 @@ next: + } + EXPORT_SYMBOL(radix_tree_range_tag_if_tagged); + +- +-/** +- * radix_tree_next_hole - find the next hole (not-present entry) +- * @root: tree root +- * @index: index key +- * @max_scan: maximum range to search +- * +- * Search the set [index, min(index+max_scan-1, MAX_INDEX)] for the lowest +- * indexed hole. +- * +- * Returns: the index of the hole if found, otherwise returns an index +- * outside of the set specified (in which case 'return - index >= max_scan' +- * will be true). In rare cases of index wrap-around, 0 will be returned. +- * +- * radix_tree_next_hole may be called under rcu_read_lock. However, like +- * radix_tree_gang_lookup, this will not atomically search a snapshot of +- * the tree at a single point in time. For example, if a hole is created +- * at index 5, then subsequently a hole is created at index 10, +- * radix_tree_next_hole covering both indexes may return 10 if called +- * under rcu_read_lock. 
+- */ +-unsigned long radix_tree_next_hole(struct radix_tree_root *root, +- unsigned long index, unsigned long max_scan) +-{ +- unsigned long i; +- +- for (i = 0; i < max_scan; i++) { +- if (!radix_tree_lookup(root, index)) +- break; +- index++; +- if (index == 0) +- break; +- } +- +- return index; +-} +-EXPORT_SYMBOL(radix_tree_next_hole); +- +-/** +- * radix_tree_prev_hole - find the prev hole (not-present entry) +- * @root: tree root +- * @index: index key +- * @max_scan: maximum range to search +- * +- * Search backwards in the range [max(index-max_scan+1, 0), index] +- * for the first hole. +- * +- * Returns: the index of the hole if found, otherwise returns an index +- * outside of the set specified (in which case 'index - return >= max_scan' +- * will be true). In rare cases of wrap-around, ULONG_MAX will be returned. +- * +- * radix_tree_next_hole may be called under rcu_read_lock. However, like +- * radix_tree_gang_lookup, this will not atomically search a snapshot of +- * the tree at a single point in time. For example, if a hole is created +- * at index 10, then subsequently a hole is created at index 5, +- * radix_tree_prev_hole covering both indexes may return 5 if called under +- * rcu_read_lock. +- */ +-unsigned long radix_tree_prev_hole(struct radix_tree_root *root, +- unsigned long index, unsigned long max_scan) +-{ +- unsigned long i; +- +- for (i = 0; i < max_scan; i++) { +- if (!radix_tree_lookup(root, index)) +- break; +- index--; +- if (index == ULONG_MAX) +- break; +- } +- +- return index; +-} +-EXPORT_SYMBOL(radix_tree_prev_hole); +- + /** + * radix_tree_gang_lookup - perform multiple lookup on a radix tree + * @root: radix tree root +--- a/mm/filemap.c ++++ b/mm/filemap.c +@@ -688,6 +688,82 @@ int __lock_page_or_retry(struct page *pa + } + + /** ++ * page_cache_next_hole - find the next hole (not-present entry) ++ * @mapping: mapping ++ * @index: index ++ * @max_scan: maximum range to search ++ * ++ * Search the set [index, min(index+max_scan-1, MAX_INDEX)] for the ++ * lowest indexed hole. ++ * ++ * Returns: the index of the hole if found, otherwise returns an index ++ * outside of the set specified (in which case 'return - index >= ++ * max_scan' will be true). In rare cases of index wrap-around, 0 will ++ * be returned. ++ * ++ * page_cache_next_hole may be called under rcu_read_lock. However, ++ * like radix_tree_gang_lookup, this will not atomically search a ++ * snapshot of the tree at a single point in time. For example, if a ++ * hole is created at index 5, then subsequently a hole is created at ++ * index 10, page_cache_next_hole covering both indexes may return 10 ++ * if called under rcu_read_lock. ++ */ ++pgoff_t page_cache_next_hole(struct address_space *mapping, ++ pgoff_t index, unsigned long max_scan) ++{ ++ unsigned long i; ++ ++ for (i = 0; i < max_scan; i++) { ++ if (!radix_tree_lookup(&mapping->page_tree, index)) ++ break; ++ index++; ++ if (index == 0) ++ break; ++ } ++ ++ return index; ++} ++EXPORT_SYMBOL(page_cache_next_hole); ++ ++/** ++ * page_cache_prev_hole - find the prev hole (not-present entry) ++ * @mapping: mapping ++ * @index: index ++ * @max_scan: maximum range to search ++ * ++ * Search backwards in the range [max(index-max_scan+1, 0), index] for ++ * the first hole. ++ * ++ * Returns: the index of the hole if found, otherwise returns an index ++ * outside of the set specified (in which case 'index - return >= ++ * max_scan' will be true). In rare cases of wrap-around, ULONG_MAX ++ * will be returned. 
++ * ++ * page_cache_prev_hole may be called under rcu_read_lock. However, ++ * like radix_tree_gang_lookup, this will not atomically search a ++ * snapshot of the tree at a single point in time. For example, if a ++ * hole is created at index 10, then subsequently a hole is created at ++ * index 5, page_cache_prev_hole covering both indexes may return 5 if ++ * called under rcu_read_lock. ++ */ ++pgoff_t page_cache_prev_hole(struct address_space *mapping, ++ pgoff_t index, unsigned long max_scan) ++{ ++ unsigned long i; ++ ++ for (i = 0; i < max_scan; i++) { ++ if (!radix_tree_lookup(&mapping->page_tree, index)) ++ break; ++ index--; ++ if (index == ULONG_MAX) ++ break; ++ } ++ ++ return index; ++} ++EXPORT_SYMBOL(page_cache_prev_hole); ++ ++/** + * find_get_page - find and get a page reference + * @mapping: the address_space to search + * @offset: the page index +--- a/mm/readahead.c ++++ b/mm/readahead.c +@@ -347,7 +347,7 @@ static pgoff_t count_history_pages(struc + pgoff_t head; + + rcu_read_lock(); +- head = radix_tree_prev_hole(&mapping->page_tree, offset - 1, max); ++ head = page_cache_prev_hole(mapping, offset - 1, max); + rcu_read_unlock(); + + return offset - 1 - head; +@@ -427,7 +427,7 @@ ondemand_readahead(struct address_space + pgoff_t start; + + rcu_read_lock(); +- start = radix_tree_next_hole(&mapping->page_tree, offset+1,max); ++ start = page_cache_next_hole(mapping, offset + 1, max); + rcu_read_unlock(); + + if (!start || start - offset > max) diff --git a/queue-3.14/mm-fs-prepare-for-non-page-entries-in-page-cache-radix-trees.patch b/queue-3.14/mm-fs-prepare-for-non-page-entries-in-page-cache-radix-trees.patch new file mode 100644 index 00000000000..b7ee04f9dae --- /dev/null +++ b/queue-3.14/mm-fs-prepare-for-non-page-entries-in-page-cache-radix-trees.patch @@ -0,0 +1,902 @@ +From 0cd6144aadd2afd19d1aca880153530c52957604 Mon Sep 17 00:00:00 2001 +From: Johannes Weiner +Date: Thu, 3 Apr 2014 14:47:46 -0700 +Subject: mm + fs: prepare for non-page entries in page cache radix trees + +From: Johannes Weiner + +commit 0cd6144aadd2afd19d1aca880153530c52957604 upstream. + +shmem mappings already contain exceptional entries where swap slot +information is remembered. + +To be able to store eviction information for regular page cache, prepare +every site dealing with the radix trees directly to handle entries other +than pages. + +The common lookup functions will filter out non-page entries and return +NULL for page cache holes, just as before. But provide a raw version of +the API which returns non-page entries as well, and switch shmem over to +use it. 
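+
+For illustration, a minimal sketch of what the raw API asks of its callers,
+modelled on the shmem_getpage_gfp() conversion later in this patch (the
+surrounding locals are assumed, not taken verbatim):
+
+	/* Sketch: the slot may hold a page, a hole, or an exceptional entry. */
+	struct page *page;
+	swp_entry_t swap;
+
+	swap.val = 0;
+	page = find_lock_entry(mapping, index);
+	if (radix_tree_exceptional_entry(page)) {
+		/* not a struct page: shmem keeps a swap entry in this slot */
+		swap = radix_to_swp_entry(page);
+		page = NULL;
+	}
+	/* find_lock_page()/find_get_page() would simply have returned NULL */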
+ +Signed-off-by: Johannes Weiner +Reviewed-by: Rik van Riel +Reviewed-by: Minchan Kim +Cc: Andrea Arcangeli +Cc: Bob Liu +Cc: Christoph Hellwig +Cc: Dave Chinner +Cc: Greg Thelen +Cc: Hugh Dickins +Cc: Jan Kara +Cc: KOSAKI Motohiro +Cc: Luigi Semenzato +Cc: Mel Gorman +Cc: Metin Doslu +Cc: Michel Lespinasse +Cc: Ozgun Erdogan +Cc: Peter Zijlstra +Cc: Roman Gushchin +Cc: Ryan Mallon +Cc: Tejun Heo +Cc: Vlastimil Babka +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Mel Gorman +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/compression.c | 2 + include/linux/mm.h | 8 + + include/linux/pagemap.h | 15 ++- + include/linux/pagevec.h | 5 + + include/linux/shmem_fs.h | 1 + mm/filemap.c | 202 +++++++++++++++++++++++++++++++++++++++++------ + mm/mincore.c | 20 +++- + mm/readahead.c | 2 + mm/shmem.c | 97 ++++------------------ + mm/swap.c | 51 +++++++++++ + mm/truncate.c | 74 ++++++++++++++--- + 11 files changed, 348 insertions(+), 129 deletions(-) + +--- a/fs/btrfs/compression.c ++++ b/fs/btrfs/compression.c +@@ -472,7 +472,7 @@ static noinline int add_ra_bio_pages(str + rcu_read_lock(); + page = radix_tree_lookup(&mapping->page_tree, pg_index); + rcu_read_unlock(); +- if (page) { ++ if (page && !radix_tree_exceptional_entry(page)) { + misses++; + if (misses > 4) + break; +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -1041,6 +1041,14 @@ extern void show_free_areas(unsigned int + extern bool skip_free_areas_node(unsigned int flags, int nid); + + int shmem_zero_setup(struct vm_area_struct *); ++#ifdef CONFIG_SHMEM ++bool shmem_mapping(struct address_space *mapping); ++#else ++static inline bool shmem_mapping(struct address_space *mapping) ++{ ++ return false; ++} ++#endif + + extern int can_do_mlock(void); + extern int user_shm_lock(size_t, struct user_struct *); +--- a/include/linux/pagemap.h ++++ b/include/linux/pagemap.h +@@ -248,12 +248,15 @@ pgoff_t page_cache_next_hole(struct addr + pgoff_t page_cache_prev_hole(struct address_space *mapping, + pgoff_t index, unsigned long max_scan); + +-extern struct page * find_get_page(struct address_space *mapping, +- pgoff_t index); +-extern struct page * find_lock_page(struct address_space *mapping, +- pgoff_t index); +-extern struct page * find_or_create_page(struct address_space *mapping, +- pgoff_t index, gfp_t gfp_mask); ++struct page *find_get_entry(struct address_space *mapping, pgoff_t offset); ++struct page *find_get_page(struct address_space *mapping, pgoff_t offset); ++struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset); ++struct page *find_lock_page(struct address_space *mapping, pgoff_t offset); ++struct page *find_or_create_page(struct address_space *mapping, pgoff_t index, ++ gfp_t gfp_mask); ++unsigned find_get_entries(struct address_space *mapping, pgoff_t start, ++ unsigned int nr_entries, struct page **entries, ++ pgoff_t *indices); + unsigned find_get_pages(struct address_space *mapping, pgoff_t start, + unsigned int nr_pages, struct page **pages); + unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start, +--- a/include/linux/pagevec.h ++++ b/include/linux/pagevec.h +@@ -22,6 +22,11 @@ struct pagevec { + + void __pagevec_release(struct pagevec *pvec); + void __pagevec_lru_add(struct pagevec *pvec); ++unsigned pagevec_lookup_entries(struct pagevec *pvec, ++ struct address_space *mapping, ++ pgoff_t start, unsigned nr_entries, ++ pgoff_t *indices); ++void pagevec_remove_exceptionals(struct pagevec *pvec); + unsigned pagevec_lookup(struct pagevec *pvec, 
struct address_space *mapping, + pgoff_t start, unsigned nr_pages); + unsigned pagevec_lookup_tag(struct pagevec *pvec, +--- a/include/linux/shmem_fs.h ++++ b/include/linux/shmem_fs.h +@@ -51,6 +51,7 @@ extern struct file *shmem_kernel_file_se + unsigned long flags); + extern int shmem_zero_setup(struct vm_area_struct *); + extern int shmem_lock(struct file *file, int lock, struct user_struct *user); ++extern bool shmem_mapping(struct address_space *mapping); + extern void shmem_unlock_mapping(struct address_space *mapping); + extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, + pgoff_t index, gfp_t gfp_mask); +--- a/mm/filemap.c ++++ b/mm/filemap.c +@@ -448,6 +448,29 @@ int replace_page_cache_page(struct page + } + EXPORT_SYMBOL_GPL(replace_page_cache_page); + ++static int page_cache_tree_insert(struct address_space *mapping, ++ struct page *page) ++{ ++ void **slot; ++ int error; ++ ++ slot = radix_tree_lookup_slot(&mapping->page_tree, page->index); ++ if (slot) { ++ void *p; ++ ++ p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock); ++ if (!radix_tree_exceptional_entry(p)) ++ return -EEXIST; ++ radix_tree_replace_slot(slot, page); ++ mapping->nrpages++; ++ return 0; ++ } ++ error = radix_tree_insert(&mapping->page_tree, page->index, page); ++ if (!error) ++ mapping->nrpages++; ++ return error; ++} ++ + /** + * add_to_page_cache_locked - add a locked page to the pagecache + * @page: page to add +@@ -482,11 +505,10 @@ int add_to_page_cache_locked(struct page + page->index = offset; + + spin_lock_irq(&mapping->tree_lock); +- error = radix_tree_insert(&mapping->page_tree, offset, page); ++ error = page_cache_tree_insert(mapping, page); + radix_tree_preload_end(); + if (unlikely(error)) + goto err_insert; +- mapping->nrpages++; + __inc_zone_page_state(page, NR_FILE_PAGES); + spin_unlock_irq(&mapping->tree_lock); + trace_mm_filemap_add_to_page_cache(page); +@@ -714,7 +736,10 @@ pgoff_t page_cache_next_hole(struct addr + unsigned long i; + + for (i = 0; i < max_scan; i++) { +- if (!radix_tree_lookup(&mapping->page_tree, index)) ++ struct page *page; ++ ++ page = radix_tree_lookup(&mapping->page_tree, index); ++ if (!page || radix_tree_exceptional_entry(page)) + break; + index++; + if (index == 0) +@@ -752,7 +777,10 @@ pgoff_t page_cache_prev_hole(struct addr + unsigned long i; + + for (i = 0; i < max_scan; i++) { +- if (!radix_tree_lookup(&mapping->page_tree, index)) ++ struct page *page; ++ ++ page = radix_tree_lookup(&mapping->page_tree, index); ++ if (!page || radix_tree_exceptional_entry(page)) + break; + index--; + if (index == ULONG_MAX) +@@ -764,14 +792,19 @@ pgoff_t page_cache_prev_hole(struct addr + EXPORT_SYMBOL(page_cache_prev_hole); + + /** +- * find_get_page - find and get a page reference ++ * find_get_entry - find and get a page cache entry + * @mapping: the address_space to search +- * @offset: the page index ++ * @offset: the page cache index ++ * ++ * Looks up the page cache slot at @mapping & @offset. If there is a ++ * page cache page, it is returned with an increased refcount. ++ * ++ * If the slot holds a shadow entry of a previously evicted page, it ++ * is returned. + * +- * Is there a pagecache struct page at the given (mapping, offset) tuple? +- * If yes, increment its refcount and return it; if no, return NULL. ++ * Otherwise, %NULL is returned. 
+ */ +-struct page *find_get_page(struct address_space *mapping, pgoff_t offset) ++struct page *find_get_entry(struct address_space *mapping, pgoff_t offset) + { + void **pagep; + struct page *page; +@@ -812,24 +845,50 @@ out: + + return page; + } +-EXPORT_SYMBOL(find_get_page); ++EXPORT_SYMBOL(find_get_entry); + + /** +- * find_lock_page - locate, pin and lock a pagecache page ++ * find_get_page - find and get a page reference + * @mapping: the address_space to search + * @offset: the page index + * +- * Locates the desired pagecache page, locks it, increments its reference +- * count and returns its address. ++ * Looks up the page cache slot at @mapping & @offset. If there is a ++ * page cache page, it is returned with an increased refcount. + * +- * Returns zero if the page was not present. find_lock_page() may sleep. ++ * Otherwise, %NULL is returned. + */ +-struct page *find_lock_page(struct address_space *mapping, pgoff_t offset) ++struct page *find_get_page(struct address_space *mapping, pgoff_t offset) ++{ ++ struct page *page = find_get_entry(mapping, offset); ++ ++ if (radix_tree_exceptional_entry(page)) ++ page = NULL; ++ return page; ++} ++EXPORT_SYMBOL(find_get_page); ++ ++/** ++ * find_lock_entry - locate, pin and lock a page cache entry ++ * @mapping: the address_space to search ++ * @offset: the page cache index ++ * ++ * Looks up the page cache slot at @mapping & @offset. If there is a ++ * page cache page, it is returned locked and with an increased ++ * refcount. ++ * ++ * If the slot holds a shadow entry of a previously evicted page, it ++ * is returned. ++ * ++ * Otherwise, %NULL is returned. ++ * ++ * find_lock_entry() may sleep. ++ */ ++struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset) + { + struct page *page; + + repeat: +- page = find_get_page(mapping, offset); ++ page = find_get_entry(mapping, offset); + if (page && !radix_tree_exception(page)) { + lock_page(page); + /* Has the page been truncated? */ +@@ -842,6 +901,29 @@ repeat: + } + return page; + } ++EXPORT_SYMBOL(find_lock_entry); ++ ++/** ++ * find_lock_page - locate, pin and lock a pagecache page ++ * @mapping: the address_space to search ++ * @offset: the page index ++ * ++ * Looks up the page cache slot at @mapping & @offset. If there is a ++ * page cache page, it is returned locked and with an increased ++ * refcount. ++ * ++ * Otherwise, %NULL is returned. ++ * ++ * find_lock_page() may sleep. ++ */ ++struct page *find_lock_page(struct address_space *mapping, pgoff_t offset) ++{ ++ struct page *page = find_lock_entry(mapping, offset); ++ ++ if (radix_tree_exceptional_entry(page)) ++ page = NULL; ++ return page; ++} + EXPORT_SYMBOL(find_lock_page); + + /** +@@ -850,16 +932,18 @@ EXPORT_SYMBOL(find_lock_page); + * @index: the page's index into the mapping + * @gfp_mask: page allocation mode + * +- * Locates a page in the pagecache. If the page is not present, a new page +- * is allocated using @gfp_mask and is added to the pagecache and to the VM's +- * LRU list. The returned page is locked and has its reference count +- * incremented. ++ * Looks up the page cache slot at @mapping & @offset. If there is a ++ * page cache page, it is returned locked and with an increased ++ * refcount. ++ * ++ * If the page is not present, a new page is allocated using @gfp_mask ++ * and added to the page cache and the VM's LRU list. The page is ++ * returned locked and with an increased refcount. + * +- * find_or_create_page() may sleep, even if @gfp_flags specifies an atomic +- * allocation! 
++ * On memory exhaustion, %NULL is returned. + * +- * find_or_create_page() returns the desired page's address, or zero on +- * memory exhaustion. ++ * find_or_create_page() may sleep, even if @gfp_flags specifies an ++ * atomic allocation! + */ + struct page *find_or_create_page(struct address_space *mapping, + pgoff_t index, gfp_t gfp_mask) +@@ -892,6 +976,76 @@ repeat: + EXPORT_SYMBOL(find_or_create_page); + + /** ++ * find_get_entries - gang pagecache lookup ++ * @mapping: The address_space to search ++ * @start: The starting page cache index ++ * @nr_entries: The maximum number of entries ++ * @entries: Where the resulting entries are placed ++ * @indices: The cache indices corresponding to the entries in @entries ++ * ++ * find_get_entries() will search for and return a group of up to ++ * @nr_entries entries in the mapping. The entries are placed at ++ * @entries. find_get_entries() takes a reference against any actual ++ * pages it returns. ++ * ++ * The search returns a group of mapping-contiguous page cache entries ++ * with ascending indexes. There may be holes in the indices due to ++ * not-present pages. ++ * ++ * Any shadow entries of evicted pages are included in the returned ++ * array. ++ * ++ * find_get_entries() returns the number of pages and shadow entries ++ * which were found. ++ */ ++unsigned find_get_entries(struct address_space *mapping, ++ pgoff_t start, unsigned int nr_entries, ++ struct page **entries, pgoff_t *indices) ++{ ++ void **slot; ++ unsigned int ret = 0; ++ struct radix_tree_iter iter; ++ ++ if (!nr_entries) ++ return 0; ++ ++ rcu_read_lock(); ++restart: ++ radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) { ++ struct page *page; ++repeat: ++ page = radix_tree_deref_slot(slot); ++ if (unlikely(!page)) ++ continue; ++ if (radix_tree_exception(page)) { ++ if (radix_tree_deref_retry(page)) ++ goto restart; ++ /* ++ * Otherwise, we must be storing a swap entry ++ * here as an exceptional entry: so return it ++ * without attempting to raise page count. ++ */ ++ goto export; ++ } ++ if (!page_cache_get_speculative(page)) ++ goto repeat; ++ ++ /* Has the page moved? */ ++ if (unlikely(page != *slot)) { ++ page_cache_release(page); ++ goto repeat; ++ } ++export: ++ indices[ret] = iter.index; ++ entries[ret] = page; ++ if (++ret == nr_entries) ++ break; ++ } ++ rcu_read_unlock(); ++ return ret; ++} ++ ++/** + * find_get_pages - gang pagecache lookup + * @mapping: The address_space to search + * @start: The starting page index +--- a/mm/mincore.c ++++ b/mm/mincore.c +@@ -70,13 +70,21 @@ static unsigned char mincore_page(struct + * any other file mapping (ie. marked !present and faulted in with + * tmpfs's .fault). So swapped out tmpfs mappings are tested here. + */ +- page = find_get_page(mapping, pgoff); + #ifdef CONFIG_SWAP +- /* shmem/tmpfs may return swap: account for swapcache page too. */ +- if (radix_tree_exceptional_entry(page)) { +- swp_entry_t swap = radix_to_swp_entry(page); +- page = find_get_page(swap_address_space(swap), swap.val); +- } ++ if (shmem_mapping(mapping)) { ++ page = find_get_entry(mapping, pgoff); ++ /* ++ * shmem/tmpfs may return swap: account for swapcache ++ * page too. 
++ */ ++ if (radix_tree_exceptional_entry(page)) { ++ swp_entry_t swp = radix_to_swp_entry(page); ++ page = find_get_page(swap_address_space(swp), swp.val); ++ } ++ } else ++ page = find_get_page(mapping, pgoff); ++#else ++ page = find_get_page(mapping, pgoff); + #endif + if (page) { + present = PageUptodate(page); +--- a/mm/readahead.c ++++ b/mm/readahead.c +@@ -179,7 +179,7 @@ __do_page_cache_readahead(struct address + rcu_read_lock(); + page = radix_tree_lookup(&mapping->page_tree, page_offset); + rcu_read_unlock(); +- if (page) ++ if (page && !radix_tree_exceptional_entry(page)) + continue; + + page = page_cache_alloc_readahead(mapping); +--- a/mm/shmem.c ++++ b/mm/shmem.c +@@ -330,56 +330,6 @@ static void shmem_delete_from_page_cache + } + + /* +- * Like find_get_pages, but collecting swap entries as well as pages. +- */ +-static unsigned shmem_find_get_pages_and_swap(struct address_space *mapping, +- pgoff_t start, unsigned int nr_pages, +- struct page **pages, pgoff_t *indices) +-{ +- void **slot; +- unsigned int ret = 0; +- struct radix_tree_iter iter; +- +- if (!nr_pages) +- return 0; +- +- rcu_read_lock(); +-restart: +- radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) { +- struct page *page; +-repeat: +- page = radix_tree_deref_slot(slot); +- if (unlikely(!page)) +- continue; +- if (radix_tree_exception(page)) { +- if (radix_tree_deref_retry(page)) +- goto restart; +- /* +- * Otherwise, we must be storing a swap entry +- * here as an exceptional entry: so return it +- * without attempting to raise page count. +- */ +- goto export; +- } +- if (!page_cache_get_speculative(page)) +- goto repeat; +- +- /* Has the page moved? */ +- if (unlikely(page != *slot)) { +- page_cache_release(page); +- goto repeat; +- } +-export: +- indices[ret] = iter.index; +- pages[ret] = page; +- if (++ret == nr_pages) +- break; +- } +- rcu_read_unlock(); +- return ret; +-} +- +-/* + * Remove swap entry from radix tree, free the swap and its page cache. + */ + static int shmem_free_swap(struct address_space *mapping, +@@ -397,21 +347,6 @@ static int shmem_free_swap(struct addres + } + + /* +- * Pagevec may contain swap entries, so shuffle up pages before releasing. +- */ +-static void shmem_deswap_pagevec(struct pagevec *pvec) +-{ +- int i, j; +- +- for (i = 0, j = 0; i < pagevec_count(pvec); i++) { +- struct page *page = pvec->pages[i]; +- if (!radix_tree_exceptional_entry(page)) +- pvec->pages[j++] = page; +- } +- pvec->nr = j; +-} +- +-/* + * SysV IPC SHM_UNLOCK restore Unevictable pages to their evictable lists. + */ + void shmem_unlock_mapping(struct address_space *mapping) +@@ -429,12 +364,12 @@ void shmem_unlock_mapping(struct address + * Avoid pagevec_lookup(): find_get_pages() returns 0 as if it + * has finished, if it hits a row of PAGEVEC_SIZE swap entries. 
+ */ +- pvec.nr = shmem_find_get_pages_and_swap(mapping, index, +- PAGEVEC_SIZE, pvec.pages, indices); ++ pvec.nr = find_get_entries(mapping, index, ++ PAGEVEC_SIZE, pvec.pages, indices); + if (!pvec.nr) + break; + index = indices[pvec.nr - 1] + 1; +- shmem_deswap_pagevec(&pvec); ++ pagevec_remove_exceptionals(&pvec); + check_move_unevictable_pages(pvec.pages, pvec.nr); + pagevec_release(&pvec); + cond_resched(); +@@ -466,9 +401,9 @@ static void shmem_undo_range(struct inod + pagevec_init(&pvec, 0); + index = start; + while (index < end) { +- pvec.nr = shmem_find_get_pages_and_swap(mapping, index, +- min(end - index, (pgoff_t)PAGEVEC_SIZE), +- pvec.pages, indices); ++ pvec.nr = find_get_entries(mapping, index, ++ min(end - index, (pgoff_t)PAGEVEC_SIZE), ++ pvec.pages, indices); + if (!pvec.nr) + break; + mem_cgroup_uncharge_start(); +@@ -497,7 +432,7 @@ static void shmem_undo_range(struct inod + } + unlock_page(page); + } +- shmem_deswap_pagevec(&pvec); ++ pagevec_remove_exceptionals(&pvec); + pagevec_release(&pvec); + mem_cgroup_uncharge_end(); + cond_resched(); +@@ -535,9 +470,10 @@ static void shmem_undo_range(struct inod + index = start; + while (index < end) { + cond_resched(); +- pvec.nr = shmem_find_get_pages_and_swap(mapping, index, ++ ++ pvec.nr = find_get_entries(mapping, index, + min(end - index, (pgoff_t)PAGEVEC_SIZE), +- pvec.pages, indices); ++ pvec.pages, indices); + if (!pvec.nr) { + /* If all gone or hole-punch or unfalloc, we're done */ + if (index == start || end != -1) +@@ -580,7 +516,7 @@ static void shmem_undo_range(struct inod + } + unlock_page(page); + } +- shmem_deswap_pagevec(&pvec); ++ pagevec_remove_exceptionals(&pvec); + pagevec_release(&pvec); + mem_cgroup_uncharge_end(); + index++; +@@ -1087,7 +1023,7 @@ static int shmem_getpage_gfp(struct inod + return -EFBIG; + repeat: + swap.val = 0; +- page = find_lock_page(mapping, index); ++ page = find_lock_entry(mapping, index); + if (radix_tree_exceptional_entry(page)) { + swap = radix_to_swp_entry(page); + page = NULL; +@@ -1482,6 +1418,11 @@ static struct inode *shmem_get_inode(str + return inode; + } + ++bool shmem_mapping(struct address_space *mapping) ++{ ++ return mapping->backing_dev_info == &shmem_backing_dev_info; ++} ++ + #ifdef CONFIG_TMPFS + static const struct inode_operations shmem_symlink_inode_operations; + static const struct inode_operations shmem_short_symlink_operations; +@@ -1794,7 +1735,7 @@ static pgoff_t shmem_seek_hole_data(stru + pagevec_init(&pvec, 0); + pvec.nr = 1; /* start small: we may be there already */ + while (!done) { +- pvec.nr = shmem_find_get_pages_and_swap(mapping, index, ++ pvec.nr = find_get_entries(mapping, index, + pvec.nr, pvec.pages, indices); + if (!pvec.nr) { + if (whence == SEEK_DATA) +@@ -1821,7 +1762,7 @@ static pgoff_t shmem_seek_hole_data(stru + break; + } + } +- shmem_deswap_pagevec(&pvec); ++ pagevec_remove_exceptionals(&pvec); + pagevec_release(&pvec); + pvec.nr = PAGEVEC_SIZE; + cond_resched(); +--- a/mm/swap.c ++++ b/mm/swap.c +@@ -948,6 +948,57 @@ void __pagevec_lru_add(struct pagevec *p + EXPORT_SYMBOL(__pagevec_lru_add); + + /** ++ * pagevec_lookup_entries - gang pagecache lookup ++ * @pvec: Where the resulting entries are placed ++ * @mapping: The address_space to search ++ * @start: The starting entry index ++ * @nr_entries: The maximum number of entries ++ * @indices: The cache indices corresponding to the entries in @pvec ++ * ++ * pagevec_lookup_entries() will search for and return a group of up ++ * to @nr_entries pages and shadow entries in the 
mapping. All ++ * entries are placed in @pvec. pagevec_lookup_entries() takes a ++ * reference against actual pages in @pvec. ++ * ++ * The search returns a group of mapping-contiguous entries with ++ * ascending indexes. There may be holes in the indices due to ++ * not-present entries. ++ * ++ * pagevec_lookup_entries() returns the number of entries which were ++ * found. ++ */ ++unsigned pagevec_lookup_entries(struct pagevec *pvec, ++ struct address_space *mapping, ++ pgoff_t start, unsigned nr_pages, ++ pgoff_t *indices) ++{ ++ pvec->nr = find_get_entries(mapping, start, nr_pages, ++ pvec->pages, indices); ++ return pagevec_count(pvec); ++} ++ ++/** ++ * pagevec_remove_exceptionals - pagevec exceptionals pruning ++ * @pvec: The pagevec to prune ++ * ++ * pagevec_lookup_entries() fills both pages and exceptional radix ++ * tree entries into the pagevec. This function prunes all ++ * exceptionals from @pvec without leaving holes, so that it can be ++ * passed on to page-only pagevec operations. ++ */ ++void pagevec_remove_exceptionals(struct pagevec *pvec) ++{ ++ int i, j; ++ ++ for (i = 0, j = 0; i < pagevec_count(pvec); i++) { ++ struct page *page = pvec->pages[i]; ++ if (!radix_tree_exceptional_entry(page)) ++ pvec->pages[j++] = page; ++ } ++ pvec->nr = j; ++} ++ ++/** + * pagevec_lookup - gang pagecache lookup + * @pvec: Where the resulting pages are placed + * @mapping: The address_space to search +--- a/mm/truncate.c ++++ b/mm/truncate.c +@@ -23,6 +23,22 @@ + #include + #include "internal.h" + ++static void clear_exceptional_entry(struct address_space *mapping, ++ pgoff_t index, void *entry) ++{ ++ /* Handled by shmem itself */ ++ if (shmem_mapping(mapping)) ++ return; ++ ++ spin_lock_irq(&mapping->tree_lock); ++ /* ++ * Regular page slots are stabilized by the page lock even ++ * without the tree itself locked. These unlocked entries ++ * need verification under the tree lock. 
++ */ ++ radix_tree_delete_item(&mapping->page_tree, index, entry); ++ spin_unlock_irq(&mapping->tree_lock); ++} + + /** + * do_invalidatepage - invalidate part or all of a page +@@ -209,6 +225,7 @@ void truncate_inode_pages_range(struct a + unsigned int partial_start; /* inclusive */ + unsigned int partial_end; /* exclusive */ + struct pagevec pvec; ++ pgoff_t indices[PAGEVEC_SIZE]; + pgoff_t index; + int i; + +@@ -239,17 +256,23 @@ void truncate_inode_pages_range(struct a + + pagevec_init(&pvec, 0); + index = start; +- while (index < end && pagevec_lookup(&pvec, mapping, index, +- min(end - index, (pgoff_t)PAGEVEC_SIZE))) { ++ while (index < end && pagevec_lookup_entries(&pvec, mapping, index, ++ min(end - index, (pgoff_t)PAGEVEC_SIZE), ++ indices)) { + mem_cgroup_uncharge_start(); + for (i = 0; i < pagevec_count(&pvec); i++) { + struct page *page = pvec.pages[i]; + + /* We rely upon deletion not changing page->index */ +- index = page->index; ++ index = indices[i]; + if (index >= end) + break; + ++ if (radix_tree_exceptional_entry(page)) { ++ clear_exceptional_entry(mapping, index, page); ++ continue; ++ } ++ + if (!trylock_page(page)) + continue; + WARN_ON(page->index != index); +@@ -260,6 +283,7 @@ void truncate_inode_pages_range(struct a + truncate_inode_page(mapping, page); + unlock_page(page); + } ++ pagevec_remove_exceptionals(&pvec); + pagevec_release(&pvec); + mem_cgroup_uncharge_end(); + cond_resched(); +@@ -308,14 +332,16 @@ void truncate_inode_pages_range(struct a + index = start; + for ( ; ; ) { + cond_resched(); +- if (!pagevec_lookup(&pvec, mapping, index, +- min(end - index, (pgoff_t)PAGEVEC_SIZE))) { ++ if (!pagevec_lookup_entries(&pvec, mapping, index, ++ min(end - index, (pgoff_t)PAGEVEC_SIZE), ++ indices)) { + if (index == start) + break; + index = start; + continue; + } +- if (index == start && pvec.pages[0]->index >= end) { ++ if (index == start && indices[0] >= end) { ++ pagevec_remove_exceptionals(&pvec); + pagevec_release(&pvec); + break; + } +@@ -324,16 +350,22 @@ void truncate_inode_pages_range(struct a + struct page *page = pvec.pages[i]; + + /* We rely upon deletion not changing page->index */ +- index = page->index; ++ index = indices[i]; + if (index >= end) + break; + ++ if (radix_tree_exceptional_entry(page)) { ++ clear_exceptional_entry(mapping, index, page); ++ continue; ++ } ++ + lock_page(page); + WARN_ON(page->index != index); + wait_on_page_writeback(page); + truncate_inode_page(mapping, page); + unlock_page(page); + } ++ pagevec_remove_exceptionals(&pvec); + pagevec_release(&pvec); + mem_cgroup_uncharge_end(); + index++; +@@ -376,6 +408,7 @@ EXPORT_SYMBOL(truncate_inode_pages); + unsigned long invalidate_mapping_pages(struct address_space *mapping, + pgoff_t start, pgoff_t end) + { ++ pgoff_t indices[PAGEVEC_SIZE]; + struct pagevec pvec; + pgoff_t index = start; + unsigned long ret; +@@ -391,17 +424,23 @@ unsigned long invalidate_mapping_pages(s + */ + + pagevec_init(&pvec, 0); +- while (index <= end && pagevec_lookup(&pvec, mapping, index, +- min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) { ++ while (index <= end && pagevec_lookup_entries(&pvec, mapping, index, ++ min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1, ++ indices)) { + mem_cgroup_uncharge_start(); + for (i = 0; i < pagevec_count(&pvec); i++) { + struct page *page = pvec.pages[i]; + + /* We rely upon deletion not changing page->index */ +- index = page->index; ++ index = indices[i]; + if (index > end) + break; + ++ if (radix_tree_exceptional_entry(page)) { ++ 
clear_exceptional_entry(mapping, index, page); ++ continue; ++ } ++ + if (!trylock_page(page)) + continue; + WARN_ON(page->index != index); +@@ -415,6 +454,7 @@ unsigned long invalidate_mapping_pages(s + deactivate_page(page); + count += ret; + } ++ pagevec_remove_exceptionals(&pvec); + pagevec_release(&pvec); + mem_cgroup_uncharge_end(); + cond_resched(); +@@ -482,6 +522,7 @@ static int do_launder_page(struct addres + int invalidate_inode_pages2_range(struct address_space *mapping, + pgoff_t start, pgoff_t end) + { ++ pgoff_t indices[PAGEVEC_SIZE]; + struct pagevec pvec; + pgoff_t index; + int i; +@@ -492,17 +533,23 @@ int invalidate_inode_pages2_range(struct + cleancache_invalidate_inode(mapping); + pagevec_init(&pvec, 0); + index = start; +- while (index <= end && pagevec_lookup(&pvec, mapping, index, +- min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) { ++ while (index <= end && pagevec_lookup_entries(&pvec, mapping, index, ++ min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1, ++ indices)) { + mem_cgroup_uncharge_start(); + for (i = 0; i < pagevec_count(&pvec); i++) { + struct page *page = pvec.pages[i]; + + /* We rely upon deletion not changing page->index */ +- index = page->index; ++ index = indices[i]; + if (index > end) + break; + ++ if (radix_tree_exceptional_entry(page)) { ++ clear_exceptional_entry(mapping, index, page); ++ continue; ++ } ++ + lock_page(page); + WARN_ON(page->index != index); + if (page->mapping != mapping) { +@@ -540,6 +587,7 @@ int invalidate_inode_pages2_range(struct + ret = ret2; + unlock_page(page); + } ++ pagevec_remove_exceptionals(&pvec); + pagevec_release(&pvec); + mem_cgroup_uncharge_end(); + cond_resched(); diff --git a/queue-3.14/mm-madvise-fix-madv_willneed-on-shmem-swapouts.patch b/queue-3.14/mm-madvise-fix-madv_willneed-on-shmem-swapouts.patch new file mode 100644 index 00000000000..b593627fc1c --- /dev/null +++ b/queue-3.14/mm-madvise-fix-madv_willneed-on-shmem-swapouts.patch @@ -0,0 +1,42 @@ +From 55231e5c898c5c03c14194001e349f40f59bd300 Mon Sep 17 00:00:00 2001 +From: Johannes Weiner +Date: Thu, 22 May 2014 11:54:17 -0700 +Subject: mm: madvise: fix MADV_WILLNEED on shmem swapouts + +From: Johannes Weiner + +commit 55231e5c898c5c03c14194001e349f40f59bd300 upstream. + +MADV_WILLNEED currently does not read swapped out shmem pages back in. + +Commit 0cd6144aadd2 ("mm + fs: prepare for non-page entries in page +cache radix trees") made find_get_page() filter exceptional radix tree +entries but failed to convert all find_get_page() callers that WANT +exceptional entries over to find_get_entry(). One of them is shmem swap +readahead in madvise, which now skips over any swap-out records. + +Convert it to find_get_entry(). 
+ +Fixes: 0cd6144aadd2 ("mm + fs: prepare for non-page entries in page cache radix trees") +Signed-off-by: Johannes Weiner +Reported-by: Hugh Dickins +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Mel Gorman +Signed-off-by: Greg Kroah-Hartman + +--- + mm/madvise.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/mm/madvise.c ++++ b/mm/madvise.c +@@ -195,7 +195,7 @@ static void force_shm_swapin_readahead(s + for (; start < end; start += PAGE_SIZE) { + index = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; + +- page = find_get_page(mapping, index); ++ page = find_get_entry(mapping, index); + if (!radix_tree_exceptional_entry(page)) { + if (page) + page_cache_release(page); diff --git a/queue-3.14/mm-migration-add-destination-page-freeing-callback.patch b/queue-3.14/mm-migration-add-destination-page-freeing-callback.patch new file mode 100644 index 00000000000..43996219ea3 --- /dev/null +++ b/queue-3.14/mm-migration-add-destination-page-freeing-callback.patch @@ -0,0 +1,285 @@ +From 68711a746345c44ae00c64d8dbac6a9ce13ac54a Mon Sep 17 00:00:00 2001 +From: David Rientjes +Date: Wed, 4 Jun 2014 16:08:25 -0700 +Subject: mm, migration: add destination page freeing callback + +From: David Rientjes + +commit 68711a746345c44ae00c64d8dbac6a9ce13ac54a upstream. + +Memory migration uses a callback defined by the caller to determine how to +allocate destination pages. When migration fails for a source page, +however, it frees the destination page back to the system. + +This patch adds a memory migration callback defined by the caller to +determine how to free destination pages. If a caller, such as memory +compaction, builds its own freelist for migration targets, this can reuse +already freed memory instead of scanning additional memory. + +If the caller provides a function to handle freeing of destination pages, +it is called when page migration fails. If the caller passes NULL then +freeing back to the system will be handled as usual. This patch +introduces no functional change. 
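+
+As an illustrative sketch (the pool type and its helpers are hypothetical;
+only the migrate_pages() signature and the callback typedefs come from this
+patch), a caller that recycles its own destination pages might wire up both
+callbacks like this:
+
+	/* Hypothetical allocation callback: hand out a page from a pool. */
+	static struct page *pool_alloc_dst(struct page *page,
+					   unsigned long private, int **reason)
+	{
+		struct my_pool *pool = (struct my_pool *)private;	/* assumed type */
+
+		return my_pool_take(pool);				/* assumed helper */
+	}
+
+	/* Hypothetical freeing callback: only called when migration failed. */
+	static void pool_free_dst(struct page *page, unsigned long private)
+	{
+		my_pool_put((struct my_pool *)private, page);		/* assumed helper */
+	}
+
+	err = migrate_pages(&pagelist, pool_alloc_dst, pool_free_dst,
+			    (unsigned long)pool, MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
+
+Passing NULL for the freeing callback keeps the old behaviour of releasing
+unused destination pages back to the system.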
+ +Signed-off-by: David Rientjes +Reviewed-by: Naoya Horiguchi +Acked-by: Mel Gorman +Acked-by: Vlastimil Babka +Cc: Greg Thelen +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Mel Gorman +Signed-off-by: Greg Kroah-Hartman + + +--- + include/linux/migrate.h | 11 ++++++--- + mm/compaction.c | 2 - + mm/memory-failure.c | 4 +-- + mm/memory_hotplug.c | 2 - + mm/mempolicy.c | 4 +-- + mm/migrate.c | 55 ++++++++++++++++++++++++++++++++++-------------- + mm/page_alloc.c | 2 - + 7 files changed, 53 insertions(+), 27 deletions(-) + +--- a/include/linux/migrate.h ++++ b/include/linux/migrate.h +@@ -5,7 +5,9 @@ + #include + #include + +-typedef struct page *new_page_t(struct page *, unsigned long private, int **); ++typedef struct page *new_page_t(struct page *page, unsigned long private, ++ int **reason); ++typedef void free_page_t(struct page *page, unsigned long private); + + /* + * Return values from addresss_space_operations.migratepage(): +@@ -38,7 +40,7 @@ enum migrate_reason { + extern void putback_movable_pages(struct list_head *l); + extern int migrate_page(struct address_space *, + struct page *, struct page *, enum migrate_mode); +-extern int migrate_pages(struct list_head *l, new_page_t x, ++extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free, + unsigned long private, enum migrate_mode mode, int reason); + + extern int migrate_prep(void); +@@ -56,8 +58,9 @@ extern int migrate_page_move_mapping(str + #else + + static inline void putback_movable_pages(struct list_head *l) {} +-static inline int migrate_pages(struct list_head *l, new_page_t x, +- unsigned long private, enum migrate_mode mode, int reason) ++static inline int migrate_pages(struct list_head *l, new_page_t new, ++ free_page_t free, unsigned long private, enum migrate_mode mode, ++ int reason) + { return -ENOSYS; } + + static inline int migrate_prep(void) { return -ENOSYS; } +--- a/mm/compaction.c ++++ b/mm/compaction.c +@@ -1016,7 +1016,7 @@ static int compact_zone(struct zone *zon + } + + nr_migrate = cc->nr_migratepages; +- err = migrate_pages(&cc->migratepages, compaction_alloc, ++ err = migrate_pages(&cc->migratepages, compaction_alloc, NULL, + (unsigned long)cc, + cc->sync ? MIGRATE_SYNC_LIGHT : MIGRATE_ASYNC, + MR_COMPACTION); +--- a/mm/memory-failure.c ++++ b/mm/memory-failure.c +@@ -1540,7 +1540,7 @@ static int soft_offline_huge_page(struct + + /* Keep page count to indicate a given hugepage is isolated. */ + list_move(&hpage->lru, &pagelist); +- ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, ++ ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL, + MIGRATE_SYNC, MR_MEMORY_FAILURE); + if (ret) { + pr_info("soft offline: %#lx: migration failed %d, type %lx\n", +@@ -1621,7 +1621,7 @@ static int __soft_offline_page(struct pa + inc_zone_page_state(page, NR_ISOLATED_ANON + + page_is_file_cache(page)); + list_add(&page->lru, &pagelist); +- ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, ++ ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL, + MIGRATE_SYNC, MR_MEMORY_FAILURE); + if (ret) { + if (!list_empty(&pagelist)) { +--- a/mm/memory_hotplug.c ++++ b/mm/memory_hotplug.c +@@ -1332,7 +1332,7 @@ do_migrate_range(unsigned long start_pfn + * alloc_migrate_target should be improooooved!! + * migrate_pages returns # of failed pages. 
+ */ +- ret = migrate_pages(&source, alloc_migrate_target, 0, ++ ret = migrate_pages(&source, alloc_migrate_target, NULL, 0, + MIGRATE_SYNC, MR_MEMORY_HOTPLUG); + if (ret) + putback_movable_pages(&source); +--- a/mm/mempolicy.c ++++ b/mm/mempolicy.c +@@ -1060,7 +1060,7 @@ static int migrate_to_node(struct mm_str + flags | MPOL_MF_DISCONTIG_OK, &pagelist); + + if (!list_empty(&pagelist)) { +- err = migrate_pages(&pagelist, new_node_page, dest, ++ err = migrate_pages(&pagelist, new_node_page, NULL, dest, + MIGRATE_SYNC, MR_SYSCALL); + if (err) + putback_movable_pages(&pagelist); +@@ -1306,7 +1306,7 @@ static long do_mbind(unsigned long start + + if (!list_empty(&pagelist)) { + WARN_ON_ONCE(flags & MPOL_MF_LAZY); +- nr_failed = migrate_pages(&pagelist, new_page, ++ nr_failed = migrate_pages(&pagelist, new_page, NULL, + start, MIGRATE_SYNC, MR_MEMPOLICY_MBIND); + if (nr_failed) + putback_movable_pages(&pagelist); +--- a/mm/migrate.c ++++ b/mm/migrate.c +@@ -941,8 +941,9 @@ out: + * Obtain the lock on page, remove all ptes and migrate the page + * to the newly allocated page in newpage. + */ +-static int unmap_and_move(new_page_t get_new_page, unsigned long private, +- struct page *page, int force, enum migrate_mode mode) ++static int unmap_and_move(new_page_t get_new_page, free_page_t put_new_page, ++ unsigned long private, struct page *page, int force, ++ enum migrate_mode mode) + { + int rc = 0; + int *result = NULL; +@@ -986,11 +987,17 @@ out: + page_is_file_cache(page)); + putback_lru_page(page); + } ++ + /* +- * Move the new page to the LRU. If migration was not successful +- * then this will free the page. ++ * If migration was not successful and there's a freeing callback, use ++ * it. Otherwise, putback_lru_page() will drop the reference grabbed ++ * during isolation. + */ +- putback_lru_page(newpage); ++ if (rc != MIGRATEPAGE_SUCCESS && put_new_page) ++ put_new_page(newpage, private); ++ else ++ putback_lru_page(newpage); ++ + if (result) { + if (rc) + *result = rc; +@@ -1019,8 +1026,9 @@ out: + * will wait in the page fault for migration to complete. + */ + static int unmap_and_move_huge_page(new_page_t get_new_page, +- unsigned long private, struct page *hpage, +- int force, enum migrate_mode mode) ++ free_page_t put_new_page, unsigned long private, ++ struct page *hpage, int force, ++ enum migrate_mode mode) + { + int rc = 0; + int *result = NULL; +@@ -1059,20 +1067,30 @@ static int unmap_and_move_huge_page(new_ + if (!page_mapped(hpage)) + rc = move_to_new_page(new_hpage, hpage, 1, mode); + +- if (rc) ++ if (rc != MIGRATEPAGE_SUCCESS) + remove_migration_ptes(hpage, hpage); + + if (anon_vma) + put_anon_vma(anon_vma); + +- if (!rc) ++ if (rc == MIGRATEPAGE_SUCCESS) + hugetlb_cgroup_migrate(hpage, new_hpage); + + unlock_page(hpage); + out: + if (rc != -EAGAIN) + putback_active_hugepage(hpage); +- put_page(new_hpage); ++ ++ /* ++ * If migration was not successful and there's a freeing callback, use ++ * it. Otherwise, put_page() will drop the reference grabbed during ++ * isolation. ++ */ ++ if (rc != MIGRATEPAGE_SUCCESS && put_new_page) ++ put_new_page(new_hpage, private); ++ else ++ put_page(new_hpage); ++ + if (result) { + if (rc) + *result = rc; +@@ -1089,6 +1107,8 @@ out: + * @from: The list of pages to be migrated. + * @get_new_page: The function used to allocate free pages to be used + * as the target of the page migration. ++ * @put_new_page: The function used to free target pages if migration ++ * fails, or NULL if no special handling is necessary. 
+ * @private: Private data to be passed on to get_new_page() + * @mode: The migration mode that specifies the constraints for + * page migration, if any. +@@ -1102,7 +1122,8 @@ out: + * Returns the number of pages that were not migrated, or an error code. + */ + int migrate_pages(struct list_head *from, new_page_t get_new_page, +- unsigned long private, enum migrate_mode mode, int reason) ++ free_page_t put_new_page, unsigned long private, ++ enum migrate_mode mode, int reason) + { + int retry = 1; + int nr_failed = 0; +@@ -1124,10 +1145,11 @@ int migrate_pages(struct list_head *from + + if (PageHuge(page)) + rc = unmap_and_move_huge_page(get_new_page, +- private, page, pass > 2, mode); ++ put_new_page, private, page, ++ pass > 2, mode); + else +- rc = unmap_and_move(get_new_page, private, +- page, pass > 2, mode); ++ rc = unmap_and_move(get_new_page, put_new_page, ++ private, page, pass > 2, mode); + + switch(rc) { + case -ENOMEM: +@@ -1276,7 +1298,7 @@ set_status: + + err = 0; + if (!list_empty(&pagelist)) { +- err = migrate_pages(&pagelist, new_page_node, ++ err = migrate_pages(&pagelist, new_page_node, NULL, + (unsigned long)pm, MIGRATE_SYNC, MR_SYSCALL); + if (err) + putback_movable_pages(&pagelist); +@@ -1732,7 +1754,8 @@ int migrate_misplaced_page(struct page * + + list_add(&page->lru, &migratepages); + nr_remaining = migrate_pages(&migratepages, alloc_misplaced_dst_page, +- node, MIGRATE_ASYNC, MR_NUMA_MISPLACED); ++ NULL, node, MIGRATE_ASYNC, ++ MR_NUMA_MISPLACED); + if (nr_remaining) { + if (!list_empty(&migratepages)) { + list_del(&page->lru); +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -6261,7 +6261,7 @@ static int __alloc_contig_migrate_range( + cc->nr_migratepages -= nr_reclaimed; + + ret = migrate_pages(&cc->migratepages, alloc_migrate_target, +- 0, MIGRATE_SYNC, MR_CMA); ++ NULL, 0, MIGRATE_SYNC, MR_CMA); + } + if (ret < 0) { + putback_movable_pages(&cc->migratepages); diff --git a/queue-3.14/mm-readahead.c-inline-ra_submit.patch b/queue-3.14/mm-readahead.c-inline-ra_submit.patch new file mode 100644 index 00000000000..4e81e59a270 --- /dev/null +++ b/queue-3.14/mm-readahead.c-inline-ra_submit.patch @@ -0,0 +1,123 @@ +From 29f175d125f0f3a9503af8a5596f93d714cceb08 Mon Sep 17 00:00:00 2001 +From: Fabian Frederick +Date: Mon, 7 Apr 2014 15:37:55 -0700 +Subject: mm/readahead.c: inline ra_submit + +From: Fabian Frederick + +commit 29f175d125f0f3a9503af8a5596f93d714cceb08 upstream. + +Commit f9acc8c7b35a ("readahead: sanify file_ra_state names") left +ra_submit with a single function call. + +Move ra_submit to internal.h and inline it to save some stack. Thanks +to Andrew Morton for commenting different versions. 
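As a quick illustration of what the move buys callers inside mm/, a hypothetical user of the now-inline helper could look like the sketch below. my_start_readahead() and its particular window sizing are invented for the example; ra_submit(), __do_page_cache_readahead() and max_sane_readahead() are the interfaces actually touched or referenced by this patch.

/*
 * Illustrative sketch only: a caller inside mm/ once ra_submit() lives in
 * mm/internal.h as a static inline.  my_start_readahead() is a made-up name.
 */
#include <linux/mm.h>
#include <linux/fs.h>
#include "internal.h"	/* ra_submit(), __do_page_cache_readahead() */

static unsigned long my_start_readahead(struct file_ra_state *ra,
					struct address_space *mapping,
					struct file *filp, pgoff_t offset,
					unsigned long req_size)
{
	/* describe the readahead window ... */
	ra->start = offset;
	ra->size = min(req_size, max_sane_readahead(ra->ra_pages));
	ra->async_size = ra->size / 2;	/* arbitrary split for the example */

	/* ... and submit it; this now expands in place, saving a stack frame */
	return ra_submit(ra, mapping, filp);
}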
+ +Signed-off-by: Fabian Frederick +Suggested-by: Andrew Morton +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Mel Gorman +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/mm.h | 3 --- + mm/internal.h | 15 +++++++++++++++ + mm/readahead.c | 21 +++------------------ + 3 files changed, 18 insertions(+), 21 deletions(-) + +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -1856,9 +1856,6 @@ void page_cache_async_readahead(struct a + unsigned long size); + + unsigned long max_sane_readahead(unsigned long nr); +-unsigned long ra_submit(struct file_ra_state *ra, +- struct address_space *mapping, +- struct file *filp); + + /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */ + extern int expand_stack(struct vm_area_struct *vma, unsigned long address); +--- a/mm/internal.h ++++ b/mm/internal.h +@@ -11,6 +11,7 @@ + #ifndef __MM_INTERNAL_H + #define __MM_INTERNAL_H + ++#include + #include + + void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma, +@@ -21,6 +22,20 @@ static inline void set_page_count(struct + atomic_set(&page->_count, v); + } + ++extern int __do_page_cache_readahead(struct address_space *mapping, ++ struct file *filp, pgoff_t offset, unsigned long nr_to_read, ++ unsigned long lookahead_size); ++ ++/* ++ * Submit IO for the read-ahead request in file_ra_state. ++ */ ++static inline unsigned long ra_submit(struct file_ra_state *ra, ++ struct address_space *mapping, struct file *filp) ++{ ++ return __do_page_cache_readahead(mapping, filp, ++ ra->start, ra->size, ra->async_size); ++} ++ + /* + * Turn a non-refcounted page (->_count == 0) into refcounted with + * a count of one. +--- a/mm/readahead.c ++++ b/mm/readahead.c +@@ -8,9 +8,7 @@ + */ + + #include +-#include + #include +-#include + #include + #include + #include +@@ -20,6 +18,8 @@ + #include + #include + ++#include "internal.h" ++ + /* + * Initialise a struct file's readahead state. Assumes that the caller has + * memset *ra to zero. +@@ -149,8 +149,7 @@ out: + * + * Returns the number of pages requested, or the maximum amount of I/O allowed. + */ +-static int +-__do_page_cache_readahead(struct address_space *mapping, struct file *filp, ++int __do_page_cache_readahead(struct address_space *mapping, struct file *filp, + pgoff_t offset, unsigned long nr_to_read, + unsigned long lookahead_size) + { +@@ -244,20 +243,6 @@ unsigned long max_sane_readahead(unsigne + } + + /* +- * Submit IO for the read-ahead request in file_ra_state. +- */ +-unsigned long ra_submit(struct file_ra_state *ra, +- struct address_space *mapping, struct file *filp) +-{ +- int actual; +- +- actual = __do_page_cache_readahead(mapping, filp, +- ra->start, ra->size, ra->async_size); +- +- return actual; +-} +- +-/* + * Set the initial window size, round to next power of 2 and square + * for small size, x 4 for medium, and x 2 for large + * for 128k (32 page) max ra diff --git a/queue-3.14/mm-remove-read_cache_page_async.patch b/queue-3.14/mm-remove-read_cache_page_async.patch new file mode 100644 index 00000000000..8accf4cb7dc --- /dev/null +++ b/queue-3.14/mm-remove-read_cache_page_async.patch @@ -0,0 +1,222 @@ +From 67f9fd91f93c582b7de2ab9325b6e179db77e4d5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 3 Apr 2014 14:48:18 -0700 +Subject: mm: remove read_cache_page_async() + +From: Sasha Levin + +commit 67f9fd91f93c582b7de2ab9325b6e179db77e4d5 upstream. 
+ +This patch removes read_cache_page_async() which wasn't really needed +anywhere and simplifies the code around it a bit. + +read_cache_page_async() is useful when we want to read a page into the +cache without waiting for it to complete. This happens when the +appropriate callback 'filler' doesn't complete its read operation and +releases the page lock immediately, and instead queues a different +completion routine to do that. This never actually happened anywhere in +the code. + +read_cache_page_async() had 3 different callers: + +- read_cache_page() which is the sync version, it would just wait for + the requested read to complete using wait_on_page_read(). + +- JFFS2 would call it from jffs2_gc_fetch_page(), but the filler + function it supplied doesn't do any async reads, and would complete + before the filler function returns - making it actually a sync read. + +- CRAMFS would call it using the read_mapping_page_async() wrapper, with + a similar story to JFFS2 - the filler function doesn't do anything that + reminds async reads and would always complete before the filler function + returns. + +To sum it up, the code in mm/filemap.c never took advantage of having +read_cache_page_async(). While there are filler callbacks that do async +reads (such as the block one), we always called it with the +read_cache_page(). + +This patch adds a mandatory wait for read to complete when adding a new +page to the cache, and removes read_cache_page_async() and its wrappers. + +Signed-off-by: Sasha Levin +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Mel Gorman +Signed-off-by: Greg Kroah-Hartman + +--- + fs/cramfs/inode.c | 3 -- + fs/jffs2/fs.c | 2 - + include/linux/pagemap.h | 10 ------- + mm/filemap.c | 64 +++++++++++++++++------------------------------- + 4 files changed, 25 insertions(+), 54 deletions(-) + +--- a/fs/cramfs/inode.c ++++ b/fs/cramfs/inode.c +@@ -195,8 +195,7 @@ static void *cramfs_read(struct super_bl + struct page *page = NULL; + + if (blocknr + i < devsize) { +- page = read_mapping_page_async(mapping, blocknr + i, +- NULL); ++ page = read_mapping_page(mapping, blocknr + i, NULL); + /* synchronous error? 
*/ + if (IS_ERR(page)) + page = NULL; +--- a/fs/jffs2/fs.c ++++ b/fs/jffs2/fs.c +@@ -687,7 +687,7 @@ unsigned char *jffs2_gc_fetch_page(struc + struct inode *inode = OFNI_EDONI_2SFFJ(f); + struct page *pg; + +- pg = read_cache_page_async(inode->i_mapping, offset >> PAGE_CACHE_SHIFT, ++ pg = read_cache_page(inode->i_mapping, offset >> PAGE_CACHE_SHIFT, + (void *)jffs2_do_readpage_unlock, inode); + if (IS_ERR(pg)) + return (void *)pg; +--- a/include/linux/pagemap.h ++++ b/include/linux/pagemap.h +@@ -278,8 +278,6 @@ static inline struct page *grab_cache_pa + + extern struct page * grab_cache_page_nowait(struct address_space *mapping, + pgoff_t index); +-extern struct page * read_cache_page_async(struct address_space *mapping, +- pgoff_t index, filler_t *filler, void *data); + extern struct page * read_cache_page(struct address_space *mapping, + pgoff_t index, filler_t *filler, void *data); + extern struct page * read_cache_page_gfp(struct address_space *mapping, +@@ -287,14 +285,6 @@ extern struct page * read_cache_page_gfp + extern int read_cache_pages(struct address_space *mapping, + struct list_head *pages, filler_t *filler, void *data); + +-static inline struct page *read_mapping_page_async( +- struct address_space *mapping, +- pgoff_t index, void *data) +-{ +- filler_t *filler = (filler_t *)mapping->a_ops->readpage; +- return read_cache_page_async(mapping, index, filler, data); +-} +- + static inline struct page *read_mapping_page(struct address_space *mapping, + pgoff_t index, void *data) + { +--- a/mm/filemap.c ++++ b/mm/filemap.c +@@ -2027,6 +2027,18 @@ int generic_file_readonly_mmap(struct fi + EXPORT_SYMBOL(generic_file_mmap); + EXPORT_SYMBOL(generic_file_readonly_mmap); + ++static struct page *wait_on_page_read(struct page *page) ++{ ++ if (!IS_ERR(page)) { ++ wait_on_page_locked(page); ++ if (!PageUptodate(page)) { ++ page_cache_release(page); ++ page = ERR_PTR(-EIO); ++ } ++ } ++ return page; ++} ++ + static struct page *__read_cache_page(struct address_space *mapping, + pgoff_t index, + int (*filler)(void *, struct page *), +@@ -2053,6 +2065,8 @@ repeat: + if (err < 0) { + page_cache_release(page); + page = ERR_PTR(err); ++ } else { ++ page = wait_on_page_read(page); + } + } + return page; +@@ -2089,6 +2103,10 @@ retry: + if (err < 0) { + page_cache_release(page); + return ERR_PTR(err); ++ } else { ++ page = wait_on_page_read(page); ++ if (IS_ERR(page)) ++ return page; + } + out: + mark_page_accessed(page); +@@ -2096,40 +2114,25 @@ out: + } + + /** +- * read_cache_page_async - read into page cache, fill it if needed ++ * read_cache_page - read into page cache, fill it if needed + * @mapping: the page's address_space + * @index: the page index + * @filler: function to perform the read + * @data: first arg to filler(data, page) function, often left as NULL + * +- * Same as read_cache_page, but don't wait for page to become unlocked +- * after submitting it to the filler. +- * + * Read into the page cache. If a page already exists, and PageUptodate() is +- * not set, try to fill the page but don't wait for it to become unlocked. ++ * not set, try to fill the page and wait for it to become unlocked. + * + * If the page does not get brought uptodate, return -EIO. 
+ */ +-struct page *read_cache_page_async(struct address_space *mapping, ++struct page *read_cache_page(struct address_space *mapping, + pgoff_t index, + int (*filler)(void *, struct page *), + void *data) + { + return do_read_cache_page(mapping, index, filler, data, mapping_gfp_mask(mapping)); + } +-EXPORT_SYMBOL(read_cache_page_async); +- +-static struct page *wait_on_page_read(struct page *page) +-{ +- if (!IS_ERR(page)) { +- wait_on_page_locked(page); +- if (!PageUptodate(page)) { +- page_cache_release(page); +- page = ERR_PTR(-EIO); +- } +- } +- return page; +-} ++EXPORT_SYMBOL(read_cache_page); + + /** + * read_cache_page_gfp - read into page cache, using specified page allocation flags. +@@ -2148,31 +2151,10 @@ struct page *read_cache_page_gfp(struct + { + filler_t *filler = (filler_t *)mapping->a_ops->readpage; + +- return wait_on_page_read(do_read_cache_page(mapping, index, filler, NULL, gfp)); ++ return do_read_cache_page(mapping, index, filler, NULL, gfp); + } + EXPORT_SYMBOL(read_cache_page_gfp); + +-/** +- * read_cache_page - read into page cache, fill it if needed +- * @mapping: the page's address_space +- * @index: the page index +- * @filler: function to perform the read +- * @data: first arg to filler(data, page) function, often left as NULL +- * +- * Read into the page cache. If a page already exists, and PageUptodate() is +- * not set, try to fill the page then wait for it to become unlocked. +- * +- * If the page does not get brought uptodate, return -EIO. +- */ +-struct page *read_cache_page(struct address_space *mapping, +- pgoff_t index, +- int (*filler)(void *, struct page *), +- void *data) +-{ +- return wait_on_page_read(read_cache_page_async(mapping, index, filler, data)); +-} +-EXPORT_SYMBOL(read_cache_page); +- + static size_t __iovec_copy_from_user_inatomic(char *vaddr, + const struct iovec *iov, size_t base, size_t bytes) + { diff --git a/queue-3.14/mm-shmem-save-one-radix-tree-lookup-when-truncating-swapped-pages.patch b/queue-3.14/mm-shmem-save-one-radix-tree-lookup-when-truncating-swapped-pages.patch new file mode 100644 index 00000000000..23e360f3332 --- /dev/null +++ b/queue-3.14/mm-shmem-save-one-radix-tree-lookup-when-truncating-swapped-pages.patch @@ -0,0 +1,97 @@ +From 6dbaf22ce1f1dfba33313198eb5bd989ae76dd87 Mon Sep 17 00:00:00 2001 +From: Johannes Weiner +Date: Thu, 3 Apr 2014 14:47:41 -0700 +Subject: mm: shmem: save one radix tree lookup when truncating swapped pages + +From: Johannes Weiner + +commit 6dbaf22ce1f1dfba33313198eb5bd989ae76dd87 upstream. + +Page cache radix tree slots are usually stabilized by the page lock, but +shmem's swap cookies have no such thing. Because the overall truncation +loop is lockless, the swap entry is currently confirmed by a tree lookup +and then deleted by another tree lookup under the same tree lock region. + +Use radix_tree_delete_item() instead, which does the verification and +deletion with only one lookup. This also allows removing the +delete-only special case from shmem_radix_tree_replace(). 
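For reference, a minimal sketch of the resulting verify-and-delete-in-one-lookup pattern follows. my_free_entry() is a made-up name for the example; radix_tree_delete_item(), mapping->page_tree and mapping->tree_lock are the real interfaces, and shmem_free_swap() in the hunk below is the actual user converted by this patch.

/*
 * Illustrative sketch only: delete the slot at @index only if it still
 * holds @expected, using a single radix tree lookup.
 */
#include <linux/fs.h>
#include <linux/radix-tree.h>
#include <linux/spinlock.h>
#include <linux/errno.h>

static int my_free_entry(struct address_space *mapping, pgoff_t index,
			 void *expected)
{
	void *old;

	spin_lock_irq(&mapping->tree_lock);
	/* verify and delete in one pass; returns NULL if the slot changed */
	old = radix_tree_delete_item(&mapping->page_tree, index, expected);
	spin_unlock_irq(&mapping->tree_lock);

	return old == expected ? 0 : -ENOENT;
}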
+ +Signed-off-by: Johannes Weiner +Reviewed-by: Minchan Kim +Reviewed-by: Rik van Riel +Acked-by: Mel Gorman +Cc: Andrea Arcangeli +Cc: Bob Liu +Cc: Christoph Hellwig +Cc: Dave Chinner +Cc: Greg Thelen +Cc: Hugh Dickins +Cc: Jan Kara +Cc: KOSAKI Motohiro +Cc: Luigi Semenzato +Cc: Metin Doslu +Cc: Michel Lespinasse +Cc: Ozgun Erdogan +Cc: Peter Zijlstra +Cc: Roman Gushchin +Cc: Ryan Mallon +Cc: Tejun Heo +Cc: Vlastimil Babka +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Mel Gorman +Signed-off-by: Greg Kroah-Hartman + +--- + mm/shmem.c | 25 ++++++++++++------------- + 1 file changed, 12 insertions(+), 13 deletions(-) + +--- a/mm/shmem.c ++++ b/mm/shmem.c +@@ -243,19 +243,17 @@ static int shmem_radix_tree_replace(stru + pgoff_t index, void *expected, void *replacement) + { + void **pslot; +- void *item = NULL; ++ void *item; + + VM_BUG_ON(!expected); ++ VM_BUG_ON(!replacement); + pslot = radix_tree_lookup_slot(&mapping->page_tree, index); +- if (pslot) +- item = radix_tree_deref_slot_protected(pslot, +- &mapping->tree_lock); ++ if (!pslot) ++ return -ENOENT; ++ item = radix_tree_deref_slot_protected(pslot, &mapping->tree_lock); + if (item != expected) + return -ENOENT; +- if (replacement) +- radix_tree_replace_slot(pslot, replacement); +- else +- radix_tree_delete(&mapping->page_tree, index); ++ radix_tree_replace_slot(pslot, replacement); + return 0; + } + +@@ -387,14 +385,15 @@ export: + static int shmem_free_swap(struct address_space *mapping, + pgoff_t index, void *radswap) + { +- int error; ++ void *old; + + spin_lock_irq(&mapping->tree_lock); +- error = shmem_radix_tree_replace(mapping, index, radswap, NULL); ++ old = radix_tree_delete_item(&mapping->page_tree, index, radswap); + spin_unlock_irq(&mapping->tree_lock); +- if (!error) +- free_swap_and_cache(radix_to_swp_entry(radswap)); +- return error; ++ if (old != radswap) ++ return -ENOENT; ++ free_swap_and_cache(radix_to_swp_entry(radswap)); ++ return 0; + } + + /* diff --git a/queue-3.14/series b/queue-3.14/series index e2966a66c5d..d6cc0dd85b1 100644 --- a/queue-3.14/series +++ b/queue-3.14/series @@ -96,3 +96,14 @@ net-sctp-fix-panic-on-duplicate-asconf-chunks.patch net-sctp-fix-skb_over_panic-when-receiving-malformed-asconf-chunks.patch iwlwifi-configure-the-ltr.patch regmap-fix-kernel-hang-on-regmap_bulk_write-with-zero-val_count.patch +lib-radix-tree-add-radix_tree_delete_item.patch +mm-shmem-save-one-radix-tree-lookup-when-truncating-swapped-pages.patch +mm-filemap-move-radix-tree-hole-searching-here.patch +mm-fs-prepare-for-non-page-entries-in-page-cache-radix-trees.patch +mm-madvise-fix-madv_willneed-on-shmem-swapouts.patch +mm-remove-read_cache_page_async.patch +callers-of-iov_copy_from_user_atomic-don-t-need.patch +mm-readahead.c-inline-ra_submit.patch +mm-compaction-clean-up-unused-code-lines.patch +mm-compaction-cleanup-isolate_freepages.patch +mm-migration-add-destination-page-freeing-callback.patch