From: Greg Kroah-Hartman Date: Wed, 28 Jan 2015 01:06:36 +0000 (-0800) Subject: 3.14-stable patches X-Git-Tag: v3.10.67~8 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=3fd061c6f9a68e317230683a2afcb79506958d1b;p=thirdparty%2Fkernel%2Fstable-queue.git 3.14-stable patches added patches: memcg-vmscan-fix-forced-scan-of-anonymous-pages.patch mm-avoid-unnecessary-atomic-operations-during-end_page_writeback.patch mm-make-copy_pte_range-static-again.patch mm-memory.c-use-entry-access_once-pte-in-handle_pte_fault.patch mm-pagemap-avoid-unnecessary-overhead-when-tracepoints-are-deactivated.patch mm-rearrange-zone-fields-into-read-only-page-alloc-statistics-and-page-reclaim-lines.patch mm-thp-only-collapse-hugepages-to-nodes-with-affinity-for-zone_reclaim_mode.patch shmem-fix-init_page_accessed-use-to-stop-pagelru-bug.patch vmalloc-use-rcu-list-iterator-to-reduce-vmap_area_lock-contention.patch --- diff --git a/queue-3.14/memcg-vmscan-fix-forced-scan-of-anonymous-pages.patch b/queue-3.14/memcg-vmscan-fix-forced-scan-of-anonymous-pages.patch new file mode 100644 index 00000000000..cfb00fff81c --- /dev/null +++ b/queue-3.14/memcg-vmscan-fix-forced-scan-of-anonymous-pages.patch @@ -0,0 +1,90 @@ +From 2ab051e11bfa3cbb7b24177f3d6aaed10a0d743e Mon Sep 17 00:00:00 2001 +From: Jerome Marchand +Date: Wed, 6 Aug 2014 16:08:03 -0700 +Subject: memcg, vmscan: Fix forced scan of anonymous pages + +From: Jerome Marchand + +commit 2ab051e11bfa3cbb7b24177f3d6aaed10a0d743e upstream. + +When memory cgoups are enabled, the code that decides to force to scan +anonymous pages in get_scan_count() compares global values (free, +high_watermark) to a value that is restricted to a memory cgroup (file). +It make the code over-eager to force anon scan. + +For instance, it will force anon scan when scanning a memcg that is +mainly populated by anonymous page, even when there is plenty of file +pages to get rid of in others memcgs, even when swappiness == 0. It +breaks user's expectation about swappiness and hurts performance. + +This patch makes sure that forced anon scan only happens when there not +enough file pages for the all zone, not just in one random memcg. + +[hannes@cmpxchg.org: cleanups] +Signed-off-by: Jerome Marchand +Acked-by: Michal Hocko +Acked-by: Johannes Weiner +Reviewed-by: Rik van Riel +Cc: Mel Gorman +Signed-off-by: Johannes Weiner +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Mel Gorman +Signed-off-by: Greg Kroah-Hartman +--- + mm/vmscan.c | 23 +++++++++++++++-------- + 1 file changed, 15 insertions(+), 8 deletions(-) + +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -1847,7 +1847,7 @@ static void get_scan_count(struct lruvec + struct zone *zone = lruvec_zone(lruvec); + unsigned long anon_prio, file_prio; + enum scan_balance scan_balance; +- unsigned long anon, file, free; ++ unsigned long anon, file; + bool force_scan = false; + unsigned long ap, fp; + enum lru_list lru; +@@ -1895,11 +1895,6 @@ static void get_scan_count(struct lruvec + goto out; + } + +- anon = get_lru_size(lruvec, LRU_ACTIVE_ANON) + +- get_lru_size(lruvec, LRU_INACTIVE_ANON); +- file = get_lru_size(lruvec, LRU_ACTIVE_FILE) + +- get_lru_size(lruvec, LRU_INACTIVE_FILE); +- + /* + * If it's foreseeable that reclaiming the file cache won't be + * enough to get the zone back into a desirable shape, we have +@@ -1907,8 +1902,14 @@ static void get_scan_count(struct lruvec + * thrashing - remaining file pages alone. 
+ */ + if (global_reclaim(sc)) { +- free = zone_page_state(zone, NR_FREE_PAGES); +- if (unlikely(file + free <= high_wmark_pages(zone))) { ++ unsigned long zonefile; ++ unsigned long zonefree; ++ ++ zonefree = zone_page_state(zone, NR_FREE_PAGES); ++ zonefile = zone_page_state(zone, NR_ACTIVE_FILE) + ++ zone_page_state(zone, NR_INACTIVE_FILE); ++ ++ if (unlikely(zonefile + zonefree <= high_wmark_pages(zone))) { + scan_balance = SCAN_ANON; + goto out; + } +@@ -1943,6 +1944,12 @@ static void get_scan_count(struct lruvec + * + * anon in [0], file in [1] + */ ++ ++ anon = get_lru_size(lruvec, LRU_ACTIVE_ANON) + ++ get_lru_size(lruvec, LRU_INACTIVE_ANON); ++ file = get_lru_size(lruvec, LRU_ACTIVE_FILE) + ++ get_lru_size(lruvec, LRU_INACTIVE_FILE); ++ + spin_lock_irq(&zone->lru_lock); + if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) { + reclaim_stat->recent_scanned[0] /= 2; diff --git a/queue-3.14/mm-avoid-unnecessary-atomic-operations-during-end_page_writeback.patch b/queue-3.14/mm-avoid-unnecessary-atomic-operations-during-end_page_writeback.patch new file mode 100644 index 00000000000..2dcc5408a5d --- /dev/null +++ b/queue-3.14/mm-avoid-unnecessary-atomic-operations-during-end_page_writeback.patch @@ -0,0 +1,53 @@ +From 888cf2db475a256fb0cda042140f73d7881f81fe Mon Sep 17 00:00:00 2001 +From: Mel Gorman +Date: Wed, 4 Jun 2014 16:10:34 -0700 +Subject: mm: avoid unnecessary atomic operations during end_page_writeback() + +From: Mel Gorman + +commit 888cf2db475a256fb0cda042140f73d7881f81fe upstream. + +If a page is marked for immediate reclaim then it is moved to the tail of +the LRU list. This occurs when the system is under enough memory pressure +for pages under writeback to reach the end of the LRU but we test for this +using atomic operations on every writeback. This patch uses an optimistic +non-atomic test first. It'll miss some pages in rare cases but the +consequences are not severe enough to warrant such a penalty. + +While the function does not dominate profiles during a simple dd test the +cost of it is reduced. + +73048 0.7428 vmlinux-3.15.0-rc5-mmotm-20140513 end_page_writeback +23740 0.2409 vmlinux-3.15.0-rc5-lessatomic end_page_writeback + +Signed-off-by: Mel Gorman +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Mel Gorman +Signed-off-by: Greg Kroah-Hartman + +--- + mm/filemap.c | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +--- a/mm/filemap.c ++++ b/mm/filemap.c +@@ -644,8 +644,17 @@ EXPORT_SYMBOL(unlock_page); + */ + void end_page_writeback(struct page *page) + { +- if (TestClearPageReclaim(page)) ++ /* ++ * TestClearPageReclaim could be used here but it is an atomic ++ * operation and overkill in this particular case. Failing to ++ * shuffle a page marked for immediate reclaim is too mild to ++ * justify taking an atomic operation penalty at the end of ++ * ever page writeback. 
++ */ ++ if (PageReclaim(page)) { ++ ClearPageReclaim(page); + rotate_reclaimable_page(page); ++ } + + if (!test_clear_page_writeback(page)) + BUG(); diff --git a/queue-3.14/mm-make-copy_pte_range-static-again.patch b/queue-3.14/mm-make-copy_pte_range-static-again.patch new file mode 100644 index 00000000000..5bf98e03628 --- /dev/null +++ b/queue-3.14/mm-make-copy_pte_range-static-again.patch @@ -0,0 +1,50 @@ +From 21bda264f4243f61dfcc485174055f12ad0530b4 Mon Sep 17 00:00:00 2001 +From: Jerome Marchand +Date: Wed, 6 Aug 2014 16:06:56 -0700 +Subject: mm: make copy_pte_range static again + +From: Jerome Marchand + +commit 21bda264f4243f61dfcc485174055f12ad0530b4 upstream. + +Commit 71e3aac0724f ("thp: transparent hugepage core") adds +copy_pte_range prototype to huge_mm.h. I'm not sure why (or if) this +function have been used outside of memory.c, but it currently isn't. +This patch makes copy_pte_range() static again. + +Signed-off-by: Jerome Marchand +Acked-by: David Rientjes +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Mel Gorman +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/huge_mm.h | 4 ---- + mm/memory.c | 2 +- + 2 files changed, 1 insertion(+), 5 deletions(-) + +--- a/include/linux/huge_mm.h ++++ b/include/linux/huge_mm.h +@@ -93,10 +93,6 @@ extern bool is_vma_temporary_stack(struc + #endif /* CONFIG_DEBUG_VM */ + + extern unsigned long transparent_hugepage_flags; +-extern int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, +- pmd_t *dst_pmd, pmd_t *src_pmd, +- struct vm_area_struct *vma, +- unsigned long addr, unsigned long end); + extern int split_huge_page_to_list(struct page *page, struct list_head *list); + static inline int split_huge_page(struct page *page) + { +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -878,7 +878,7 @@ out_set_pte: + return 0; + } + +-int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, ++static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, + pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma, + unsigned long addr, unsigned long end) + { diff --git a/queue-3.14/mm-memory.c-use-entry-access_once-pte-in-handle_pte_fault.patch b/queue-3.14/mm-memory.c-use-entry-access_once-pte-in-handle_pte_fault.patch new file mode 100644 index 00000000000..9bb8c869d76 --- /dev/null +++ b/queue-3.14/mm-memory.c-use-entry-access_once-pte-in-handle_pte_fault.patch @@ -0,0 +1,48 @@ +From c0d73261f5c1355a35b8b40e871d31578ce0c044 Mon Sep 17 00:00:00 2001 +From: Hugh Dickins +Date: Wed, 6 Aug 2014 16:05:08 -0700 +Subject: mm/memory.c: use entry = ACCESS_ONCE(*pte) in handle_pte_fault() + +From: Hugh Dickins + +commit c0d73261f5c1355a35b8b40e871d31578ce0c044 upstream. + +Use ACCESS_ONCE() in handle_pte_fault() when getting the entry or +orig_pte upon which all subsequent decisions and pte_same() tests will +be made. + +I have no evidence that its lack is responsible for the mm/filemap.c:202 +BUG_ON(page_mapped(page)) in __delete_from_page_cache() found by +trinity, and I am not optimistic that it will fix it. But I have found +no other explanation, and ACCESS_ONCE() here will surely not hurt. + +If gcc does re-access the pte before passing it down, then that would be +disastrous for correct page fault handling, and certainly could explain +the page_mapped() BUGs seen (concurrent fault causing page to be mapped +in a second time on top of itself: mapcount 2 for a single pte). + +Signed-off-by: Hugh Dickins +Cc: Sasha Levin +Cc: Linus Torvalds +Cc: "Kirill A. 
Shutemov" +Cc: Konstantin Khlebnikov +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Mel Gorman +Signed-off-by: Greg Kroah-Hartman + +--- + mm/memory.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -3646,7 +3646,7 @@ static int handle_pte_fault(struct mm_st + pte_t entry; + spinlock_t *ptl; + +- entry = *pte; ++ entry = ACCESS_ONCE(*pte); + if (!pte_present(entry)) { + if (pte_none(entry)) { + if (vma->vm_ops) { diff --git a/queue-3.14/mm-pagemap-avoid-unnecessary-overhead-when-tracepoints-are-deactivated.patch b/queue-3.14/mm-pagemap-avoid-unnecessary-overhead-when-tracepoints-are-deactivated.patch new file mode 100644 index 00000000000..39787b6f89d --- /dev/null +++ b/queue-3.14/mm-pagemap-avoid-unnecessary-overhead-when-tracepoints-are-deactivated.patch @@ -0,0 +1,129 @@ +From 24b7e5819ad5cbef2b7c7376510862aa8319d240 Mon Sep 17 00:00:00 2001 +From: Mel Gorman +Date: Wed, 6 Aug 2014 16:07:11 -0700 +Subject: mm: pagemap: avoid unnecessary overhead when tracepoints are deactivated + +From: Mel Gorman + +commit 24b7e5819ad5cbef2b7c7376510862aa8319d240 upstream. + +This was formerly the series "Improve sequential read throughput" which +noted some major differences in performance of tiobench since 3.0. +While there are a number of factors, two that dominated were the +introduction of the fair zone allocation policy and changes to CFQ. + +The behaviour of fair zone allocation policy makes more sense than +tiobench as a benchmark and CFQ defaults were not changed due to +insufficient benchmarking. + +This series is what's left. It's one functional fix to the fair zone +allocation policy when used on NUMA machines and a reduction of overhead +in general. tiobench was used for the comparison despite its flaws as +an IO benchmark as in this case we are primarily interested in the +overhead of page allocator and page reclaim activity. + +On UMA, it makes little difference to overhead + + 3.16.0-rc3 3.16.0-rc3 + vanilla lowercost-v5 +User 383.61 386.77 +System 403.83 401.74 +Elapsed 5411.50 5413.11 + +On a 4-socket NUMA machine it's a bit more noticable + + 3.16.0-rc3 3.16.0-rc3 + vanilla lowercost-v5 +User 746.94 802.00 +System 65336.22 40852.33 +Elapsed 27553.52 27368.46 + +This patch (of 6): + +The LRU insertion and activate tracepoints take PFN as a parameter +forcing the overhead to the caller. Move the overhead to the tracepoint +fast-assign method to ensure the cost is only incurred when the +tracepoint is active. 
+ +Signed-off-by: Mel Gorman +Acked-by: Johannes Weiner +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Mel Gorman +Signed-off-by: Greg Kroah-Hartman + +--- + include/trace/events/pagemap.h | 16 +++++++--------- + mm/swap.c | 4 ++-- + 2 files changed, 9 insertions(+), 11 deletions(-) + +--- a/include/trace/events/pagemap.h ++++ b/include/trace/events/pagemap.h +@@ -28,12 +28,10 @@ TRACE_EVENT(mm_lru_insertion, + + TP_PROTO( + struct page *page, +- unsigned long pfn, +- int lru, +- unsigned long flags ++ int lru + ), + +- TP_ARGS(page, pfn, lru, flags), ++ TP_ARGS(page, lru), + + TP_STRUCT__entry( + __field(struct page *, page ) +@@ -44,9 +42,9 @@ TRACE_EVENT(mm_lru_insertion, + + TP_fast_assign( + __entry->page = page; +- __entry->pfn = pfn; ++ __entry->pfn = page_to_pfn(page); + __entry->lru = lru; +- __entry->flags = flags; ++ __entry->flags = trace_pagemap_flags(page); + ), + + /* Flag format is based on page-types.c formatting for pagemap */ +@@ -64,9 +62,9 @@ TRACE_EVENT(mm_lru_insertion, + + TRACE_EVENT(mm_lru_activate, + +- TP_PROTO(struct page *page, unsigned long pfn), ++ TP_PROTO(struct page *page), + +- TP_ARGS(page, pfn), ++ TP_ARGS(page), + + TP_STRUCT__entry( + __field(struct page *, page ) +@@ -75,7 +73,7 @@ TRACE_EVENT(mm_lru_activate, + + TP_fast_assign( + __entry->page = page; +- __entry->pfn = pfn; ++ __entry->pfn = page_to_pfn(page); + ), + + /* Flag format is based on page-types.c formatting for pagemap */ +--- a/mm/swap.c ++++ b/mm/swap.c +@@ -469,7 +469,7 @@ static void __activate_page(struct page + SetPageActive(page); + lru += LRU_ACTIVE; + add_page_to_lru_list(page, lruvec, lru); +- trace_mm_lru_activate(page, page_to_pfn(page)); ++ trace_mm_lru_activate(page); + + __count_vm_event(PGACTIVATE); + update_page_reclaim_stat(lruvec, file, 1); +@@ -962,7 +962,7 @@ static void __pagevec_lru_add_fn(struct + SetPageLRU(page); + add_page_to_lru_list(page, lruvec, lru); + update_page_reclaim_stat(lruvec, file, active); +- trace_mm_lru_insertion(page, page_to_pfn(page), lru, trace_pagemap_flags(page)); ++ trace_mm_lru_insertion(page, lru); + } + + /* diff --git a/queue-3.14/mm-rearrange-zone-fields-into-read-only-page-alloc-statistics-and-page-reclaim-lines.patch b/queue-3.14/mm-rearrange-zone-fields-into-read-only-page-alloc-statistics-and-page-reclaim-lines.patch new file mode 100644 index 00000000000..6f7549fe364 --- /dev/null +++ b/queue-3.14/mm-rearrange-zone-fields-into-read-only-page-alloc-statistics-and-page-reclaim-lines.patch @@ -0,0 +1,367 @@ +From 3484b2de9499df23c4604a513b36f96326ae81ad Mon Sep 17 00:00:00 2001 +From: Mel Gorman +Date: Wed, 6 Aug 2014 16:07:14 -0700 +Subject: mm: rearrange zone fields into read-only, page alloc, statistics and page reclaim lines + +From: Mel Gorman + +commit 3484b2de9499df23c4604a513b36f96326ae81ad upstream. + +The arrangement of struct zone has changed over time and now it has +reached the point where there is some inappropriate sharing going on. +On x86-64 for example + +o The zone->node field is shared with the zone lock and zone->node is + accessed frequently from the page allocator due to the fair zone + allocation policy. 
+ +o span_seqlock is almost never used by shares a line with free_area + +o Some zone statistics share a cache line with the LRU lock so + reclaim-intensive and allocator-intensive workloads can bounce the cache + line on a stat update + +This patch rearranges struct zone to put read-only and read-mostly +fields together and then splits the page allocator intensive fields, the +zone statistics and the page reclaim intensive fields into their own +cache lines. Note that the type of lowmem_reserve changes due to the +watermark calculations being signed and avoiding a signed/unsigned +conversion there. + +On the test configuration I used the overall size of struct zone shrunk +by one cache line. On smaller machines, this is not likely to be +noticable. However, on a 4-node NUMA machine running tiobench the +system CPU overhead is reduced by this patch. + + 3.16.0-rc3 3.16.0-rc3 + vanillarearrange-v5r9 +User 746.94 759.78 +System 65336.22 58350.98 +Elapsed 27553.52 27282.02 + +Signed-off-by: Mel Gorman +Acked-by: Johannes Weiner +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Mel Gorman +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/mmzone.h | 205 +++++++++++++++++++++++++------------------------ + mm/page_alloc.c | 7 - + mm/vmstat.c | 4 + 3 files changed, 110 insertions(+), 106 deletions(-) + +--- a/include/linux/mmzone.h ++++ b/include/linux/mmzone.h +@@ -321,19 +321,12 @@ enum zone_type { + #ifndef __GENERATING_BOUNDS_H + + struct zone { +- /* Fields commonly accessed by the page allocator */ ++ /* Read-mostly fields */ + + /* zone watermarks, access with *_wmark_pages(zone) macros */ + unsigned long watermark[NR_WMARK]; + + /* +- * When free pages are below this point, additional steps are taken +- * when reading the number of free pages to avoid per-cpu counter +- * drift allowing watermarks to be breached +- */ +- unsigned long percpu_drift_mark; +- +- /* + * We don't know if the memory that we're going to allocate will be freeable + * or/and it will be released eventually, so to avoid totally wasting several + * GB of ram we must reserve some of the lower zone memory (otherwise we risk +@@ -341,41 +334,26 @@ struct zone { + * on the higher zones). This array is recalculated at runtime if the + * sysctl_lowmem_reserve_ratio sysctl changes. + */ +- unsigned long lowmem_reserve[MAX_NR_ZONES]; +- +- /* +- * This is a per-zone reserve of pages that should not be +- * considered dirtyable memory. +- */ +- unsigned long dirty_balance_reserve; ++ long lowmem_reserve[MAX_NR_ZONES]; + + #ifdef CONFIG_NUMA + int node; ++#endif ++ + /* +- * zone reclaim becomes active if more unmapped pages exist. ++ * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on ++ * this zone's LRU. Maintained by the pageout code. + */ +- unsigned long min_unmapped_pages; +- unsigned long min_slab_pages; +-#endif ++ unsigned int inactive_ratio; ++ ++ struct pglist_data *zone_pgdat; + struct per_cpu_pageset __percpu *pageset; ++ + /* +- * free areas of different sizes ++ * This is a per-zone reserve of pages that should not be ++ * considered dirtyable memory. 
+ */ +- spinlock_t lock; +-#if defined CONFIG_COMPACTION || defined CONFIG_CMA +- /* Set to true when the PG_migrate_skip bits should be cleared */ +- bool compact_blockskip_flush; +- +- /* pfn where compaction free scanner should start */ +- unsigned long compact_cached_free_pfn; +- /* pfn where async and sync compaction migration scanner should start */ +- unsigned long compact_cached_migrate_pfn[2]; +-#endif +-#ifdef CONFIG_MEMORY_HOTPLUG +- /* see spanned/present_pages for more description */ +- seqlock_t span_seqlock; +-#endif +- struct free_area free_area[MAX_ORDER]; ++ unsigned long dirty_balance_reserve; + + #ifndef CONFIG_SPARSEMEM + /* +@@ -385,71 +363,14 @@ struct zone { + unsigned long *pageblock_flags; + #endif /* CONFIG_SPARSEMEM */ + +-#ifdef CONFIG_COMPACTION +- /* +- * On compaction failure, 1<> PAGE_SHIFT */ + unsigned long zone_start_pfn; + +@@ -495,9 +416,11 @@ struct zone { + * adjust_managed_page_count() should be used instead of directly + * touching zone->managed_pages and totalram_pages. + */ ++ unsigned long managed_pages; + unsigned long spanned_pages; + unsigned long present_pages; +- unsigned long managed_pages; ++ ++ const char *name; + + /* + * Number of MIGRATE_RESEVE page block. To maintain for just +@@ -505,10 +428,92 @@ struct zone { + */ + int nr_migrate_reserve_block; + ++#ifdef CONFIG_MEMORY_HOTPLUG ++ /* see spanned/present_pages for more description */ ++ seqlock_t span_seqlock; ++#endif ++ + /* +- * rarely used fields: ++ * wait_table -- the array holding the hash table ++ * wait_table_hash_nr_entries -- the size of the hash table array ++ * wait_table_bits -- wait_table_size == (1 << wait_table_bits) ++ * ++ * The purpose of all these is to keep track of the people ++ * waiting for a page to become available and make them ++ * runnable again when possible. The trouble is that this ++ * consumes a lot of space, especially when so few things ++ * wait on pages at a given time. So instead of using ++ * per-page waitqueues, we use a waitqueue hash table. ++ * ++ * The bucket discipline is to sleep on the same queue when ++ * colliding and wake all in that wait queue when removing. ++ * When something wakes, it must check to be sure its page is ++ * truly available, a la thundering herd. The cost of a ++ * collision is great, but given the expected load of the ++ * table, they should be so rare as to be outweighed by the ++ * benefits from the saved space. ++ * ++ * __wait_on_page_locked() and unlock_page() in mm/filemap.c, are the ++ * primary users of these fields, and in mm/page_alloc.c ++ * free_area_init_core() performs the initialization of them. 
+ */ +- const char *name; ++ wait_queue_head_t *wait_table; ++ unsigned long wait_table_hash_nr_entries; ++ unsigned long wait_table_bits; ++ ++ ZONE_PADDING(_pad1_) ++ ++ /* Write-intensive fields used from the page allocator */ ++ spinlock_t lock; ++ ++ /* free areas of different sizes */ ++ struct free_area free_area[MAX_ORDER]; ++ ++ /* zone flags, see below */ ++ unsigned long flags; ++ ++ ZONE_PADDING(_pad2_) ++ ++ /* Write-intensive fields used by page reclaim */ ++ ++ /* Fields commonly accessed by the page reclaim scanner */ ++ spinlock_t lru_lock; ++ unsigned long pages_scanned; /* since last reclaim */ ++ struct lruvec lruvec; ++ ++ /* ++ * When free pages are below this point, additional steps are taken ++ * when reading the number of free pages to avoid per-cpu counter ++ * drift allowing watermarks to be breached ++ */ ++ unsigned long percpu_drift_mark; ++ ++#if defined CONFIG_COMPACTION || defined CONFIG_CMA ++ /* pfn where compaction free scanner should start */ ++ unsigned long compact_cached_free_pfn; ++ /* pfn where async and sync compaction migration scanner should start */ ++ unsigned long compact_cached_migrate_pfn[2]; ++#endif ++ ++#ifdef CONFIG_COMPACTION ++ /* ++ * On compaction failure, 1<lowmem_reserve[classzone_idx]; + int o; + long free_cma = 0; + +@@ -1725,7 +1724,7 @@ static bool __zone_watermark_ok(struct z + free_cma = zone_page_state(z, NR_FREE_CMA_PAGES); + #endif + +- if (free_pages - free_cma <= min + lowmem_reserve) ++ if (free_pages - free_cma <= min + z->lowmem_reserve[classzone_idx]) + return false; + for (o = 0; o < order; o++) { + /* At the next order, this order's pages become unavailable */ +@@ -3257,7 +3256,7 @@ void show_free_areas(unsigned int filter + ); + printk("lowmem_reserve[]:"); + for (i = 0; i < MAX_NR_ZONES; i++) +- printk(" %lu", zone->lowmem_reserve[i]); ++ printk(" %ld", zone->lowmem_reserve[i]); + printk("\n"); + } + +@@ -5585,7 +5584,7 @@ static void calculate_totalreserve_pages + for_each_online_pgdat(pgdat) { + for (i = 0; i < MAX_NR_ZONES; i++) { + struct zone *zone = pgdat->node_zones + i; +- unsigned long max = 0; ++ long max = 0; + + /* Find valid and maximum lowmem_reserve in the zone */ + for (j = i; j < MAX_NR_ZONES; j++) { +--- a/mm/vmstat.c ++++ b/mm/vmstat.c +@@ -1065,10 +1065,10 @@ static void zoneinfo_show_print(struct s + zone_page_state(zone, i)); + + seq_printf(m, +- "\n protection: (%lu", ++ "\n protection: (%ld", + zone->lowmem_reserve[0]); + for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++) +- seq_printf(m, ", %lu", zone->lowmem_reserve[i]); ++ seq_printf(m, ", %ld", zone->lowmem_reserve[i]); + seq_printf(m, + ")" + "\n pagesets"); diff --git a/queue-3.14/mm-thp-only-collapse-hugepages-to-nodes-with-affinity-for-zone_reclaim_mode.patch b/queue-3.14/mm-thp-only-collapse-hugepages-to-nodes-with-affinity-for-zone_reclaim_mode.patch new file mode 100644 index 00000000000..c72e71db5ea --- /dev/null +++ b/queue-3.14/mm-thp-only-collapse-hugepages-to-nodes-with-affinity-for-zone_reclaim_mode.patch @@ -0,0 +1,87 @@ +From 14a4e2141e24304fff2c697be6382ffb83888185 Mon Sep 17 00:00:00 2001 +From: David Rientjes +Date: Wed, 6 Aug 2014 16:07:29 -0700 +Subject: mm, thp: only collapse hugepages to nodes with affinity for zone_reclaim_mode + +From: David Rientjes + +commit 14a4e2141e24304fff2c697be6382ffb83888185 upstream. 
+ +Commit 9f1b868a13ac ("mm: thp: khugepaged: add policy for finding target +node") improved the previous khugepaged logic which allocated a +transparent hugepages from the node of the first page being collapsed. + +However, it is still possible to collapse pages to remote memory which +may suffer from additional access latency. With the current policy, it +is possible that 255 pages (with PAGE_SHIFT == 12) will be collapsed +remotely if the majority are allocated from that node. + +When zone_reclaim_mode is enabled, it means the VM should make every +attempt to allocate locally to prevent NUMA performance degradation. In +this case, we do not want to collapse hugepages to remote nodes that +would suffer from increased access latency. Thus, when +zone_reclaim_mode is enabled, only allow collapsing to nodes with +RECLAIM_DISTANCE or less. + +There is no functional change for systems that disable +zone_reclaim_mode. + +Signed-off-by: David Rientjes +Cc: Dave Hansen +Cc: Andrea Arcangeli +Acked-by: Vlastimil Babka +Acked-by: Mel Gorman +Cc: Rik van Riel +Cc: "Kirill A. Shutemov" +Cc: Bob Liu +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Mel Gorman +Signed-off-by: Greg Kroah-Hartman + +--- + mm/huge_memory.c | 26 ++++++++++++++++++++++++++ + 1 file changed, 26 insertions(+) + +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -2273,6 +2273,30 @@ static void khugepaged_alloc_sleep(void) + + static int khugepaged_node_load[MAX_NUMNODES]; + ++static bool khugepaged_scan_abort(int nid) ++{ ++ int i; ++ ++ /* ++ * If zone_reclaim_mode is disabled, then no extra effort is made to ++ * allocate memory locally. ++ */ ++ if (!zone_reclaim_mode) ++ return false; ++ ++ /* If there is a count for this node already, it must be acceptable */ ++ if (khugepaged_node_load[nid]) ++ return false; ++ ++ for (i = 0; i < MAX_NUMNODES; i++) { ++ if (!khugepaged_node_load[i]) ++ continue; ++ if (node_distance(nid, i) > RECLAIM_DISTANCE) ++ return true; ++ } ++ return false; ++} ++ + #ifdef CONFIG_NUMA + static int khugepaged_find_target_node(void) + { +@@ -2589,6 +2613,8 @@ static int khugepaged_scan_pmd(struct mm + * hit record. 
+ */ + node = page_to_nid(page); ++ if (khugepaged_scan_abort(node)) ++ goto out_unmap; + khugepaged_node_load[node]++; + VM_BUG_ON_PAGE(PageCompound(page), page); + if (!PageLRU(page) || PageLocked(page) || !PageAnon(page)) diff --git a/queue-3.14/series b/queue-3.14/series index 31609754fc2..f12aa9a328a 100644 --- a/queue-3.14/series +++ b/queue-3.14/series @@ -60,3 +60,12 @@ mm-do-not-use-atomic-operations-when-releasing-pages.patch mm-do-not-use-unnecessary-atomic-operations-when-adding-pages-to-the-lru.patch fs-buffer-do-not-use-unnecessary-atomic-operations-when-discarding-buffers.patch mm-non-atomically-mark-page-accessed-during-page-cache-allocation-where-possible.patch +mm-avoid-unnecessary-atomic-operations-during-end_page_writeback.patch +shmem-fix-init_page_accessed-use-to-stop-pagelru-bug.patch +mm-memory.c-use-entry-access_once-pte-in-handle_pte_fault.patch +mm-thp-only-collapse-hugepages-to-nodes-with-affinity-for-zone_reclaim_mode.patch +mm-make-copy_pte_range-static-again.patch +vmalloc-use-rcu-list-iterator-to-reduce-vmap_area_lock-contention.patch +memcg-vmscan-fix-forced-scan-of-anonymous-pages.patch +mm-pagemap-avoid-unnecessary-overhead-when-tracepoints-are-deactivated.patch +mm-rearrange-zone-fields-into-read-only-page-alloc-statistics-and-page-reclaim-lines.patch diff --git a/queue-3.14/shmem-fix-init_page_accessed-use-to-stop-pagelru-bug.patch b/queue-3.14/shmem-fix-init_page_accessed-use-to-stop-pagelru-bug.patch new file mode 100644 index 00000000000..e593ba3e06c --- /dev/null +++ b/queue-3.14/shmem-fix-init_page_accessed-use-to-stop-pagelru-bug.patch @@ -0,0 +1,88 @@ +From 66d2f4d28cd030220e7ea2a628993fcabcb956d1 Mon Sep 17 00:00:00 2001 +From: Hugh Dickins +Date: Wed, 2 Jul 2014 15:22:38 -0700 +Subject: shmem: fix init_page_accessed use to stop !PageLRU bug + +From: Hugh Dickins + +commit 66d2f4d28cd030220e7ea2a628993fcabcb956d1 upstream. + +Under shmem swapping load, I sometimes hit the VM_BUG_ON_PAGE(!PageLRU) +in isolate_lru_pages() at mm/vmscan.c:1281! + +Commit 2457aec63745 ("mm: non-atomically mark page accessed during page +cache allocation where possible") looks like interrupted work-in-progress. + +mm/filemap.c's call to init_page_accessed() is fine, but not mm/shmem.c's +- shmem_write_begin() is clearly wrong to use it after shmem_getpage(), +when the page is always visible in radix_tree, and often already on LRU. + +Revert change to shmem_write_begin(), and use init_page_accessed() or +mark_page_accessed() appropriately for SGP_WRITE in shmem_getpage_gfp(). + +SGP_WRITE also covers shmem_symlink(), which did not mark_page_accessed() +before; but since many other filesystems use [__]page_symlink(), which did +and does mark the page accessed, consider this as rectifying an oversight. + +Signed-off-by: Hugh Dickins +Acked-by: Mel Gorman +Cc: Johannes Weiner +Cc: Vlastimil Babka +Cc: Michal Hocko +Cc: Dave Hansen +Cc: Prabhakar Lad +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Mel Gorman +Signed-off-by: Greg Kroah-Hartman +--- + mm/shmem.c | 15 ++++++++++----- + 1 file changed, 10 insertions(+), 5 deletions(-) + +--- a/mm/shmem.c ++++ b/mm/shmem.c +@@ -1035,6 +1035,9 @@ repeat: + goto failed; + } + ++ if (page && sgp == SGP_WRITE) ++ mark_page_accessed(page); ++ + /* fallocated page? 
*/ + if (page && !PageUptodate(page)) { + if (sgp != SGP_READ) +@@ -1116,6 +1119,9 @@ repeat: + shmem_recalc_inode(inode); + spin_unlock(&info->lock); + ++ if (sgp == SGP_WRITE) ++ mark_page_accessed(page); ++ + delete_from_swap_cache(page); + set_page_dirty(page); + swap_free(swap); +@@ -1142,6 +1148,9 @@ repeat: + + __SetPageSwapBacked(page); + __set_page_locked(page); ++ if (sgp == SGP_WRITE) ++ init_page_accessed(page); ++ + error = mem_cgroup_cache_charge(page, current->mm, + gfp & GFP_RECLAIM_MASK); + if (error) +@@ -1438,13 +1447,9 @@ shmem_write_begin(struct file *file, str + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) + { +- int ret; + struct inode *inode = mapping->host; + pgoff_t index = pos >> PAGE_CACHE_SHIFT; +- ret = shmem_getpage(inode, index, pagep, SGP_WRITE, NULL); +- if (ret == 0 && *pagep) +- init_page_accessed(*pagep); +- return ret; ++ return shmem_getpage(inode, index, pagep, SGP_WRITE, NULL); + } + + static int diff --git a/queue-3.14/vmalloc-use-rcu-list-iterator-to-reduce-vmap_area_lock-contention.patch b/queue-3.14/vmalloc-use-rcu-list-iterator-to-reduce-vmap_area_lock-contention.patch new file mode 100644 index 00000000000..cae32e8c632 --- /dev/null +++ b/queue-3.14/vmalloc-use-rcu-list-iterator-to-reduce-vmap_area_lock-contention.patch @@ -0,0 +1,109 @@ +From 474750aba88817c53f39424e5567b8e4acc4b39b Mon Sep 17 00:00:00 2001 +From: Joonsoo Kim +Date: Wed, 6 Aug 2014 16:05:06 -0700 +Subject: vmalloc: use rcu list iterator to reduce vmap_area_lock contention + +From: Joonsoo Kim + +commit 474750aba88817c53f39424e5567b8e4acc4b39b upstream. + +Richard Yao reported a month ago that his system have a trouble with +vmap_area_lock contention during performance analysis by /proc/meminfo. +Andrew asked why his analysis checks /proc/meminfo stressfully, but he +didn't answer it. + + https://lkml.org/lkml/2014/4/10/416 + +Although I'm not sure that this is right usage or not, there is a +solution reducing vmap_area_lock contention with no side-effect. That +is just to use rcu list iterator in get_vmalloc_info(). + +rcu can be used in this function because all RCU protocol is already +respected by writers, since Nick Piggin commit db64fe02258f1 ("mm: +rewrite vmap layer") back in linux-2.6.28 + +Specifically : + insertions use list_add_rcu(), + deletions use list_del_rcu() and kfree_rcu(). + +Note the rb tree is not used from rcu reader (it would not be safe), +only the vmap_area_list has full RCU protection. + +Note that __purge_vmap_area_lazy() already uses this rcu protection. + + rcu_read_lock(); + list_for_each_entry_rcu(va, &vmap_area_list, list) { + if (va->flags & VM_LAZY_FREE) { + if (va->va_start < *start) + *start = va->va_start; + if (va->va_end > *end) + *end = va->va_end; + nr += (va->va_end - va->va_start) >> PAGE_SHIFT; + list_add_tail(&va->purge_list, &valist); + va->flags |= VM_LAZY_FREEING; + va->flags &= ~VM_LAZY_FREE; + } + } + rcu_read_unlock(); + +Peter: + +: While rcu list traversal over the vmap_area_list is safe, this may +: arrive at different results than the spinlocked version. The rcu list +: traversal version will not be a 'snapshot' of a single, valid instant +: of the entire vmap_area_list, but rather a potential amalgam of +: different list states. + +Joonsoo: + +: Yes, you are right, but I don't think that we should be strict here. +: Meminfo is already not a 'snapshot' at specific time. While we try to get +: certain stats, the other stats can change. 
And, although we may arrive at +: different results than the spinlocked version, the difference would not be +: large and would not make serious side-effect. + +[edumazet@google.com: add more commit description] +Signed-off-by: Joonsoo Kim +Reported-by: Richard Yao +Acked-by: Eric Dumazet +Cc: Peter Hurley +Cc: Zhang Yanfei +Cc: Johannes Weiner +Cc: Andi Kleen +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Mel Gorman +Signed-off-by: Greg Kroah-Hartman + +--- + mm/vmalloc.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/mm/vmalloc.c ++++ b/mm/vmalloc.c +@@ -2681,14 +2681,14 @@ void get_vmalloc_info(struct vmalloc_inf + + prev_end = VMALLOC_START; + +- spin_lock(&vmap_area_lock); ++ rcu_read_lock(); + + if (list_empty(&vmap_area_list)) { + vmi->largest_chunk = VMALLOC_TOTAL; + goto out; + } + +- list_for_each_entry(va, &vmap_area_list, list) { ++ list_for_each_entry_rcu(va, &vmap_area_list, list) { + unsigned long addr = va->va_start; + + /* +@@ -2715,7 +2715,7 @@ void get_vmalloc_info(struct vmalloc_inf + vmi->largest_chunk = VMALLOC_END - prev_end; + + out: +- spin_unlock(&vmap_area_lock); ++ rcu_read_unlock(); + } + #endif +
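
The last patch in this queue swaps a spinlock-protected list walk for an RCU list iterator on the reader side of get_vmalloc_info(). As a rough illustration of that pattern outside the kernel tree, the sketch below does the same thing in userspace with liburcu; the header and symbol names used here (urcu.h, urcu/rculist.h, cds_list_add_rcu, cds_list_for_each_entry_rcu, rcu_register_thread) are assumptions about that library and are not taken from the patches themselves, and the writer-side mutex and node removal are omitted for brevity.

/*
 * Illustrative sketch only, not part of the patch series: readers walk
 * the list under rcu_read_lock() instead of taking the writers' lock,
 * mirroring the get_vmalloc_info() change above.  liburcu API names are
 * assumptions.
 */
#include <stdio.h>
#include <stdlib.h>

#include <urcu.h>          /* rcu_read_lock(), rcu_register_thread(), ... */
#include <urcu/rculist.h>  /* cds_list_add_rcu(), cds_list_for_each_entry_rcu() */

struct area {
	unsigned long start;
	unsigned long end;
	struct cds_list_head list;
};

/* Writers would also serialize against each other with a mutex (omitted). */
static CDS_LIST_HEAD(area_list);

/* Writer side: publish a new node; concurrent readers keep a valid view. */
static void area_add(unsigned long start, unsigned long end)
{
	struct area *a = malloc(sizeof(*a));

	if (!a)
		abort();
	a->start = start;
	a->end = end;
	cds_list_add_rcu(&a->list, &area_list);
}

/* Reader side: the analogue of get_vmalloc_info() after the patch. */
static unsigned long total_size(void)
{
	struct area *a;
	unsigned long total = 0;

	rcu_read_lock();	/* instead of spin_lock() on the writers' lock */
	cds_list_for_each_entry_rcu(a, &area_list, list)
		total += a->end - a->start;
	rcu_read_unlock();

	return total;
}

int main(void)
{
	rcu_register_thread();	/* each thread using RCU must register */

	area_add(0x1000, 0x3000);
	area_add(0x8000, 0x9000);
	printf("total: %lu bytes\n", total_size());

	rcu_unregister_thread();
	return 0;
}

Under those assumptions it should build with something like: gcc rcu_list.c -lurcu. The point, as in the patch, is only that readers no longer contend on the lock that writers hold; as the commit message notes, the traversal is then not a single consistent snapshot, which is acceptable for statistics such as /proc/meminfo.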