3.14-stable patches
author    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 28 Jan 2015 01:06:36 +0000 (17:06 -0800)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 28 Jan 2015 01:06:36 +0000 (17:06 -0800)
added patches:
memcg-vmscan-fix-forced-scan-of-anonymous-pages.patch
mm-avoid-unnecessary-atomic-operations-during-end_page_writeback.patch
mm-make-copy_pte_range-static-again.patch
mm-memory.c-use-entry-access_once-pte-in-handle_pte_fault.patch
mm-pagemap-avoid-unnecessary-overhead-when-tracepoints-are-deactivated.patch
mm-rearrange-zone-fields-into-read-only-page-alloc-statistics-and-page-reclaim-lines.patch
mm-thp-only-collapse-hugepages-to-nodes-with-affinity-for-zone_reclaim_mode.patch
shmem-fix-init_page_accessed-use-to-stop-pagelru-bug.patch
vmalloc-use-rcu-list-iterator-to-reduce-vmap_area_lock-contention.patch

queue-3.14/memcg-vmscan-fix-forced-scan-of-anonymous-pages.patch [new file with mode: 0644]
queue-3.14/mm-avoid-unnecessary-atomic-operations-during-end_page_writeback.patch [new file with mode: 0644]
queue-3.14/mm-make-copy_pte_range-static-again.patch [new file with mode: 0644]
queue-3.14/mm-memory.c-use-entry-access_once-pte-in-handle_pte_fault.patch [new file with mode: 0644]
queue-3.14/mm-pagemap-avoid-unnecessary-overhead-when-tracepoints-are-deactivated.patch [new file with mode: 0644]
queue-3.14/mm-rearrange-zone-fields-into-read-only-page-alloc-statistics-and-page-reclaim-lines.patch [new file with mode: 0644]
queue-3.14/mm-thp-only-collapse-hugepages-to-nodes-with-affinity-for-zone_reclaim_mode.patch [new file with mode: 0644]
queue-3.14/series
queue-3.14/shmem-fix-init_page_accessed-use-to-stop-pagelru-bug.patch [new file with mode: 0644]
queue-3.14/vmalloc-use-rcu-list-iterator-to-reduce-vmap_area_lock-contention.patch [new file with mode: 0644]

diff --git a/queue-3.14/memcg-vmscan-fix-forced-scan-of-anonymous-pages.patch b/queue-3.14/memcg-vmscan-fix-forced-scan-of-anonymous-pages.patch
new file mode 100644 (file)
index 0000000..cfb00ff
--- /dev/null
@@ -0,0 +1,90 @@
+From 2ab051e11bfa3cbb7b24177f3d6aaed10a0d743e Mon Sep 17 00:00:00 2001
+From: Jerome Marchand <jmarchan@redhat.com>
+Date: Wed, 6 Aug 2014 16:08:03 -0700
+Subject: memcg, vmscan: Fix forced scan of anonymous pages
+
+From: Jerome Marchand <jmarchan@redhat.com>
+
+commit 2ab051e11bfa3cbb7b24177f3d6aaed10a0d743e upstream.
+
+When memory cgroups are enabled, the code in get_scan_count() that decides
+whether to force a scan of anonymous pages compares global values (free,
+high_watermark) to a value that is restricted to a memory cgroup (file).
+This makes the code over-eager to force an anon scan.
+
+For instance, it will force an anon scan when scanning a memcg that is
+mainly populated by anonymous pages, even when there are plenty of file
+pages to get rid of in other memcgs, and even when swappiness == 0.  This
+breaks the user's expectations about swappiness and hurts performance.
+
+This patch makes sure that a forced anon scan only happens when there are
+not enough file pages for the whole zone, not just in one random memcg.
+
+[hannes@cmpxchg.org: cleanups]
+Signed-off-by: Jerome Marchand <jmarchan@redhat.com>
+Acked-by: Michal Hocko <mhocko@suse.cz>
+Acked-by: Johannes Weiner <hannes@cmpxchg.org>
+Reviewed-by: Rik van Riel <riel@redhat.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/vmscan.c |   23 +++++++++++++++--------
+ 1 file changed, 15 insertions(+), 8 deletions(-)
+
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -1847,7 +1847,7 @@ static void get_scan_count(struct lruvec
+       struct zone *zone = lruvec_zone(lruvec);
+       unsigned long anon_prio, file_prio;
+       enum scan_balance scan_balance;
+-      unsigned long anon, file, free;
++      unsigned long anon, file;
+       bool force_scan = false;
+       unsigned long ap, fp;
+       enum lru_list lru;
+@@ -1895,11 +1895,6 @@ static void get_scan_count(struct lruvec
+               goto out;
+       }
+-      anon  = get_lru_size(lruvec, LRU_ACTIVE_ANON) +
+-              get_lru_size(lruvec, LRU_INACTIVE_ANON);
+-      file  = get_lru_size(lruvec, LRU_ACTIVE_FILE) +
+-              get_lru_size(lruvec, LRU_INACTIVE_FILE);
+-
+       /*
+        * If it's foreseeable that reclaiming the file cache won't be
+        * enough to get the zone back into a desirable shape, we have
+@@ -1907,8 +1902,14 @@ static void get_scan_count(struct lruvec
+        * thrashing - remaining file pages alone.
+        */
+       if (global_reclaim(sc)) {
+-              free = zone_page_state(zone, NR_FREE_PAGES);
+-              if (unlikely(file + free <= high_wmark_pages(zone))) {
++              unsigned long zonefile;
++              unsigned long zonefree;
++
++              zonefree = zone_page_state(zone, NR_FREE_PAGES);
++              zonefile = zone_page_state(zone, NR_ACTIVE_FILE) +
++                         zone_page_state(zone, NR_INACTIVE_FILE);
++
++              if (unlikely(zonefile + zonefree <= high_wmark_pages(zone))) {
+                       scan_balance = SCAN_ANON;
+                       goto out;
+               }
+@@ -1943,6 +1944,12 @@ static void get_scan_count(struct lruvec
+        *
+        * anon in [0], file in [1]
+        */
++
++      anon  = get_lru_size(lruvec, LRU_ACTIVE_ANON) +
++              get_lru_size(lruvec, LRU_INACTIVE_ANON);
++      file  = get_lru_size(lruvec, LRU_ACTIVE_FILE) +
++              get_lru_size(lruvec, LRU_INACTIVE_FILE);
++
+       spin_lock_irq(&zone->lru_lock);
+       if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) {
+               reclaim_stat->recent_scanned[0] /= 2;
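
As an aside, the hunk above boils down to one comparison: force the anon scan only when the zone-wide file pages plus free pages cannot reach the zone's high watermark. Below is a minimal userspace sketch of that check; it is not part of the queued patch, and force_anon_scan() and every page count are invented for illustration.

#include <stdbool.h>
#include <stdio.h>

/* Toy model of the get_scan_count() decision this patch corrects.
 * force_anon_scan() and all page counts below are invented. */
static bool force_anon_scan(unsigned long file_pages,
                            unsigned long free_pages,
                            unsigned long high_wmark)
{
        /* Force SCAN_ANON only when file cache plus free memory cannot
         * lift the zone back above its high watermark. */
        return file_pages + free_pages <= high_wmark;
}

int main(void)
{
        unsigned long zone_file = 200000, zone_free = 50000, high_wmark = 60000;
        unsigned long memcg_file = 1000;        /* an anon-heavy memcg */

        /* Old behaviour: memcg-local file count vs. zone-wide watermark. */
        printf("memcg-local check forces anon scan: %d\n",
               force_anon_scan(memcg_file, zone_free, high_wmark));
        /* Fixed behaviour: zone-wide file count vs. zone-wide watermark. */
        printf("zone-wide check forces anon scan:   %d\n",
               force_anon_scan(zone_file, zone_free, high_wmark));
        return 0;
}

With the memcg-local file count the check trips even though the zone still has plenty of reclaimable file cache; with the zone-wide count it does not.
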
diff --git a/queue-3.14/mm-avoid-unnecessary-atomic-operations-during-end_page_writeback.patch b/queue-3.14/mm-avoid-unnecessary-atomic-operations-during-end_page_writeback.patch
new file mode 100644 (file)
index 0000000..2dcc540
--- /dev/null
@@ -0,0 +1,53 @@
+From 888cf2db475a256fb0cda042140f73d7881f81fe Mon Sep 17 00:00:00 2001
+From: Mel Gorman <mgorman@suse.de>
+Date: Wed, 4 Jun 2014 16:10:34 -0700
+Subject: mm: avoid unnecessary atomic operations during end_page_writeback()
+
+From: Mel Gorman <mgorman@suse.de>
+
+commit 888cf2db475a256fb0cda042140f73d7881f81fe upstream.
+
+If a page is marked for immediate reclaim then it is moved to the tail of
+the LRU list.  This occurs when the system is under enough memory pressure
+for pages under writeback to reach the end of the LRU, but we test for this
+using atomic operations on every writeback.  This patch uses an optimistic
+non-atomic test first.  It'll miss some pages in rare cases but the
+consequences are not severe enough to warrant such a penalty.
+
+While the function does not dominate profiles during a simple dd test,
+its cost is reduced:
+
+73048     0.7428  vmlinux-3.15.0-rc5-mmotm-20140513 end_page_writeback
+23740     0.2409  vmlinux-3.15.0-rc5-lessatomic     end_page_writeback
+
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/filemap.c |   11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -644,8 +644,17 @@ EXPORT_SYMBOL(unlock_page);
+  */
+ void end_page_writeback(struct page *page)
+ {
+-      if (TestClearPageReclaim(page))
++      /*
++       * TestClearPageReclaim could be used here but it is an atomic
++       * operation and overkill in this particular case. Failing to
++       * shuffle a page marked for immediate reclaim is too mild to
++       * justify taking an atomic operation penalty at the end of
+       * every page writeback.
++       */
++      if (PageReclaim(page)) {
++              ClearPageReclaim(page);
+               rotate_reclaimable_page(page);
++      }
+       if (!test_clear_page_writeback(page))
+               BUG();
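
For readers less familiar with the pattern, the change above is "plain test first, atomic clear only if set". A rough userspace model using C11 atomics is sketched below; FLAG_RECLAIM and end_writeback_model() are invented stand-ins rather than the kernel API, and the relaxed atomics only approximate the kernel's page-flag bit operations.

#include <stdatomic.h>
#include <stdio.h>

/* Userspace model of the "plain test first, atomic clear only if set"
 * pattern used above.  FLAG_RECLAIM stands in for PG_reclaim. */
static _Atomic unsigned long page_flags;
#define FLAG_RECLAIM (1UL << 0)

static void end_writeback_model(void)
{
        /* Cheap relaxed load first; pay for the read-modify-write only
         * when the bit is actually set.  Missing a concurrent setter is
         * acceptable, exactly as argued in the comment above. */
        if (atomic_load_explicit(&page_flags, memory_order_relaxed) & FLAG_RECLAIM)
                atomic_fetch_and_explicit(&page_flags, ~FLAG_RECLAIM,
                                          memory_order_relaxed);
}

int main(void)
{
        atomic_store_explicit(&page_flags, FLAG_RECLAIM, memory_order_relaxed);
        end_writeback_model();
        printf("reclaim flag after writeback: %lu\n",
               atomic_load_explicit(&page_flags, memory_order_relaxed));
        return 0;
}
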
diff --git a/queue-3.14/mm-make-copy_pte_range-static-again.patch b/queue-3.14/mm-make-copy_pte_range-static-again.patch
new file mode 100644 (file)
index 0000000..5bf98e0
--- /dev/null
@@ -0,0 +1,50 @@
+From 21bda264f4243f61dfcc485174055f12ad0530b4 Mon Sep 17 00:00:00 2001
+From: Jerome Marchand <jmarchan@redhat.com>
+Date: Wed, 6 Aug 2014 16:06:56 -0700
+Subject: mm: make copy_pte_range static again
+
+From: Jerome Marchand <jmarchan@redhat.com>
+
+commit 21bda264f4243f61dfcc485174055f12ad0530b4 upstream.
+
+Commit 71e3aac0724f ("thp: transparent hugepage core") adds
+copy_pte_range prototype to huge_mm.h.  I'm not sure why (or if) this
+function has been used outside of memory.c, but it currently isn't.
+This patch makes copy_pte_range() static again.
+
+Signed-off-by: Jerome Marchand <jmarchan@redhat.com>
+Acked-by: David Rientjes <rientjes@google.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/huge_mm.h |    4 ----
+ mm/memory.c             |    2 +-
+ 2 files changed, 1 insertion(+), 5 deletions(-)
+
+--- a/include/linux/huge_mm.h
++++ b/include/linux/huge_mm.h
+@@ -93,10 +93,6 @@ extern bool is_vma_temporary_stack(struc
+ #endif /* CONFIG_DEBUG_VM */
+ extern unsigned long transparent_hugepage_flags;
+-extern int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+-                        pmd_t *dst_pmd, pmd_t *src_pmd,
+-                        struct vm_area_struct *vma,
+-                        unsigned long addr, unsigned long end);
+ extern int split_huge_page_to_list(struct page *page, struct list_head *list);
+ static inline int split_huge_page(struct page *page)
+ {
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -878,7 +878,7 @@ out_set_pte:
+       return 0;
+ }
+-int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
++static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+                  pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma,
+                  unsigned long addr, unsigned long end)
+ {
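
The effect is purely about linkage: with 'static', the function has internal linkage, so the prototype in huge_mm.h becomes unnecessary and the symbol is invisible to other translation units. A tiny stand-alone sketch of that (invented names, not the mm code; it compiles as a single translation unit):

#include <stddef.h>

/* With 'static', copy_range_model() has internal linkage: no prototype in
 * any header, no symbol visible to other files.  Names are invented. */
static void copy_range_model(const int *src, int *dst, size_t n)
{
        for (size_t i = 0; i < n; i++)
                dst[i] = src[i];
}

/* The only caller lives in the same file, mirroring copy_pte_range(). */
void copy_all_model(const int *src, int *dst, size_t n)
{
        copy_range_model(src, dst, n);
}
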
diff --git a/queue-3.14/mm-memory.c-use-entry-access_once-pte-in-handle_pte_fault.patch b/queue-3.14/mm-memory.c-use-entry-access_once-pte-in-handle_pte_fault.patch
new file mode 100644 (file)
index 0000000..9bb8c86
--- /dev/null
@@ -0,0 +1,48 @@
+From c0d73261f5c1355a35b8b40e871d31578ce0c044 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Wed, 6 Aug 2014 16:05:08 -0700
+Subject: mm/memory.c: use entry = ACCESS_ONCE(*pte) in handle_pte_fault()
+
+From: Hugh Dickins <hughd@google.com>
+
+commit c0d73261f5c1355a35b8b40e871d31578ce0c044 upstream.
+
+Use ACCESS_ONCE() in handle_pte_fault() when getting the entry or
+orig_pte upon which all subsequent decisions and pte_same() tests will
+be made.
+
+I have no evidence that its lack is responsible for the mm/filemap.c:202
+BUG_ON(page_mapped(page)) in __delete_from_page_cache() found by
+trinity, and I am not optimistic that it will fix it.  But I have found
+no other explanation, and ACCESS_ONCE() here will surely not hurt.
+
+If gcc does re-access the pte before passing it down, then that would be
+disastrous for correct page fault handling, and certainly could explain
+the page_mapped() BUGs seen (concurrent fault causing page to be mapped
+in a second time on top of itself: mapcount 2 for a single pte).
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Cc: Sasha Levin <sasha.levin@oracle.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
+Cc: Konstantin Khlebnikov <koct9i@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/memory.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -3646,7 +3646,7 @@ static int handle_pte_fault(struct mm_st
+       pte_t entry;
+       spinlock_t *ptl;
+-      entry = *pte;
++      entry = ACCESS_ONCE(*pte);
+       if (!pte_present(entry)) {
+               if (pte_none(entry)) {
+                       if (vma->vm_ops) {
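
ACCESS_ONCE() at the time was defined roughly as a volatile cast, which forces exactly one load so every later test in the fault path works on the same snapshot of the pte. A small userspace model follows; shared_pte and fault_path_model() are invented, and the macro mirrors the kernel's definition of that era rather than being quoted from it.

#include <stdio.h>

/* Userspace sketch of the ACCESS_ONCE() idea: a volatile access forces a
 * single load, so every later decision sees the same snapshot even if
 * another thread rewrites the value concurrently.  Uses the GCC-style
 * __typeof__ extension, as the kernel macro did. */
#define ACCESS_ONCE(x) (*(volatile __typeof__(x) *)&(x))

static unsigned long shared_pte;        /* stands in for *pte */

static void fault_path_model(void)
{
        unsigned long entry = ACCESS_ONCE(shared_pte);  /* one snapshot */

        /* Without the volatile read the compiler may legally reload
         * shared_pte at each use, splitting the decisions below across
         * different values. */
        if (entry == 0)
                printf("pte_none path\n");
        else
                printf("present path: %#lx\n", entry);
}

int main(void)
{
        shared_pte = 0x1000;
        fault_path_model();
        return 0;
}
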
diff --git a/queue-3.14/mm-pagemap-avoid-unnecessary-overhead-when-tracepoints-are-deactivated.patch b/queue-3.14/mm-pagemap-avoid-unnecessary-overhead-when-tracepoints-are-deactivated.patch
new file mode 100644 (file)
index 0000000..39787b6
--- /dev/null
@@ -0,0 +1,129 @@
+From 24b7e5819ad5cbef2b7c7376510862aa8319d240 Mon Sep 17 00:00:00 2001
+From: Mel Gorman <mgorman@suse.de>
+Date: Wed, 6 Aug 2014 16:07:11 -0700
+Subject: mm: pagemap: avoid unnecessary overhead when tracepoints are deactivated
+
+From: Mel Gorman <mgorman@suse.de>
+
+commit 24b7e5819ad5cbef2b7c7376510862aa8319d240 upstream.
+
+This was formerly the series "Improve sequential read throughput" which
+noted some major differences in performance of tiobench since 3.0.
+While there are a number of factors, two that dominated were the
+introduction of the fair zone allocation policy and changes to CFQ.
+
+The behaviour of the fair zone allocation policy makes more sense than
+tiobench does as a benchmark, and the CFQ defaults were not changed due
+to insufficient benchmarking.
+
+This series is what's left.  It's one functional fix to the fair zone
+allocation policy when used on NUMA machines and a reduction of overhead
+in general.  tiobench was used for the comparison despite its flaws as
+an IO benchmark as in this case we are primarily interested in the
+overhead of page allocator and page reclaim activity.
+
+On UMA, it makes little difference to overhead
+
+          3.16.0-rc3   3.16.0-rc3
+             vanilla lowercost-v5
+User          383.61      386.77
+System        403.83      401.74
+Elapsed      5411.50     5413.11
+
+On a 4-socket NUMA machine, it's a bit more noticeable:
+
+          3.16.0-rc3   3.16.0-rc3
+             vanilla lowercost-v5
+User          746.94      802.00
+System      65336.22    40852.33
+Elapsed     27553.52    27368.46
+
+This patch (of 6):
+
+The LRU insertion and activate tracepoints take the PFN as a parameter,
+forcing the overhead onto the caller.  Move the overhead to the tracepoint
+fast-assign method to ensure the cost is only incurred when the
+tracepoint is active.
+
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Acked-by: Johannes Weiner <hannes@cmpxchg.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/trace/events/pagemap.h |   16 +++++++---------
+ mm/swap.c                      |    4 ++--
+ 2 files changed, 9 insertions(+), 11 deletions(-)
+
+--- a/include/trace/events/pagemap.h
++++ b/include/trace/events/pagemap.h
+@@ -28,12 +28,10 @@ TRACE_EVENT(mm_lru_insertion,
+       TP_PROTO(
+               struct page *page,
+-              unsigned long pfn,
+-              int lru,
+-              unsigned long flags
++              int lru
+       ),
+-      TP_ARGS(page, pfn, lru, flags),
++      TP_ARGS(page, lru),
+       TP_STRUCT__entry(
+               __field(struct page *,  page    )
+@@ -44,9 +42,9 @@ TRACE_EVENT(mm_lru_insertion,
+       TP_fast_assign(
+               __entry->page   = page;
+-              __entry->pfn    = pfn;
++              __entry->pfn    = page_to_pfn(page);
+               __entry->lru    = lru;
+-              __entry->flags  = flags;
++              __entry->flags  = trace_pagemap_flags(page);
+       ),
+       /* Flag format is based on page-types.c formatting for pagemap */
+@@ -64,9 +62,9 @@ TRACE_EVENT(mm_lru_insertion,
+ TRACE_EVENT(mm_lru_activate,
+-      TP_PROTO(struct page *page, unsigned long pfn),
++      TP_PROTO(struct page *page),
+-      TP_ARGS(page, pfn),
++      TP_ARGS(page),
+       TP_STRUCT__entry(
+               __field(struct page *,  page    )
+@@ -75,7 +73,7 @@ TRACE_EVENT(mm_lru_activate,
+       TP_fast_assign(
+               __entry->page   = page;
+-              __entry->pfn    = pfn;
++              __entry->pfn    = page_to_pfn(page);
+       ),
+       /* Flag format is based on page-types.c formatting for pagemap */
+--- a/mm/swap.c
++++ b/mm/swap.c
+@@ -469,7 +469,7 @@ static void __activate_page(struct page
+               SetPageActive(page);
+               lru += LRU_ACTIVE;
+               add_page_to_lru_list(page, lruvec, lru);
+-              trace_mm_lru_activate(page, page_to_pfn(page));
++              trace_mm_lru_activate(page);
+               __count_vm_event(PGACTIVATE);
+               update_page_reclaim_stat(lruvec, file, 1);
+@@ -962,7 +962,7 @@ static void __pagevec_lru_add_fn(struct
+       SetPageLRU(page);
+       add_page_to_lru_list(page, lruvec, lru);
+       update_page_reclaim_stat(lruvec, file, active);
+-      trace_mm_lru_insertion(page, page_to_pfn(page), lru, trace_pagemap_flags(page));
++      trace_mm_lru_insertion(page, lru);
+ }
+ /*
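
The principle the patch applies: pass only the page to the tracepoint and derive pfn/flags inside TP_fast_assign(), so disabled tracepoints cost callers nothing. A userspace model of that shape is sketched below; trace_enabled, struct page_model and page_to_pfn_model() are invented stand-ins for the kernel's static-key-guarded tracepoints.

#include <stdbool.h>
#include <stdio.h>

/* Userspace model of the change: expensive arguments are computed inside
 * the trace hook, behind an 'enabled' check, instead of at every call
 * site.  In the kernel the check is a static key generated by
 * TRACE_EVENT(). */
struct page_model { unsigned long pfn; };

static bool trace_enabled;

static unsigned long page_to_pfn_model(const struct page_model *page)
{
        return page->pfn;               /* imagine this being costly */
}

static void trace_lru_insertion_model(const struct page_model *page, int lru)
{
        if (!trace_enabled)
                return;                 /* caller paid nothing for the pfn */
        printf("mm_lru_insertion pfn=%lu lru=%d\n",
               page_to_pfn_model(page), lru);
}

int main(void)
{
        struct page_model page = { .pfn = 42 };

        trace_lru_insertion_model(&page, 1);    /* disabled: pfn never computed */
        trace_enabled = true;
        trace_lru_insertion_model(&page, 1);    /* enabled: pfn computed here */
        return 0;
}
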
diff --git a/queue-3.14/mm-rearrange-zone-fields-into-read-only-page-alloc-statistics-and-page-reclaim-lines.patch b/queue-3.14/mm-rearrange-zone-fields-into-read-only-page-alloc-statistics-and-page-reclaim-lines.patch
new file mode 100644 (file)
index 0000000..6f7549f
--- /dev/null
@@ -0,0 +1,367 @@
+From 3484b2de9499df23c4604a513b36f96326ae81ad Mon Sep 17 00:00:00 2001
+From: Mel Gorman <mgorman@suse.de>
+Date: Wed, 6 Aug 2014 16:07:14 -0700
+Subject: mm: rearrange zone fields into read-only, page alloc, statistics and page reclaim lines
+
+From: Mel Gorman <mgorman@suse.de>
+
+commit 3484b2de9499df23c4604a513b36f96326ae81ad upstream.
+
+The arrangement of struct zone has changed over time and now it has
+reached the point where there is some inappropriate sharing going on.
+On x86-64, for example:
+
+o The zone->node field is shared with the zone lock and zone->node is
+  accessed frequently from the page allocator due to the fair zone
+  allocation policy.
+
+o span_seqlock is almost never used but shares a line with free_area
+
+o Some zone statistics share a cache line with the LRU lock so
+  reclaim-intensive and allocator-intensive workloads can bounce the cache
+  line on a stat update
+
+This patch rearranges struct zone to put read-only and read-mostly
+fields together and then splits the page allocator intensive fields, the
+zone statistics and the page reclaim intensive fields into their own
+cache lines.  Note that the type of lowmem_reserve changes due to the
+watermark calculations being signed and avoiding a signed/unsigned
+conversion there.
+
+On the test configuration I used, the overall size of struct zone shrank
+by one cache line.  On smaller machines, this is not likely to be
+noticeable.  However, on a 4-node NUMA machine running tiobench, the
+system CPU overhead is reduced by this patch.
+
+          3.16.0-rc3  3.16.0-rc3
+             vanilla rearrange-v5r9
+User          746.94      759.78
+System      65336.22    58350.98
+Elapsed     27553.52    27282.02
+
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Acked-by: Johannes Weiner <hannes@cmpxchg.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/mmzone.h |  205 +++++++++++++++++++++++++------------------------
+ mm/page_alloc.c        |    7 -
+ mm/vmstat.c            |    4 
+ 3 files changed, 110 insertions(+), 106 deletions(-)
+
+--- a/include/linux/mmzone.h
++++ b/include/linux/mmzone.h
+@@ -321,19 +321,12 @@ enum zone_type {
+ #ifndef __GENERATING_BOUNDS_H
+ struct zone {
+-      /* Fields commonly accessed by the page allocator */
++      /* Read-mostly fields */
+       /* zone watermarks, access with *_wmark_pages(zone) macros */
+       unsigned long watermark[NR_WMARK];
+       /*
+-       * When free pages are below this point, additional steps are taken
+-       * when reading the number of free pages to avoid per-cpu counter
+-       * drift allowing watermarks to be breached
+-       */
+-      unsigned long percpu_drift_mark;
+-
+-      /*
+        * We don't know if the memory that we're going to allocate will be freeable
+        * or/and it will be released eventually, so to avoid totally wasting several
+        * GB of ram we must reserve some of the lower zone memory (otherwise we risk
+@@ -341,41 +334,26 @@ struct zone {
+        * on the higher zones). This array is recalculated at runtime if the
+        * sysctl_lowmem_reserve_ratio sysctl changes.
+        */
+-      unsigned long           lowmem_reserve[MAX_NR_ZONES];
+-
+-      /*
+-       * This is a per-zone reserve of pages that should not be
+-       * considered dirtyable memory.
+-       */
+-      unsigned long           dirty_balance_reserve;
++      long lowmem_reserve[MAX_NR_ZONES];
+ #ifdef CONFIG_NUMA
+       int node;
++#endif
++
+       /*
+-       * zone reclaim becomes active if more unmapped pages exist.
++       * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
++       * this zone's LRU.  Maintained by the pageout code.
+        */
+-      unsigned long           min_unmapped_pages;
+-      unsigned long           min_slab_pages;
+-#endif
++      unsigned int inactive_ratio;
++
++      struct pglist_data      *zone_pgdat;
+       struct per_cpu_pageset __percpu *pageset;
++
+       /*
+-       * free areas of different sizes
++       * This is a per-zone reserve of pages that should not be
++       * considered dirtyable memory.
+        */
+-      spinlock_t              lock;
+-#if defined CONFIG_COMPACTION || defined CONFIG_CMA
+-      /* Set to true when the PG_migrate_skip bits should be cleared */
+-      bool                    compact_blockskip_flush;
+-
+-      /* pfn where compaction free scanner should start */
+-      unsigned long           compact_cached_free_pfn;
+-      /* pfn where async and sync compaction migration scanner should start */
+-      unsigned long           compact_cached_migrate_pfn[2];
+-#endif
+-#ifdef CONFIG_MEMORY_HOTPLUG
+-      /* see spanned/present_pages for more description */
+-      seqlock_t               span_seqlock;
+-#endif
+-      struct free_area        free_area[MAX_ORDER];
++      unsigned long           dirty_balance_reserve;
+ #ifndef CONFIG_SPARSEMEM
+       /*
+@@ -385,71 +363,14 @@ struct zone {
+       unsigned long           *pageblock_flags;
+ #endif /* CONFIG_SPARSEMEM */
+-#ifdef CONFIG_COMPACTION
+-      /*
+-       * On compaction failure, 1<<compact_defer_shift compactions
+-       * are skipped before trying again. The number attempted since
+-       * last failure is tracked with compact_considered.
+-       */
+-      unsigned int            compact_considered;
+-      unsigned int            compact_defer_shift;
+-      int                     compact_order_failed;
+-#endif
+-
+-      ZONE_PADDING(_pad1_)
+-
+-      /* Fields commonly accessed by the page reclaim scanner */
+-      spinlock_t              lru_lock;
+-      struct lruvec           lruvec;
+-
+-      unsigned long           pages_scanned;     /* since last reclaim */
+-      unsigned long           flags;             /* zone flags, see below */
+-
+-      /* Zone statistics */
+-      atomic_long_t           vm_stat[NR_VM_ZONE_STAT_ITEMS];
+-
+-      /*
+-       * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
+-       * this zone's LRU.  Maintained by the pageout code.
+-       */
+-      unsigned int inactive_ratio;
+-
+-
+-      ZONE_PADDING(_pad2_)
+-      /* Rarely used or read-mostly fields */
+-
++#ifdef CONFIG_NUMA
+       /*
+-       * wait_table           -- the array holding the hash table
+-       * wait_table_hash_nr_entries   -- the size of the hash table array
+-       * wait_table_bits      -- wait_table_size == (1 << wait_table_bits)
+-       *
+-       * The purpose of all these is to keep track of the people
+-       * waiting for a page to become available and make them
+-       * runnable again when possible. The trouble is that this
+-       * consumes a lot of space, especially when so few things
+-       * wait on pages at a given time. So instead of using
+-       * per-page waitqueues, we use a waitqueue hash table.
+-       *
+-       * The bucket discipline is to sleep on the same queue when
+-       * colliding and wake all in that wait queue when removing.
+-       * When something wakes, it must check to be sure its page is
+-       * truly available, a la thundering herd. The cost of a
+-       * collision is great, but given the expected load of the
+-       * table, they should be so rare as to be outweighed by the
+-       * benefits from the saved space.
+-       *
+-       * __wait_on_page_locked() and unlock_page() in mm/filemap.c, are the
+-       * primary users of these fields, and in mm/page_alloc.c
+-       * free_area_init_core() performs the initialization of them.
++       * zone reclaim becomes active if more unmapped pages exist.
+        */
+-      wait_queue_head_t       * wait_table;
+-      unsigned long           wait_table_hash_nr_entries;
+-      unsigned long           wait_table_bits;
++      unsigned long           min_unmapped_pages;
++      unsigned long           min_slab_pages;
++#endif /* CONFIG_NUMA */
+-      /*
+-       * Discontig memory support fields.
+-       */
+-      struct pglist_data      *zone_pgdat;
+       /* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
+       unsigned long           zone_start_pfn;
+@@ -495,9 +416,11 @@ struct zone {
+        * adjust_managed_page_count() should be used instead of directly
+        * touching zone->managed_pages and totalram_pages.
+        */
++      unsigned long           managed_pages;
+       unsigned long           spanned_pages;
+       unsigned long           present_pages;
+-      unsigned long           managed_pages;
++
++      const char              *name;
+       /*
+        * Number of MIGRATE_RESEVE page block. To maintain for just
+@@ -505,10 +428,92 @@ struct zone {
+        */
+       int                     nr_migrate_reserve_block;
++#ifdef CONFIG_MEMORY_HOTPLUG
++      /* see spanned/present_pages for more description */
++      seqlock_t               span_seqlock;
++#endif
++
+       /*
+-       * rarely used fields:
++       * wait_table           -- the array holding the hash table
++       * wait_table_hash_nr_entries   -- the size of the hash table array
++       * wait_table_bits      -- wait_table_size == (1 << wait_table_bits)
++       *
++       * The purpose of all these is to keep track of the people
++       * waiting for a page to become available and make them
++       * runnable again when possible. The trouble is that this
++       * consumes a lot of space, especially when so few things
++       * wait on pages at a given time. So instead of using
++       * per-page waitqueues, we use a waitqueue hash table.
++       *
++       * The bucket discipline is to sleep on the same queue when
++       * colliding and wake all in that wait queue when removing.
++       * When something wakes, it must check to be sure its page is
++       * truly available, a la thundering herd. The cost of a
++       * collision is great, but given the expected load of the
++       * table, they should be so rare as to be outweighed by the
++       * benefits from the saved space.
++       *
++       * __wait_on_page_locked() and unlock_page() in mm/filemap.c, are the
++       * primary users of these fields, and in mm/page_alloc.c
++       * free_area_init_core() performs the initialization of them.
+        */
+-      const char              *name;
++      wait_queue_head_t       *wait_table;
++      unsigned long           wait_table_hash_nr_entries;
++      unsigned long           wait_table_bits;
++
++      ZONE_PADDING(_pad1_)
++
++      /* Write-intensive fields used from the page allocator */
++      spinlock_t              lock;
++
++      /* free areas of different sizes */
++      struct free_area        free_area[MAX_ORDER];
++
++      /* zone flags, see below */
++      unsigned long           flags;
++
++      ZONE_PADDING(_pad2_)
++
++      /* Write-intensive fields used by page reclaim */
++
++      /* Fields commonly accessed by the page reclaim scanner */
++      spinlock_t              lru_lock;
++      unsigned long           pages_scanned;     /* since last reclaim */
++      struct lruvec           lruvec;
++
++      /*
++       * When free pages are below this point, additional steps are taken
++       * when reading the number of free pages to avoid per-cpu counter
++       * drift allowing watermarks to be breached
++       */
++      unsigned long percpu_drift_mark;
++
++#if defined CONFIG_COMPACTION || defined CONFIG_CMA
++      /* pfn where compaction free scanner should start */
++      unsigned long           compact_cached_free_pfn;
++      /* pfn where async and sync compaction migration scanner should start */
++      unsigned long           compact_cached_migrate_pfn[2];
++#endif
++
++#ifdef CONFIG_COMPACTION
++      /*
++       * On compaction failure, 1<<compact_defer_shift compactions
++       * are skipped before trying again. The number attempted since
++       * last failure is tracked with compact_considered.
++       */
++      unsigned int            compact_considered;
++      unsigned int            compact_defer_shift;
++      int                     compact_order_failed;
++#endif
++
++#if defined CONFIG_COMPACTION || defined CONFIG_CMA
++      /* Set to true when the PG_migrate_skip bits should be cleared */
++      bool                    compact_blockskip_flush;
++#endif
++
++      ZONE_PADDING(_pad3_)
++      /* Zone statistics */
++      atomic_long_t           vm_stat[NR_VM_ZONE_STAT_ITEMS];
+ } ____cacheline_internodealigned_in_smp;
+ typedef enum {
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -1710,7 +1710,6 @@ static bool __zone_watermark_ok(struct z
+ {
+       /* free_pages my go negative - that's OK */
+       long min = mark;
+-      long lowmem_reserve = z->lowmem_reserve[classzone_idx];
+       int o;
+       long free_cma = 0;
+@@ -1725,7 +1724,7 @@ static bool __zone_watermark_ok(struct z
+               free_cma = zone_page_state(z, NR_FREE_CMA_PAGES);
+ #endif
+-      if (free_pages - free_cma <= min + lowmem_reserve)
++      if (free_pages - free_cma <= min + z->lowmem_reserve[classzone_idx])
+               return false;
+       for (o = 0; o < order; o++) {
+               /* At the next order, this order's pages become unavailable */
+@@ -3257,7 +3256,7 @@ void show_free_areas(unsigned int filter
+                       );
+               printk("lowmem_reserve[]:");
+               for (i = 0; i < MAX_NR_ZONES; i++)
+-                      printk(" %lu", zone->lowmem_reserve[i]);
++                      printk(" %ld", zone->lowmem_reserve[i]);
+               printk("\n");
+       }
+@@ -5585,7 +5584,7 @@ static void calculate_totalreserve_pages
+       for_each_online_pgdat(pgdat) {
+               for (i = 0; i < MAX_NR_ZONES; i++) {
+                       struct zone *zone = pgdat->node_zones + i;
+-                      unsigned long max = 0;
++                      long max = 0;
+                       /* Find valid and maximum lowmem_reserve in the zone */
+                       for (j = i; j < MAX_NR_ZONES; j++) {
+--- a/mm/vmstat.c
++++ b/mm/vmstat.c
+@@ -1065,10 +1065,10 @@ static void zoneinfo_show_print(struct s
+                               zone_page_state(zone, i));
+       seq_printf(m,
+-                 "\n        protection: (%lu",
++                 "\n        protection: (%ld",
+                  zone->lowmem_reserve[0]);
+       for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
+-              seq_printf(m, ", %lu", zone->lowmem_reserve[i]);
++              seq_printf(m, ", %ld", zone->lowmem_reserve[i]);
+       seq_printf(m,
+                  ")"
+                  "\n  pagesets");
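
The layout strategy above can be summarised as: group the read-mostly fields, then start each write-intensive group (allocator, reclaim, statistics) on its own cache line so stores on one side do not bounce lines used by the other. A small C11 sketch of that idea follows, with invented field names and an assumed 64-byte cache line; nothing below is taken from the real struct zone.

#include <stdalign.h>
#include <stdio.h>

/* Sketch of the layout idea only: read-mostly fields first, then each
 * write-intensive group starts on its own (assumed 64-byte) cache line. */
#define CACHELINE 64

struct zone_model {
        /* read-mostly: watermarks and reserves */
        unsigned long watermark[3];
        long lowmem_reserve[4];

        /* write-intensive fields used from the page allocator */
        alignas(CACHELINE) unsigned long lock_word;
        unsigned long free_pages;

        /* write-intensive fields used by page reclaim */
        alignas(CACHELINE) unsigned long lru_lock_word;
        unsigned long pages_scanned;

        /* statistics get their own line, like vm_stat[] after _pad3_ */
        alignas(CACHELINE) long vm_stat[8];
};

int main(void)
{
        printf("sizeof(struct zone_model) = %zu bytes\n",
               sizeof(struct zone_model));
        return 0;
}
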
diff --git a/queue-3.14/mm-thp-only-collapse-hugepages-to-nodes-with-affinity-for-zone_reclaim_mode.patch b/queue-3.14/mm-thp-only-collapse-hugepages-to-nodes-with-affinity-for-zone_reclaim_mode.patch
new file mode 100644 (file)
index 0000000..c72e71d
--- /dev/null
@@ -0,0 +1,87 @@
+From 14a4e2141e24304fff2c697be6382ffb83888185 Mon Sep 17 00:00:00 2001
+From: David Rientjes <rientjes@google.com>
+Date: Wed, 6 Aug 2014 16:07:29 -0700
+Subject: mm, thp: only collapse hugepages to nodes with affinity for zone_reclaim_mode
+
+From: David Rientjes <rientjes@google.com>
+
+commit 14a4e2141e24304fff2c697be6382ffb83888185 upstream.
+
+Commit 9f1b868a13ac ("mm: thp: khugepaged: add policy for finding target
+node") improved the previous khugepaged logic, which allocated a
+transparent hugepage from the node of the first page being collapsed.
+
+However, it is still possible to collapse pages to remote memory which
+may suffer from additional access latency.  With the current policy, it
+is possible that 255 pages (with PAGE_SHIFT == 12) will be collapsed
+remotely if the majority are allocated from that node.
+
+When zone_reclaim_mode is enabled, it means the VM should make every
+attempt to allocate locally to prevent NUMA performance degradation.  In
+this case, we do not want to collapse hugepages to remote nodes that
+would suffer from increased access latency.  Thus, when
+zone_reclaim_mode is enabled, only allow collapsing to nodes with
+RECLAIM_DISTANCE or less.
+
+There is no functional change for systems that disable
+zone_reclaim_mode.
+
+Signed-off-by: David Rientjes <rientjes@google.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Acked-by: Mel Gorman <mgorman@suse.de>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
+Cc: Bob Liu <bob.liu@oracle.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/huge_memory.c |   26 ++++++++++++++++++++++++++
+ 1 file changed, 26 insertions(+)
+
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -2273,6 +2273,30 @@ static void khugepaged_alloc_sleep(void)
+ static int khugepaged_node_load[MAX_NUMNODES];
++static bool khugepaged_scan_abort(int nid)
++{
++      int i;
++
++      /*
++       * If zone_reclaim_mode is disabled, then no extra effort is made to
++       * allocate memory locally.
++       */
++      if (!zone_reclaim_mode)
++              return false;
++
++      /* If there is a count for this node already, it must be acceptable */
++      if (khugepaged_node_load[nid])
++              return false;
++
++      for (i = 0; i < MAX_NUMNODES; i++) {
++              if (!khugepaged_node_load[i])
++                      continue;
++              if (node_distance(nid, i) > RECLAIM_DISTANCE)
++                      return true;
++      }
++      return false;
++}
++
+ #ifdef CONFIG_NUMA
+ static int khugepaged_find_target_node(void)
+ {
+@@ -2589,6 +2613,8 @@ static int khugepaged_scan_pmd(struct mm
+                * hit record.
+                */
+               node = page_to_nid(page);
++              if (khugepaged_scan_abort(node))
++                      goto out_unmap;
+               khugepaged_node_load[node]++;
+               VM_BUG_ON_PAGE(PageCompound(page), page);
+               if (!PageLRU(page) || PageLocked(page) || !PageAnon(page))
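
khugepaged_scan_abort() above reduces to a distance check against the nodes already counted in khugepaged_node_load[]. The userspace sketch below models just that check; node_distance_model(), RECLAIM_DISTANCE_MODEL and the distance values are invented, and the zone_reclaim_mode gate is left out for brevity.

#include <stdbool.h>
#include <stdio.h>

/* Standalone model of khugepaged_scan_abort(): once pages from some node
 * have been counted, abort when the candidate node is too far from any of
 * them. */
#define MAX_NODES               4
#define RECLAIM_DISTANCE_MODEL  30

static int node_load[MAX_NODES];

static int node_distance_model(int a, int b)
{
        return a == b ? 10 : 40;        /* invented NUMA distances */
}

static bool scan_abort_model(int nid)
{
        /* A node we already counted is always acceptable. */
        if (node_load[nid])
                return false;

        for (int i = 0; i < MAX_NODES; i++) {
                if (!node_load[i])
                        continue;
                if (node_distance_model(nid, i) > RECLAIM_DISTANCE_MODEL)
                        return true;
        }
        return false;
}

int main(void)
{
        node_load[0] = 5;       /* some pages already seen on node 0 */
        printf("abort for node 0: %d\n", scan_abort_model(0));  /* 0 */
        printf("abort for node 1: %d\n", scan_abort_model(1));  /* 1 */
        return 0;
}
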
diff --git a/queue-3.14/series b/queue-3.14/series
index 31609754fc201d9601b505f2f4c8bcccb1185448..f12aa9a328a7a5f250e27f941d49dcbe15e13ce2 100644 (file)
@@ -60,3 +60,12 @@ mm-do-not-use-atomic-operations-when-releasing-pages.patch
 mm-do-not-use-unnecessary-atomic-operations-when-adding-pages-to-the-lru.patch
 fs-buffer-do-not-use-unnecessary-atomic-operations-when-discarding-buffers.patch
 mm-non-atomically-mark-page-accessed-during-page-cache-allocation-where-possible.patch
+mm-avoid-unnecessary-atomic-operations-during-end_page_writeback.patch
+shmem-fix-init_page_accessed-use-to-stop-pagelru-bug.patch
+mm-memory.c-use-entry-access_once-pte-in-handle_pte_fault.patch
+mm-thp-only-collapse-hugepages-to-nodes-with-affinity-for-zone_reclaim_mode.patch
+mm-make-copy_pte_range-static-again.patch
+vmalloc-use-rcu-list-iterator-to-reduce-vmap_area_lock-contention.patch
+memcg-vmscan-fix-forced-scan-of-anonymous-pages.patch
+mm-pagemap-avoid-unnecessary-overhead-when-tracepoints-are-deactivated.patch
+mm-rearrange-zone-fields-into-read-only-page-alloc-statistics-and-page-reclaim-lines.patch
diff --git a/queue-3.14/shmem-fix-init_page_accessed-use-to-stop-pagelru-bug.patch b/queue-3.14/shmem-fix-init_page_accessed-use-to-stop-pagelru-bug.patch
new file mode 100644 (file)
index 0000000..e593ba3
--- /dev/null
@@ -0,0 +1,88 @@
+From 66d2f4d28cd030220e7ea2a628993fcabcb956d1 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Wed, 2 Jul 2014 15:22:38 -0700
+Subject: shmem: fix init_page_accessed use to stop !PageLRU bug
+
+From: Hugh Dickins <hughd@google.com>
+
+commit 66d2f4d28cd030220e7ea2a628993fcabcb956d1 upstream.
+
+Under shmem swapping load, I sometimes hit the VM_BUG_ON_PAGE(!PageLRU)
+in isolate_lru_pages() at mm/vmscan.c:1281!
+
+Commit 2457aec63745 ("mm: non-atomically mark page accessed during page
+cache allocation where possible") looks like interrupted work-in-progress.
+
+mm/filemap.c's call to init_page_accessed() is fine, but not mm/shmem.c's
+- shmem_write_begin() is clearly wrong to use it after shmem_getpage(),
+when the page is always visible in radix_tree, and often already on LRU.
+
+Revert change to shmem_write_begin(), and use init_page_accessed() or
+mark_page_accessed() appropriately for SGP_WRITE in shmem_getpage_gfp().
+
+SGP_WRITE also covers shmem_symlink(), which did not mark_page_accessed()
+before; but since many other filesystems use [__]page_symlink(), which did
+and does mark the page accessed, consider this as rectifying an oversight.
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Acked-by: Mel Gorman <mgorman@suse.de>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Michal Hocko <mhocko@suse.cz>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Prabhakar Lad <prabhakar.csengg@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/shmem.c |   15 ++++++++++-----
+ 1 file changed, 10 insertions(+), 5 deletions(-)
+
+--- a/mm/shmem.c
++++ b/mm/shmem.c
+@@ -1035,6 +1035,9 @@ repeat:
+               goto failed;
+       }
++      if (page && sgp == SGP_WRITE)
++              mark_page_accessed(page);
++
+       /* fallocated page? */
+       if (page && !PageUptodate(page)) {
+               if (sgp != SGP_READ)
+@@ -1116,6 +1119,9 @@ repeat:
+               shmem_recalc_inode(inode);
+               spin_unlock(&info->lock);
++              if (sgp == SGP_WRITE)
++                      mark_page_accessed(page);
++
+               delete_from_swap_cache(page);
+               set_page_dirty(page);
+               swap_free(swap);
+@@ -1142,6 +1148,9 @@ repeat:
+               __SetPageSwapBacked(page);
+               __set_page_locked(page);
++              if (sgp == SGP_WRITE)
++                      init_page_accessed(page);
++
+               error = mem_cgroup_cache_charge(page, current->mm,
+                                               gfp & GFP_RECLAIM_MASK);
+               if (error)
+@@ -1438,13 +1447,9 @@ shmem_write_begin(struct file *file, str
+                       loff_t pos, unsigned len, unsigned flags,
+                       struct page **pagep, void **fsdata)
+ {
+-      int ret;
+       struct inode *inode = mapping->host;
+       pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+-      ret = shmem_getpage(inode, index, pagep, SGP_WRITE, NULL);
+-      if (ret == 0 && *pagep)
+-              init_page_accessed(*pagep);
+-      return ret;
++      return shmem_getpage(inode, index, pagep, SGP_WRITE, NULL);
+ }
+ static int
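
The rule being restored is: an init-style accessed mark is only valid while the page is still private to its creator, whereas a page returned by shmem_getpage() may already be on the LRU and must go through mark_page_accessed(). A toy model of that rule follows, with invented helpers that are not the mm API.

#include <stdbool.h>
#include <stdio.h>

/* Toy model: init-style marking assumes the page is not yet visible to
 * reclaim; the LRU-aware helper is safe either way. */
struct page_model {
        bool on_lru;
        bool referenced;
        bool active;
};

static void init_accessed_model(struct page_model *p)
{
        p->referenced = true;   /* assumes the page is not yet on the LRU */
}

static void mark_accessed_model(struct page_model *p)
{
        if (p->referenced && p->on_lru)
                p->active = true;       /* would move it to the active LRU */
        else
                p->referenced = true;
}

int main(void)
{
        struct page_model fresh = { 0 };
        struct page_model cached = { .on_lru = true, .referenced = true };

        init_accessed_model(&fresh);    /* fine: brand-new, still private page */
        mark_accessed_model(&cached);   /* required for an already-visible page */
        printf("cached page activated: %d\n", cached.active);
        return 0;
}
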
diff --git a/queue-3.14/vmalloc-use-rcu-list-iterator-to-reduce-vmap_area_lock-contention.patch b/queue-3.14/vmalloc-use-rcu-list-iterator-to-reduce-vmap_area_lock-contention.patch
new file mode 100644 (file)
index 0000000..cae32e8
--- /dev/null
@@ -0,0 +1,109 @@
+From 474750aba88817c53f39424e5567b8e4acc4b39b Mon Sep 17 00:00:00 2001
+From: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Date: Wed, 6 Aug 2014 16:05:06 -0700
+Subject: vmalloc: use rcu list iterator to reduce vmap_area_lock contention
+
+From: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+
+commit 474750aba88817c53f39424e5567b8e4acc4b39b upstream.
+
+Richard Yao reported a month ago that his system has trouble with
+vmap_area_lock contention during performance analysis via /proc/meminfo.
+Andrew asked why his analysis checks /proc/meminfo so stressfully, but he
+didn't answer.
+
+  https://lkml.org/lkml/2014/4/10/416
+
+Although I'm not sure whether this is the right usage or not, there is a
+solution that reduces vmap_area_lock contention with no side effects:
+just use the RCU list iterator in get_vmalloc_info().
+
+RCU can be used in this function because the whole RCU protocol is already
+respected by writers, since Nick Piggin's commit db64fe02258f1 ("mm:
+rewrite vmap layer") back in linux-2.6.28.
+
+Specifically:
+   insertions use list_add_rcu(),
+   deletions use list_del_rcu() and kfree_rcu().
+
+Note that the rb tree is not used from the RCU reader side (it would not
+be safe); only the vmap_area_list has full RCU protection.
+
+Note that __purge_vmap_area_lazy() already uses this rcu protection.
+
+        rcu_read_lock();
+        list_for_each_entry_rcu(va, &vmap_area_list, list) {
+                if (va->flags & VM_LAZY_FREE) {
+                        if (va->va_start < *start)
+                                *start = va->va_start;
+                        if (va->va_end > *end)
+                                *end = va->va_end;
+                        nr += (va->va_end - va->va_start) >> PAGE_SHIFT;
+                        list_add_tail(&va->purge_list, &valist);
+                        va->flags |= VM_LAZY_FREEING;
+                        va->flags &= ~VM_LAZY_FREE;
+                }
+        }
+        rcu_read_unlock();
+
+Peter:
+
+: While rcu list traversal over the vmap_area_list is safe, this may
+: arrive at different results than the spinlocked version. The rcu list
+: traversal version will not be a 'snapshot' of a single, valid instant
+: of the entire vmap_area_list, but rather a potential amalgam of
+: different list states.
+
+Joonsoo:
+
+: Yes, you are right, but I don't think that we should be strict here.
+: Meminfo is already not a 'snapshot' at specific time.  While we try to get
+: certain stats, the other stats can change.  And, although we may arrive at
+: different results than the spinlocked version, the difference would not be
+: large and would not make serious side-effect.
+
+[edumazet@google.com: add more commit description]
+Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Reported-by: Richard Yao <ryao@gentoo.org>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Cc: Peter Hurley <peter@hurleysoftware.com>
+Cc: Zhang Yanfei <zhangyanfei.yes@gmail.com>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Andi Kleen <andi@firstfloor.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/vmalloc.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/mm/vmalloc.c
++++ b/mm/vmalloc.c
+@@ -2681,14 +2681,14 @@ void get_vmalloc_info(struct vmalloc_inf
+       prev_end = VMALLOC_START;
+-      spin_lock(&vmap_area_lock);
++      rcu_read_lock();
+       if (list_empty(&vmap_area_list)) {
+               vmi->largest_chunk = VMALLOC_TOTAL;
+               goto out;
+       }
+-      list_for_each_entry(va, &vmap_area_list, list) {
++      list_for_each_entry_rcu(va, &vmap_area_list, list) {
+               unsigned long addr = va->va_start;
+               /*
+@@ -2715,7 +2715,7 @@ void get_vmalloc_info(struct vmalloc_inf
+               vmi->largest_chunk = VMALLOC_END - prev_end;
+ out:
+-      spin_unlock(&vmap_area_lock);
++      rcu_read_unlock();
+ }
+ #endif
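
The trade-off accepted here is that a lockless reader sees a consistent list but not a single snapshot of it. The userspace sketch below models only the publication half of that idea with C11 release/acquire ordering; it omits RCU's deferred freeing, and struct va_model, publish_va() and vmalloc_used_model() are invented names rather than code from mm/vmalloc.c.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

/* Writers publish nodes with release stores; readers walk with acquire
 * loads and never take the writers' lock, accepting that the walk is not
 * one atomic snapshot of the whole list. */
struct va_model {
        unsigned long va_start, va_end;
        struct va_model *_Atomic next;
};

static struct va_model *_Atomic va_list_head;

static void publish_va(struct va_model *node)           /* writer side */
{
        node->next = atomic_load_explicit(&va_list_head, memory_order_relaxed);
        atomic_store_explicit(&va_list_head, node, memory_order_release);
}

static unsigned long vmalloc_used_model(void)           /* lockless reader */
{
        unsigned long used = 0;
        struct va_model *va;

        for (va = atomic_load_explicit(&va_list_head, memory_order_acquire);
             va != NULL;
             va = atomic_load_explicit(&va->next, memory_order_acquire))
                used += va->va_end - va->va_start;
        return used;
}

int main(void)
{
        struct va_model *a = calloc(1, sizeof(*a));

        if (!a)
                return 1;
        a->va_start = 0x1000;
        a->va_end = 0x3000;
        publish_va(a);
        printf("vmalloc used (model) = %lu bytes\n", vmalloc_used_model());
        free(a);
        return 0;
}
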