From a2eb460d85bfa3d8a29232684f373c1e131492f9 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman
Date: Tue, 7 Oct 2014 11:11:59 -0700
Subject: [PATCH] 3.14-stable patches

added patches:
	mm-per-thread-vma-caching.patch
---
 queue-3.14/mm-per-thread-vma-caching.patch | 606 +++++++++++++++++++++
 queue-3.14/series                          |   1 +
 2 files changed, 607 insertions(+)
 create mode 100644 queue-3.14/mm-per-thread-vma-caching.patch

diff --git a/queue-3.14/mm-per-thread-vma-caching.patch b/queue-3.14/mm-per-thread-vma-caching.patch
new file mode 100644
index 00000000000..2f63fef7a98
--- /dev/null
+++ b/queue-3.14/mm-per-thread-vma-caching.patch
@@ -0,0 +1,606 @@
+From 615d6e8756c87149f2d4c1b93d471bca002bd849 Mon Sep 17 00:00:00 2001
+From: Davidlohr Bueso
+Date: Mon, 7 Apr 2014 15:37:25 -0700
+Subject: mm: per-thread vma caching
+
+From: Davidlohr Bueso
+
+commit 615d6e8756c87149f2d4c1b93d471bca002bd849 upstream.
+
+This patch is a continuation of efforts trying to optimize find_vma(),
+avoiding potentially expensive rbtree walks to locate a vma upon faults.
+The original approach (https://lkml.org/lkml/2013/11/1/410), where the
+largest vma was also cached, ended up being too specific and random,
+thus further comparison with other approaches was needed. There are
+two things to consider when dealing with this: the cache hit rate and
+the latency of find_vma(). Improving the hit-rate does not necessarily
+translate into finding the vma any faster, as the overhead of any fancy
+caching schemes can be too high to consider.
+
+We currently cache the last used vma for the whole address space, which
+provides a nice optimization, reducing the total cycles in find_vma() by
+up to 250%, for workloads with good locality. On the other hand, this
+simple scheme is pretty much useless for workloads with poor locality.
+Analyzing ebizzy runs shows that, no matter how many threads are
+running, the mmap_cache hit rate is less than 2%, and in many situations
+below 1%.
+
+The proposed approach is to replace this scheme with a small per-thread
+cache, maximizing hit rates at a very low maintenance cost.
+Invalidations are performed by simply bumping up a 32-bit sequence
+number. The only expensive operation is in the rare case of a seq
+number overflow, where all caches that share the same address space are
+flushed. Upon a miss, the proposed replacement policy is based on the
+page number that contains the virtual address in question. Concretely,
+the following results are seen on an 80-core, 8-socket x86-64 box:
+
+1) System bootup: Most programs are single-threaded, so the per-thread
+   scheme improves on the ~50% hit rate by just adding a few more slots
+   to the cache.
+
++----------------+----------+------------------+
+| caching scheme | hit-rate | cycles (billion) |
++----------------+----------+------------------+
+| baseline       | 50.61%   | 19.90            |
+| patched        | 73.45%   | 13.58            |
++----------------+----------+------------------+
+
+2) Kernel build: This one is already pretty good with the current
+   approach as we're dealing with good locality.
+
++----------------+----------+------------------+
+| caching scheme | hit-rate | cycles (billion) |
++----------------+----------+------------------+
+| baseline       | 75.28%   | 11.03            |
+| patched        | 88.09%   | 9.31             |
++----------------+----------+------------------+
+
+3) Oracle 11g Data Mining (4k pages): Similar to the kernel build workload.
+ ++----------------+----------+------------------+ +| caching scheme | hit-rate | cycles (billion) | ++----------------+----------+------------------+ +| baseline | 70.66% | 17.14 | +| patched | 91.15% | 12.57 | ++----------------+----------+------------------+ + +4) Ebizzy: There's a fair amount of variation from run to run, but this + approach always shows nearly perfect hit rates, while baseline is just + about non-existent. The amounts of cycles can fluctuate between + anywhere from ~60 to ~116 for the baseline scheme, but this approach + reduces it considerably. For instance, with 80 threads: + ++----------------+----------+------------------+ +| caching scheme | hit-rate | cycles (billion) | ++----------------+----------+------------------+ +| baseline | 1.06% | 91.54 | +| patched | 99.97% | 14.18 | ++----------------+----------+------------------+ + +[akpm@linux-foundation.org: fix nommu build, per Davidlohr] +[akpm@linux-foundation.org: document vmacache_valid() logic] +[akpm@linux-foundation.org: attempt to untangle header files] +[akpm@linux-foundation.org: add vmacache_find() BUG_ON] +[hughd@google.com: add vmacache_valid_mm() (from Oleg)] +[akpm@linux-foundation.org: coding-style fixes] +[akpm@linux-foundation.org: adjust and enhance comments] +Signed-off-by: Davidlohr Bueso +Reviewed-by: Rik van Riel +Acked-by: Linus Torvalds +Reviewed-by: Michel Lespinasse +Cc: Oleg Nesterov +Tested-by: Hugh Dickins +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Mel Gorman +Signed-off-by: Greg Kroah-Hartman + +--- + arch/unicore32/include/asm/mmu_context.h | 4 - + fs/exec.c | 5 + + fs/proc/task_mmu.c | 3 + include/linux/mm_types.h | 4 - + include/linux/sched.h | 7 + + include/linux/vmacache.h | 38 ++++++++++ + kernel/debug/debug_core.c | 14 +++ + kernel/fork.c | 7 + + mm/Makefile | 2 + mm/mmap.c | 51 +++++++------- + mm/nommu.c | 24 ++++-- + mm/vmacache.c | 112 +++++++++++++++++++++++++++++++ + 12 files changed, 229 insertions(+), 42 deletions(-) + +--- a/arch/unicore32/include/asm/mmu_context.h ++++ b/arch/unicore32/include/asm/mmu_context.h +@@ -14,6 +14,8 @@ + + #include + #include ++#include ++#include + #include + + #include +@@ -73,7 +75,7 @@ do { \ + else \ + mm->mmap = NULL; \ + rb_erase(&high_vma->vm_rb, &mm->mm_rb); \ +- mm->mmap_cache = NULL; \ ++ vmacache_invalidate(mm); \ + mm->map_count--; \ + remove_vma(high_vma); \ + } \ +--- a/fs/exec.c ++++ b/fs/exec.c +@@ -26,6 +26,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -820,7 +821,7 @@ EXPORT_SYMBOL(read_code); + static int exec_mmap(struct mm_struct *mm) + { + struct task_struct *tsk; +- struct mm_struct * old_mm, *active_mm; ++ struct mm_struct *old_mm, *active_mm; + + /* Notify parent that we're no longer interested in the old VM */ + tsk = current; +@@ -846,6 +847,8 @@ static int exec_mmap(struct mm_struct *m + tsk->mm = mm; + tsk->active_mm = mm; + activate_mm(active_mm, mm); ++ tsk->mm->vmacache_seqnum = 0; ++ vmacache_flush(tsk); + task_unlock(tsk); + if (old_mm) { + up_read(&old_mm->mmap_sem); +--- a/fs/proc/task_mmu.c ++++ b/fs/proc/task_mmu.c +@@ -1,4 +1,5 @@ + #include ++#include + #include + #include + #include +@@ -152,7 +153,7 @@ static void *m_start(struct seq_file *m, + + /* + * We remember last_addr rather than next_addr to hit with +- * mmap_cache most of the time. We have zero last_addr at ++ * vmacache most of the time. We have zero last_addr at + * the beginning and also after lseek. We will have -1 last_addr + * after the end of the vmas. 
+ */ +--- a/include/linux/mm_types.h ++++ b/include/linux/mm_types.h +@@ -342,9 +342,9 @@ struct mm_rss_stat { + + struct kioctx_table; + struct mm_struct { +- struct vm_area_struct * mmap; /* list of VMAs */ ++ struct vm_area_struct *mmap; /* list of VMAs */ + struct rb_root mm_rb; +- struct vm_area_struct * mmap_cache; /* last find_vma result */ ++ u32 vmacache_seqnum; /* per-thread vmacache */ + #ifdef CONFIG_MMU + unsigned long (*get_unmapped_area) (struct file *filp, + unsigned long addr, unsigned long len, +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -59,6 +59,10 @@ struct sched_param { + + #define SCHED_ATTR_SIZE_VER0 48 /* sizeof first published struct */ + ++#define VMACACHE_BITS 2 ++#define VMACACHE_SIZE (1U << VMACACHE_BITS) ++#define VMACACHE_MASK (VMACACHE_SIZE - 1) ++ + /* + * Extended scheduling parameters data structure. + * +@@ -1228,6 +1232,9 @@ struct task_struct { + #ifdef CONFIG_COMPAT_BRK + unsigned brk_randomized:1; + #endif ++ /* per-thread vma caching */ ++ u32 vmacache_seqnum; ++ struct vm_area_struct *vmacache[VMACACHE_SIZE]; + #if defined(SPLIT_RSS_COUNTING) + struct task_rss_stat rss_stat; + #endif +--- /dev/null ++++ b/include/linux/vmacache.h +@@ -0,0 +1,38 @@ ++#ifndef __LINUX_VMACACHE_H ++#define __LINUX_VMACACHE_H ++ ++#include ++#include ++ ++/* ++ * Hash based on the page number. Provides a good hit rate for ++ * workloads with good locality and those with random accesses as well. ++ */ ++#define VMACACHE_HASH(addr) ((addr >> PAGE_SHIFT) & VMACACHE_MASK) ++ ++static inline void vmacache_flush(struct task_struct *tsk) ++{ ++ memset(tsk->vmacache, 0, sizeof(tsk->vmacache)); ++} ++ ++extern void vmacache_flush_all(struct mm_struct *mm); ++extern void vmacache_update(unsigned long addr, struct vm_area_struct *newvma); ++extern struct vm_area_struct *vmacache_find(struct mm_struct *mm, ++ unsigned long addr); ++ ++#ifndef CONFIG_MMU ++extern struct vm_area_struct *vmacache_find_exact(struct mm_struct *mm, ++ unsigned long start, ++ unsigned long end); ++#endif ++ ++static inline void vmacache_invalidate(struct mm_struct *mm) ++{ ++ mm->vmacache_seqnum++; ++ ++ /* deal with overflows */ ++ if (unlikely(mm->vmacache_seqnum == 0)) ++ vmacache_flush_all(mm); ++} ++ ++#endif /* __LINUX_VMACACHE_H */ +--- a/kernel/debug/debug_core.c ++++ b/kernel/debug/debug_core.c +@@ -49,6 +49,7 @@ + #include + #include + #include ++#include + #include + + #include +@@ -224,10 +225,17 @@ static void kgdb_flush_swbreak_addr(unsi + if (!CACHE_FLUSH_IS_SAFE) + return; + +- if (current->mm && current->mm->mmap_cache) { +- flush_cache_range(current->mm->mmap_cache, +- addr, addr + BREAK_INSTR_SIZE); ++ if (current->mm) { ++ int i; ++ ++ for (i = 0; i < VMACACHE_SIZE; i++) { ++ if (!current->vmacache[i]) ++ continue; ++ flush_cache_range(current->vmacache[i], ++ addr, addr + BREAK_INSTR_SIZE); ++ } + } ++ + /* Force flush instruction cache if it was outside the mm */ + flush_icache_range(addr, addr + BREAK_INSTR_SIZE); + } +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -28,6 +28,8 @@ + #include + #include + #include ++#include ++#include + #include + #include + #include +@@ -363,7 +365,7 @@ static int dup_mmap(struct mm_struct *mm + + mm->locked_vm = 0; + mm->mmap = NULL; +- mm->mmap_cache = NULL; ++ mm->vmacache_seqnum = 0; + mm->map_count = 0; + cpumask_clear(mm_cpumask(mm)); + mm->mm_rb = RB_ROOT; +@@ -876,6 +878,9 @@ static int copy_mm(unsigned long clone_f + if (!oldmm) + return 0; + ++ /* initialize the new vmacache entries */ ++ vmacache_flush(tsk); ++ + if 
(clone_flags & CLONE_VM) { + atomic_inc(&oldmm->mm_users); + mm = oldmm; +--- a/mm/Makefile ++++ b/mm/Makefile +@@ -16,7 +16,7 @@ obj-y := filemap.o mempool.o oom_kill. + readahead.o swap.o truncate.o vmscan.o shmem.o \ + util.o mmzone.o vmstat.o backing-dev.o \ + mm_init.o mmu_context.o percpu.o slab_common.o \ +- compaction.o balloon_compaction.o \ ++ compaction.o balloon_compaction.o vmacache.o \ + interval_tree.o list_lru.o $(mmu-y) + + obj-y += init-mm.o +--- a/mm/mmap.c ++++ b/mm/mmap.c +@@ -10,6 +10,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -681,8 +682,9 @@ __vma_unlink(struct mm_struct *mm, struc + prev->vm_next = next = vma->vm_next; + if (next) + next->vm_prev = prev; +- if (mm->mmap_cache == vma) +- mm->mmap_cache = prev; ++ ++ /* Kill the cache */ ++ vmacache_invalidate(mm); + } + + /* +@@ -1989,34 +1991,33 @@ EXPORT_SYMBOL(get_unmapped_area); + /* Look up the first VMA which satisfies addr < vm_end, NULL if none. */ + struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) + { +- struct vm_area_struct *vma = NULL; ++ struct rb_node *rb_node; ++ struct vm_area_struct *vma; + + /* Check the cache first. */ +- /* (Cache hit rate is typically around 35%.) */ +- vma = ACCESS_ONCE(mm->mmap_cache); +- if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) { +- struct rb_node *rb_node; ++ vma = vmacache_find(mm, addr); ++ if (likely(vma)) ++ return vma; + +- rb_node = mm->mm_rb.rb_node; +- vma = NULL; ++ rb_node = mm->mm_rb.rb_node; ++ vma = NULL; + +- while (rb_node) { +- struct vm_area_struct *vma_tmp; ++ while (rb_node) { ++ struct vm_area_struct *tmp; + +- vma_tmp = rb_entry(rb_node, +- struct vm_area_struct, vm_rb); ++ tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb); + +- if (vma_tmp->vm_end > addr) { +- vma = vma_tmp; +- if (vma_tmp->vm_start <= addr) +- break; +- rb_node = rb_node->rb_left; +- } else +- rb_node = rb_node->rb_right; +- } +- if (vma) +- mm->mmap_cache = vma; ++ if (tmp->vm_end > addr) { ++ vma = tmp; ++ if (tmp->vm_start <= addr) ++ break; ++ rb_node = rb_node->rb_left; ++ } else ++ rb_node = rb_node->rb_right; + } ++ ++ if (vma) ++ vmacache_update(addr, vma); + return vma; + } + +@@ -2388,7 +2389,9 @@ detach_vmas_to_be_unmapped(struct mm_str + } else + mm->highest_vm_end = prev ? prev->vm_end : 0; + tail_vma->vm_next = NULL; +- mm->mmap_cache = NULL; /* Kill the cache. 
*/ ++ ++ /* Kill the cache */ ++ vmacache_invalidate(mm); + } + + /* +--- a/mm/nommu.c ++++ b/mm/nommu.c +@@ -15,6 +15,7 @@ + + #include + #include ++#include + #include + #include + #include +@@ -768,16 +769,23 @@ static void add_vma_to_mm(struct mm_stru + */ + static void delete_vma_from_mm(struct vm_area_struct *vma) + { ++ int i; + struct address_space *mapping; + struct mm_struct *mm = vma->vm_mm; ++ struct task_struct *curr = current; + + kenter("%p", vma); + + protect_vma(vma, 0); + + mm->map_count--; +- if (mm->mmap_cache == vma) +- mm->mmap_cache = NULL; ++ for (i = 0; i < VMACACHE_SIZE; i++) { ++ /* if the vma is cached, invalidate the entire cache */ ++ if (curr->vmacache[i] == vma) { ++ vmacache_invalidate(curr->mm); ++ break; ++ } ++ } + + /* remove the VMA from the mapping */ + if (vma->vm_file) { +@@ -825,8 +833,8 @@ struct vm_area_struct *find_vma(struct m + struct vm_area_struct *vma; + + /* check the cache first */ +- vma = ACCESS_ONCE(mm->mmap_cache); +- if (vma && vma->vm_start <= addr && vma->vm_end > addr) ++ vma = vmacache_find(mm, addr); ++ if (likely(vma)) + return vma; + + /* trawl the list (there may be multiple mappings in which addr +@@ -835,7 +843,7 @@ struct vm_area_struct *find_vma(struct m + if (vma->vm_start > addr) + return NULL; + if (vma->vm_end > addr) { +- mm->mmap_cache = vma; ++ vmacache_update(addr, vma); + return vma; + } + } +@@ -874,8 +882,8 @@ static struct vm_area_struct *find_vma_e + unsigned long end = addr + len; + + /* check the cache first */ +- vma = mm->mmap_cache; +- if (vma && vma->vm_start == addr && vma->vm_end == end) ++ vma = vmacache_find_exact(mm, addr, end); ++ if (vma) + return vma; + + /* trawl the list (there may be multiple mappings in which addr +@@ -886,7 +894,7 @@ static struct vm_area_struct *find_vma_e + if (vma->vm_start > addr) + return NULL; + if (vma->vm_end == end) { +- mm->mmap_cache = vma; ++ vmacache_update(addr, vma); + return vma; + } + } +--- /dev/null ++++ b/mm/vmacache.c +@@ -0,0 +1,112 @@ ++/* ++ * Copyright (C) 2014 Davidlohr Bueso. ++ */ ++#include ++#include ++#include ++ ++/* ++ * Flush vma caches for threads that share a given mm. ++ * ++ * The operation is safe because the caller holds the mmap_sem ++ * exclusively and other threads accessing the vma cache will ++ * have mmap_sem held at least for read, so no extra locking ++ * is required to maintain the vma cache. ++ */ ++void vmacache_flush_all(struct mm_struct *mm) ++{ ++ struct task_struct *g, *p; ++ ++ rcu_read_lock(); ++ for_each_process_thread(g, p) { ++ /* ++ * Only flush the vmacache pointers as the ++ * mm seqnum is already set and curr's will ++ * be set upon invalidation when the next ++ * lookup is done. ++ */ ++ if (mm == p->mm) ++ vmacache_flush(p); ++ } ++ rcu_read_unlock(); ++} ++ ++/* ++ * This task may be accessing a foreign mm via (for example) ++ * get_user_pages()->find_vma(). The vmacache is task-local and this ++ * task's vmacache pertains to a different mm (ie, its own). There is ++ * nothing we can do here. ++ * ++ * Also handle the case where a kernel thread has adopted this mm via use_mm(). ++ * That kernel thread's vmacache is not applicable to this mm. 
++ */ ++static bool vmacache_valid_mm(struct mm_struct *mm) ++{ ++ return current->mm == mm && !(current->flags & PF_KTHREAD); ++} ++ ++void vmacache_update(unsigned long addr, struct vm_area_struct *newvma) ++{ ++ if (vmacache_valid_mm(newvma->vm_mm)) ++ current->vmacache[VMACACHE_HASH(addr)] = newvma; ++} ++ ++static bool vmacache_valid(struct mm_struct *mm) ++{ ++ struct task_struct *curr; ++ ++ if (!vmacache_valid_mm(mm)) ++ return false; ++ ++ curr = current; ++ if (mm->vmacache_seqnum != curr->vmacache_seqnum) { ++ /* ++ * First attempt will always be invalid, initialize ++ * the new cache for this task here. ++ */ ++ curr->vmacache_seqnum = mm->vmacache_seqnum; ++ vmacache_flush(curr); ++ return false; ++ } ++ return true; ++} ++ ++struct vm_area_struct *vmacache_find(struct mm_struct *mm, unsigned long addr) ++{ ++ int i; ++ ++ if (!vmacache_valid(mm)) ++ return NULL; ++ ++ for (i = 0; i < VMACACHE_SIZE; i++) { ++ struct vm_area_struct *vma = current->vmacache[i]; ++ ++ if (vma && vma->vm_start <= addr && vma->vm_end > addr) { ++ BUG_ON(vma->vm_mm != mm); ++ return vma; ++ } ++ } ++ ++ return NULL; ++} ++ ++#ifndef CONFIG_MMU ++struct vm_area_struct *vmacache_find_exact(struct mm_struct *mm, ++ unsigned long start, ++ unsigned long end) ++{ ++ int i; ++ ++ if (!vmacache_valid(mm)) ++ return NULL; ++ ++ for (i = 0; i < VMACACHE_SIZE; i++) { ++ struct vm_area_struct *vma = current->vmacache[i]; ++ ++ if (vma && vma->vm_start == start && vma->vm_end == end) ++ return vma; ++ } ++ ++ return NULL; ++} ++#endif diff --git a/queue-3.14/series b/queue-3.14/series index bd61f8b7e81..273450bb569 100644 --- a/queue-3.14/series +++ b/queue-3.14/series @@ -33,3 +33,4 @@ mm-filemap.c-avoid-always-dirtying-mapping-flags-on-o_direct.patch mm-vmscan-respect-numa-policy-mask-when-shrinking-slab-on-direct-reclaim.patch mm-vmscan-shrink_slab-rename-max_pass-freeable.patch vmscan-reclaim_clean_pages_from_list-must-use-mod_zone_page_state.patch +mm-per-thread-vma-caching.patch -- 2.47.3