From: Greg Kroah-Hartman Date: Mon, 6 Jan 2014 20:02:48 +0000 (-0800) Subject: 3.12-stable patches X-Git-Tag: v3.4.76~32 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=74ea58e9fbf384715be0b23e86e87f5243f87057;p=thirdparty%2Fkernel%2Fstable-queue.git 3.12-stable patches added patches: mm-compaction-respect-ignore_skip_hint-in-update_pageblock_skip.patch mm-fix-tlb-flush-race-between-migration-and-change_protection_range.patch mm-fix-use-after-free-in-sys_remap_file_pages.patch mm-hugetlb-check-for-pte-null-pointer-in-__page_check_address.patch mm-memory-failure.c-recheck-pagehuge-after-hugetlb-page-migrate-successfully.patch mm-memory-failure.c-transfer-page-count-from-head-page-to-tail-page-after-split-thp.patch mm-mempolicy-correct-putback-method-for-isolate-pages-if-failed.patch mm-munlock-fix-a-bug-where-thp-tail-page-is-encountered.patch mm-munlock-fix-deadlock-in-__munlock_pagevec.patch mm-numa-guarantee-that-tlb_flush_pending-updates-are-visible-before-page-table-updates.patch mm-page_alloc-revert-numa-aspect-of-fair-allocation-policy.patch --- diff --git a/queue-3.12/mm-compaction-respect-ignore_skip_hint-in-update_pageblock_skip.patch b/queue-3.12/mm-compaction-respect-ignore_skip_hint-in-update_pageblock_skip.patch new file mode 100644 index 00000000000..fe507b2160c --- /dev/null +++ b/queue-3.12/mm-compaction-respect-ignore_skip_hint-in-update_pageblock_skip.patch @@ -0,0 +1,47 @@ +From 6815bf3f233e0b10c99a758497d5d236063b010b Mon Sep 17 00:00:00 2001 +From: Joonsoo Kim +Date: Wed, 18 Dec 2013 17:08:52 -0800 +Subject: mm/compaction: respect ignore_skip_hint in update_pageblock_skip + +From: Joonsoo Kim + +commit 6815bf3f233e0b10c99a758497d5d236063b010b upstream. + +update_pageblock_skip() only fits to compaction which tries to isolate +by pageblock unit. If isolate_migratepages_range() is called by CMA, it +try to isolate regardless of pageblock unit and it don't reference +get_pageblock_skip() by ignore_skip_hint. We should also respect it on +update_pageblock_skip() to prevent from setting the wrong information. + +Signed-off-by: Joonsoo Kim +Acked-by: Vlastimil Babka +Reviewed-by: Naoya Horiguchi +Reviewed-by: Wanpeng Li +Cc: Christoph Lameter +Cc: Rafael Aquini +Cc: Vlastimil Babka +Cc: Wanpeng Li +Cc: Mel Gorman +Cc: Rik van Riel +Cc: Zhang Yanfei +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/compaction.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/mm/compaction.c ++++ b/mm/compaction.c +@@ -134,6 +134,10 @@ static void update_pageblock_skip(struct + bool migrate_scanner) + { + struct zone *zone = cc->zone; ++ ++ if (cc->ignore_skip_hint) ++ return; ++ + if (!page) + return; + diff --git a/queue-3.12/mm-fix-tlb-flush-race-between-migration-and-change_protection_range.patch b/queue-3.12/mm-fix-tlb-flush-race-between-migration-and-change_protection_range.patch new file mode 100644 index 00000000000..7f70b77f19c --- /dev/null +++ b/queue-3.12/mm-fix-tlb-flush-race-between-migration-and-change_protection_range.patch @@ -0,0 +1,242 @@ +From 20841405940e7be0617612d521e206e4b6b325db Mon Sep 17 00:00:00 2001 +From: Rik van Riel +Date: Wed, 18 Dec 2013 17:08:44 -0800 +Subject: mm: fix TLB flush race between migration, and change_protection_range + +From: Rik van Riel + +commit 20841405940e7be0617612d521e206e4b6b325db upstream. + +There are a few subtle races, between change_protection_range (used by +mprotect and change_prot_numa) on one side, and NUMA page migration and +compaction on the other side. 
+ +The basic race is that there is a time window between when the PTE gets +made non-present (PROT_NONE or NUMA), and the TLB is flushed. + +During that time, a CPU may continue writing to the page. + +This is fine most of the time, however compaction or the NUMA migration +code may come in, and migrate the page away. + +When that happens, the CPU may continue writing, through the cached +translation, to what is no longer the current memory location of the +process. + +This only affects x86, which has a somewhat optimistic pte_accessible. +All other architectures appear to be safe, and will either always flush, +or flush whenever there is a valid mapping, even with no permissions +(SPARC). + +The basic race looks like this: + +CPU A CPU B CPU C + + load TLB entry +make entry PTE/PMD_NUMA + fault on entry + read/write old page + start migrating page + change PTE/PMD to new page + read/write old page [*] +flush TLB + reload TLB from new entry + read/write new page + lose data + +[*] the old page may belong to a new user at this point! + +The obvious fix is to flush remote TLB entries, by making sure that +pte_accessible aware of the fact that PROT_NONE and PROT_NUMA memory may +still be accessible if there is a TLB flush pending for the mm. + +This should fix both NUMA migration and compaction. + +[mgorman@suse.de: fix build] +Signed-off-by: Rik van Riel +Signed-off-by: Mel Gorman +Cc: Alex Thorlton +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + arch/sparc/include/asm/pgtable_64.h | 4 +-- + arch/x86/include/asm/pgtable.h | 11 +++++++-- + include/asm-generic/pgtable.h | 2 - + include/linux/mm_types.h | 44 ++++++++++++++++++++++++++++++++++++ + kernel/fork.c | 1 + mm/huge_memory.c | 7 +++++ + mm/mprotect.c | 2 + + mm/pgtable-generic.c | 5 ++-- + 8 files changed, 69 insertions(+), 7 deletions(-) + +--- a/arch/sparc/include/asm/pgtable_64.h ++++ b/arch/sparc/include/asm/pgtable_64.h +@@ -616,7 +616,7 @@ static inline unsigned long pte_present( + } + + #define pte_accessible pte_accessible +-static inline unsigned long pte_accessible(pte_t a) ++static inline unsigned long pte_accessible(struct mm_struct *mm, pte_t a) + { + return pte_val(a) & _PAGE_VALID; + } +@@ -806,7 +806,7 @@ static inline void __set_pte_at(struct m + * SUN4V NOTE: _PAGE_VALID is the same value in both the SUN4U + * and SUN4V pte layout, so this inline test is fine. 
+ */ +- if (likely(mm != &init_mm) && pte_accessible(orig)) ++ if (likely(mm != &init_mm) && pte_accessible(mm, orig)) + tlb_batch_add(mm, addr, ptep, orig, fullmm); + } + +--- a/arch/x86/include/asm/pgtable.h ++++ b/arch/x86/include/asm/pgtable.h +@@ -452,9 +452,16 @@ static inline int pte_present(pte_t a) + } + + #define pte_accessible pte_accessible +-static inline int pte_accessible(pte_t a) ++static inline bool pte_accessible(struct mm_struct *mm, pte_t a) + { +- return pte_flags(a) & _PAGE_PRESENT; ++ if (pte_flags(a) & _PAGE_PRESENT) ++ return true; ++ ++ if ((pte_flags(a) & (_PAGE_PROTNONE | _PAGE_NUMA)) && ++ mm_tlb_flush_pending(mm)) ++ return true; ++ ++ return false; + } + + static inline int pte_hidden(pte_t pte) +--- a/include/asm-generic/pgtable.h ++++ b/include/asm-generic/pgtable.h +@@ -217,7 +217,7 @@ static inline int pmd_same(pmd_t pmd_a, + #endif + + #ifndef pte_accessible +-# define pte_accessible(pte) ((void)(pte),1) ++# define pte_accessible(mm, pte) ((void)(pte), 1) + #endif + + #ifndef flush_tlb_fix_spurious_fault +--- a/include/linux/mm_types.h ++++ b/include/linux/mm_types.h +@@ -435,6 +435,14 @@ struct mm_struct { + */ + int first_nid; + #endif ++#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION) ++ /* ++ * An operation with batched TLB flushing is going on. Anything that ++ * can move process memory needs to flush the TLB when moving a ++ * PROT_NONE or PROT_NUMA mapped page. ++ */ ++ bool tlb_flush_pending; ++#endif + struct uprobes_state uprobes_state; + }; + +@@ -455,4 +463,40 @@ static inline cpumask_t *mm_cpumask(stru + return mm->cpu_vm_mask_var; + } + ++#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION) ++/* ++ * Memory barriers to keep this state in sync are graciously provided by ++ * the page table locks, outside of which no page table modifications happen. ++ * The barriers below prevent the compiler from re-ordering the instructions ++ * around the memory barriers that are already present in the code. ++ */ ++static inline bool mm_tlb_flush_pending(struct mm_struct *mm) ++{ ++ barrier(); ++ return mm->tlb_flush_pending; ++} ++static inline void set_tlb_flush_pending(struct mm_struct *mm) ++{ ++ mm->tlb_flush_pending = true; ++ barrier(); ++} ++/* Clearing is done after a TLB flush, which also provides a barrier. */ ++static inline void clear_tlb_flush_pending(struct mm_struct *mm) ++{ ++ barrier(); ++ mm->tlb_flush_pending = false; ++} ++#else ++static inline bool mm_tlb_flush_pending(struct mm_struct *mm) ++{ ++ return false; ++} ++static inline void set_tlb_flush_pending(struct mm_struct *mm) ++{ ++} ++static inline void clear_tlb_flush_pending(struct mm_struct *mm) ++{ ++} ++#endif ++ + #endif /* _LINUX_MM_TYPES_H */ +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -537,6 +537,7 @@ static struct mm_struct *mm_init(struct + spin_lock_init(&mm->page_table_lock); + mm_init_aio(mm); + mm_init_owner(mm, p); ++ clear_tlb_flush_pending(mm); + + if (likely(!mm_alloc_pgd(mm))) { + mm->def_flags = 0; +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -1342,6 +1342,13 @@ int do_huge_pmd_numa_page(struct mm_stru + } + + /* ++ * The page_table_lock above provides a memory barrier ++ * with change_protection_range. ++ */ ++ if (mm_tlb_flush_pending(mm)) ++ flush_tlb_range(vma, haddr, haddr + HPAGE_PMD_SIZE); ++ ++ /* + * Migrate the THP to the requested node, returns with page unlocked + * and pmd_numa cleared. 
+ */ +--- a/mm/mprotect.c ++++ b/mm/mprotect.c +@@ -209,6 +209,7 @@ static unsigned long change_protection_r + BUG_ON(addr >= end); + pgd = pgd_offset(mm, addr); + flush_cache_range(vma, addr, end); ++ set_tlb_flush_pending(mm); + do { + next = pgd_addr_end(addr, end); + if (pgd_none_or_clear_bad(pgd)) +@@ -220,6 +221,7 @@ static unsigned long change_protection_r + /* Only flush the TLB if we actually modified any entries: */ + if (pages) + flush_tlb_range(vma, start, end); ++ clear_tlb_flush_pending(mm); + + return pages; + } +--- a/mm/pgtable-generic.c ++++ b/mm/pgtable-generic.c +@@ -110,9 +110,10 @@ int pmdp_clear_flush_young(struct vm_are + pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long address, + pte_t *ptep) + { ++ struct mm_struct *mm = (vma)->vm_mm; + pte_t pte; +- pte = ptep_get_and_clear((vma)->vm_mm, address, ptep); +- if (pte_accessible(pte)) ++ pte = ptep_get_and_clear(mm, address, ptep); ++ if (pte_accessible(mm, pte)) + flush_tlb_page(vma, address); + return pte; + } diff --git a/queue-3.12/mm-fix-use-after-free-in-sys_remap_file_pages.patch b/queue-3.12/mm-fix-use-after-free-in-sys_remap_file_pages.patch new file mode 100644 index 00000000000..0aab157b8d5 --- /dev/null +++ b/queue-3.12/mm-fix-use-after-free-in-sys_remap_file_pages.patch @@ -0,0 +1,61 @@ +From 4eb919825e6c3c7fb3630d5621f6d11e98a18b3a Mon Sep 17 00:00:00 2001 +From: Rik van Riel +Date: Thu, 2 Jan 2014 12:58:46 -0800 +Subject: mm: fix use-after-free in sys_remap_file_pages + +From: Rik van Riel + +commit 4eb919825e6c3c7fb3630d5621f6d11e98a18b3a upstream. + +remap_file_pages calls mmap_region, which may merge the VMA with other +existing VMAs, and free "vma". This can lead to a use-after-free bug. +Avoid the bug by remembering vm_flags before calling mmap_region, and +not trying to dereference vma later. 
+ +Signed-off-by: Rik van Riel +Reported-by: Dmitry Vyukov +Cc: PaX Team +Cc: Kees Cook +Cc: Michel Lespinasse +Cc: Cyrill Gorcunov +Cc: Hugh Dickins +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/fremap.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +--- a/mm/fremap.c ++++ b/mm/fremap.c +@@ -208,9 +208,10 @@ get_write_lock: + if (mapping_cap_account_dirty(mapping)) { + unsigned long addr; + struct file *file = get_file(vma->vm_file); ++ /* mmap_region may free vma; grab the info now */ ++ vm_flags = vma->vm_flags; + +- addr = mmap_region(file, start, size, +- vma->vm_flags, pgoff); ++ addr = mmap_region(file, start, size, vm_flags, pgoff); + fput(file); + if (IS_ERR_VALUE(addr)) { + err = addr; +@@ -218,7 +219,7 @@ get_write_lock: + BUG_ON(addr != start); + err = 0; + } +- goto out; ++ goto out_freed; + } + mutex_lock(&mapping->i_mmap_mutex); + flush_dcache_mmap_lock(mapping); +@@ -253,6 +254,7 @@ get_write_lock: + out: + if (vma) + vm_flags = vma->vm_flags; ++out_freed: + if (likely(!has_write_lock)) + up_read(&mm->mmap_sem); + else diff --git a/queue-3.12/mm-hugetlb-check-for-pte-null-pointer-in-__page_check_address.patch b/queue-3.12/mm-hugetlb-check-for-pte-null-pointer-in-__page_check_address.patch new file mode 100644 index 00000000000..c6e89401d5e --- /dev/null +++ b/queue-3.12/mm-hugetlb-check-for-pte-null-pointer-in-__page_check_address.patch @@ -0,0 +1,40 @@ +From 98398c32f6687ee1e1f3ae084effb4b75adb0747 Mon Sep 17 00:00:00 2001 +From: Jianguo Wu +Date: Wed, 18 Dec 2013 17:08:59 -0800 +Subject: mm/hugetlb: check for pte NULL pointer in __page_check_address() + +From: Jianguo Wu + +commit 98398c32f6687ee1e1f3ae084effb4b75adb0747 upstream. + +In __page_check_address(), if address's pud is not present, +huge_pte_offset() will return NULL, we should check the return value. + +Signed-off-by: Jianguo Wu +Cc: Naoya Horiguchi +Cc: Mel Gorman +Cc: qiuxishi +Cc: Hanjun Guo +Acked-by: Kirill A. Shutemov +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/rmap.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/mm/rmap.c ++++ b/mm/rmap.c +@@ -600,7 +600,11 @@ pte_t *__page_check_address(struct page + spinlock_t *ptl; + + if (unlikely(PageHuge(page))) { ++ /* when pud is not present, pte will be NULL */ + pte = huge_pte_offset(mm, address); ++ if (!pte) ++ return NULL; ++ + ptl = &mm->page_table_lock; + goto check; + } diff --git a/queue-3.12/mm-memory-failure.c-recheck-pagehuge-after-hugetlb-page-migrate-successfully.patch b/queue-3.12/mm-memory-failure.c-recheck-pagehuge-after-hugetlb-page-migrate-successfully.patch new file mode 100644 index 00000000000..f3ebab03b76 --- /dev/null +++ b/queue-3.12/mm-memory-failure.c-recheck-pagehuge-after-hugetlb-page-migrate-successfully.patch @@ -0,0 +1,55 @@ +From a49ecbcd7b0d5a1cda7d60e03df402dd0ef76ac8 Mon Sep 17 00:00:00 2001 +From: Jianguo Wu +Date: Wed, 18 Dec 2013 17:08:54 -0800 +Subject: mm/memory-failure.c: recheck PageHuge() after hugetlb page migrate successfully + +From: Jianguo Wu + +commit a49ecbcd7b0d5a1cda7d60e03df402dd0ef76ac8 upstream. + +After a successful hugetlb page migration by soft offline, the source +page will either be freed into hugepage_freelists or buddy(over-commit +page). If page is in buddy, page_hstate(page) will be NULL. It will +hit a NULL pointer dereference in dequeue_hwpoisoned_huge_page(). 
+ + BUG: unable to handle kernel NULL pointer dereference at 0000000000000058 + IP: [] dequeue_hwpoisoned_huge_page+0x131/0x1d0 + PGD c23762067 PUD c24be2067 PMD 0 + Oops: 0000 [#1] SMP + +So check PageHuge(page) after call migrate_pages() successfully. + +Signed-off-by: Jianguo Wu +Tested-by: Naoya Horiguchi +Reviewed-by: Naoya Horiguchi +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/memory-failure.c | 14 ++++++++++---- + 1 file changed, 10 insertions(+), 4 deletions(-) + +--- a/mm/memory-failure.c ++++ b/mm/memory-failure.c +@@ -1519,10 +1519,16 @@ static int soft_offline_huge_page(struct + if (ret > 0) + ret = -EIO; + } else { +- set_page_hwpoison_huge_page(hpage); +- dequeue_hwpoisoned_huge_page(hpage); +- atomic_long_add(1 << compound_order(hpage), +- &num_poisoned_pages); ++ /* overcommit hugetlb page will be freed to buddy */ ++ if (PageHuge(page)) { ++ set_page_hwpoison_huge_page(hpage); ++ dequeue_hwpoisoned_huge_page(hpage); ++ atomic_long_add(1 << compound_order(hpage), ++ &num_poisoned_pages); ++ } else { ++ SetPageHWPoison(page); ++ atomic_long_inc(&num_poisoned_pages); ++ } + } + return ret; + } diff --git a/queue-3.12/mm-memory-failure.c-transfer-page-count-from-head-page-to-tail-page-after-split-thp.patch b/queue-3.12/mm-memory-failure.c-transfer-page-count-from-head-page-to-tail-page-after-split-thp.patch new file mode 100644 index 00000000000..2ace1d26741 --- /dev/null +++ b/queue-3.12/mm-memory-failure.c-transfer-page-count-from-head-page-to-tail-page-after-split-thp.patch @@ -0,0 +1,90 @@ +From a3e0f9e47d5ef7858a26cc12d90ad5146e802d47 Mon Sep 17 00:00:00 2001 +From: Naoya Horiguchi +Date: Thu, 2 Jan 2014 12:58:51 -0800 +Subject: mm/memory-failure.c: transfer page count from head page to tail page after split thp + +From: Naoya Horiguchi + +commit a3e0f9e47d5ef7858a26cc12d90ad5146e802d47 upstream. + +Memory failures on thp tail pages cause kernel panic like below: + + mce: [Hardware Error]: Machine check events logged + MCE exception done on CPU 7 + BUG: unable to handle kernel NULL pointer dereference at 0000000000000058 + IP: [] dequeue_hwpoisoned_huge_page+0x131/0x1e0 + PGD bae42067 PUD ba47d067 PMD 0 + Oops: 0000 [#1] SMP + ... + CPU: 7 PID: 128 Comm: kworker/7:2 Tainted: G M O 3.13.0-rc4-131217-1558-00003-g83b7df08e462 #25 + ... + Call Trace: + me_huge_page+0x3e/0x50 + memory_failure+0x4bb/0xc20 + mce_process_work+0x3e/0x70 + process_one_work+0x171/0x420 + worker_thread+0x11b/0x3a0 + ? manage_workers.isra.25+0x2b0/0x2b0 + kthread+0xe4/0x100 + ? kthread_create_on_node+0x190/0x190 + ret_from_fork+0x7c/0xb0 + ? kthread_create_on_node+0x190/0x190 + ... + RIP dequeue_hwpoisoned_huge_page+0x131/0x1e0 + CR2: 0000000000000058 + +The reasoning of this problem is shown below: + - when we have a memory error on a thp tail page, the memory error + handler grabs a refcount of the head page to keep the thp under us. + - Before unmapping the error page from processes, we split the thp, + where page refcounts of both of head/tail pages don't change. + - Then we call try_to_unmap() over the error page (which was a tail + page before). We didn't pin the error page to handle the memory error, + this error page is freed and removed from LRU list. + - We never have the error page on LRU list, so the first page state + check returns "unknown page," then we move to the second check + with the saved page flag. + - The saved page flag have PG_tail set, so the second page state check + returns "hugepage." 
+ - We call me_huge_page() for freed error page, then we hit the above panic. + +The root cause is that we didn't move refcount from the head page to the +tail page after split thp. So this patch suggests to do this. + +This panic was introduced by commit 524fca1e73 ("HWPOISON: fix +misjudgement of page_action() for errors on mlocked pages"). Note that we +did have the same refcount problem before this commit, but it was just +ignored because we had only first page state check which returned "unknown +page." The commit changed the refcount problem from "doesn't work" to +"kernel panic." + +Signed-off-by: Naoya Horiguchi +Reviewed-by: Wanpeng Li +Cc: Andi Kleen +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/memory-failure.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +--- a/mm/memory-failure.c ++++ b/mm/memory-failure.c +@@ -938,6 +938,16 @@ static int hwpoison_user_mappings(struct + BUG_ON(!PageHWPoison(p)); + return SWAP_FAIL; + } ++ /* ++ * We pinned the head page for hwpoison handling, ++ * now we split the thp and we are interested in ++ * the hwpoisoned raw page, so move the refcount ++ * to it. ++ */ ++ if (hpage != p) { ++ put_page(hpage); ++ get_page(p); ++ } + /* THP is split, so ppage should be the real poisoned page. */ + ppage = p; + } diff --git a/queue-3.12/mm-mempolicy-correct-putback-method-for-isolate-pages-if-failed.patch b/queue-3.12/mm-mempolicy-correct-putback-method-for-isolate-pages-if-failed.patch new file mode 100644 index 00000000000..d9bc96e4189 --- /dev/null +++ b/queue-3.12/mm-mempolicy-correct-putback-method-for-isolate-pages-if-failed.patch @@ -0,0 +1,45 @@ +From b0e5fd7359f1ce8db4ccb862b3aa80d2f2cbf4d0 Mon Sep 17 00:00:00 2001 +From: Joonsoo Kim +Date: Wed, 18 Dec 2013 17:08:51 -0800 +Subject: mm/mempolicy: correct putback method for isolate pages if failed + +From: Joonsoo Kim + +commit b0e5fd7359f1ce8db4ccb862b3aa80d2f2cbf4d0 upstream. + +queue_pages_range() isolates hugetlbfs pages and putback_lru_pages() +can't handle these. We should change it to putback_movable_pages(). + +Naoya said that it is worth going into stable, because it can break +in-use hugepage list. + +Signed-off-by: Joonsoo Kim +Acked-by: Rafael Aquini +Reviewed-by: Naoya Horiguchi +Reviewed-by: Wanpeng Li +Cc: Christoph Lameter +Cc: Vlastimil Babka +Cc: Wanpeng Li +Cc: Mel Gorman +Cc: Rik van Riel +Cc: Vlastimil Babka +Cc: Zhang Yanfei +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/mempolicy.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/mm/mempolicy.c ++++ b/mm/mempolicy.c +@@ -1317,7 +1317,7 @@ static long do_mbind(unsigned long start + if (nr_failed && (flags & MPOL_MF_STRICT)) + err = -EIO; + } else +- putback_lru_pages(&pagelist); ++ putback_movable_pages(&pagelist); + + up_write(&mm->mmap_sem); + mpol_out: diff --git a/queue-3.12/mm-munlock-fix-a-bug-where-thp-tail-page-is-encountered.patch b/queue-3.12/mm-munlock-fix-a-bug-where-thp-tail-page-is-encountered.patch new file mode 100644 index 00000000000..6efe619a31d --- /dev/null +++ b/queue-3.12/mm-munlock-fix-a-bug-where-thp-tail-page-is-encountered.patch @@ -0,0 +1,116 @@ +From c424be1cbbf852e46acc84d73162af3066cd2c86 Mon Sep 17 00:00:00 2001 +From: Vlastimil Babka +Date: Thu, 2 Jan 2014 12:58:43 -0800 +Subject: mm: munlock: fix a bug where THP tail page is encountered + +From: Vlastimil Babka + +commit c424be1cbbf852e46acc84d73162af3066cd2c86 upstream. 
+ +Since commit ff6a6da60b89 ("mm: accelerate munlock() treatment of THP +pages") munlock skips tail pages of a munlocked THP page. However, when +the head page already has PageMlocked unset, it will not skip the tail +pages. + +Commit 7225522bb429 ("mm: munlock: batch non-THP page isolation and +munlock+putback using pagevec") has added a PageTransHuge() check which +contains VM_BUG_ON(PageTail(page)). Sasha Levin found this triggered +using trinity, on the first tail page of a THP page without PageMlocked +flag. + +This patch fixes the issue by skipping tail pages also in the case when +PageMlocked flag is unset. There is still a possibility of race with +THP page split between clearing PageMlocked and determining how many +pages to skip. The race might result in former tail pages not being +skipped, which is however no longer a bug, as during the skip the +PageTail flags are cleared. + +However this race also affects correctness of NR_MLOCK accounting, which +is to be fixed in a separate patch. + +Signed-off-by: Vlastimil Babka +Reported-by: Sasha Levin +Cc: Michel Lespinasse +Cc: Andrea Arcangeli +Cc: Rik van Riel +Cc: Mel Gorman +Cc: Hugh Dickins +Cc: Bob Liu +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/mlock.c | 29 ++++++++++++++++++++++------- + 1 file changed, 22 insertions(+), 7 deletions(-) + +--- a/mm/mlock.c ++++ b/mm/mlock.c +@@ -133,7 +133,10 @@ static void __munlock_isolation_failed(s + + /** + * munlock_vma_page - munlock a vma page +- * @page - page to be unlocked ++ * @page - page to be unlocked, either a normal page or THP page head ++ * ++ * returns the size of the page as a page mask (0 for normal page, ++ * HPAGE_PMD_NR - 1 for THP head page) + * + * called from munlock()/munmap() path with page supposedly on the LRU. + * When we munlock a page, because the vma where we found the page is being +@@ -148,21 +151,30 @@ static void __munlock_isolation_failed(s + */ + unsigned int munlock_vma_page(struct page *page) + { +- unsigned int page_mask = 0; ++ unsigned int nr_pages; + + BUG_ON(!PageLocked(page)); + + if (TestClearPageMlocked(page)) { +- unsigned int nr_pages = hpage_nr_pages(page); ++ nr_pages = hpage_nr_pages(page); + mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages); +- page_mask = nr_pages - 1; + if (!isolate_lru_page(page)) + __munlock_isolated_page(page); + else + __munlock_isolation_failed(page); ++ } else { ++ nr_pages = hpage_nr_pages(page); + } + +- return page_mask; ++ /* ++ * Regardless of the original PageMlocked flag, we determine nr_pages ++ * after touching the flag. This leaves a possible race with a THP page ++ * split, such that a whole THP page was munlocked, but nr_pages == 1. ++ * Returning a smaller mask due to that is OK, the worst that can ++ * happen is subsequent useless scanning of the former tail pages. ++ * The NR_MLOCK accounting can however become broken. 
++ */ ++ return nr_pages - 1; + } + + /** +@@ -440,7 +452,8 @@ void munlock_vma_pages_range(struct vm_a + + while (start < end) { + struct page *page = NULL; +- unsigned int page_mask, page_increm; ++ unsigned int page_mask; ++ unsigned long page_increm; + struct pagevec pvec; + struct zone *zone; + int zoneid; +@@ -490,7 +503,9 @@ void munlock_vma_pages_range(struct vm_a + goto next; + } + } +- page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask); ++ /* It's a bug to munlock in the middle of a THP page */ ++ VM_BUG_ON((start >> PAGE_SHIFT) & page_mask); ++ page_increm = 1 + page_mask; + start += page_increm * PAGE_SIZE; + next: + cond_resched(); diff --git a/queue-3.12/mm-munlock-fix-deadlock-in-__munlock_pagevec.patch b/queue-3.12/mm-munlock-fix-deadlock-in-__munlock_pagevec.patch new file mode 100644 index 00000000000..c10ccee5a40 --- /dev/null +++ b/queue-3.12/mm-munlock-fix-deadlock-in-__munlock_pagevec.patch @@ -0,0 +1,81 @@ +From 3b25df93c6e37e323b86a2a8c1e00c0a2821c6c9 Mon Sep 17 00:00:00 2001 +From: Vlastimil Babka +Date: Thu, 2 Jan 2014 12:58:44 -0800 +Subject: mm: munlock: fix deadlock in __munlock_pagevec() + +From: Vlastimil Babka + +commit 3b25df93c6e37e323b86a2a8c1e00c0a2821c6c9 upstream. + +Commit 7225522bb429 ("mm: munlock: batch non-THP page isolation and +munlock+putback using pagevec" introduced __munlock_pagevec() to speed +up munlock by holding lru_lock over multiple isolated pages. Pages that +fail to be isolated are put_page()d immediately, also within the lock. + +This can lead to deadlock when __munlock_pagevec() becomes the holder of +the last page pin and put_page() leads to __page_cache_release() which +also locks lru_lock. The deadlock has been observed by Sasha Levin +using trinity. + +This patch avoids the deadlock by deferring put_page() operations until +lru_lock is released. Another pagevec (which is also used by later +phases of the function is reused to gather the pages for put_page() +operation. + +Signed-off-by: Vlastimil Babka +Reported-by: Sasha Levin +Cc: Michel Lespinasse +Cc: Andrea Arcangeli +Cc: Rik van Riel +Cc: Mel Gorman +Cc: Hugh Dickins +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/mlock.c | 15 ++++++++++----- + 1 file changed, 10 insertions(+), 5 deletions(-) + +--- a/mm/mlock.c ++++ b/mm/mlock.c +@@ -298,10 +298,12 @@ static void __munlock_pagevec(struct pag + { + int i; + int nr = pagevec_count(pvec); +- int delta_munlocked = -nr; ++ int delta_munlocked; + struct pagevec pvec_putback; + int pgrescued = 0; + ++ pagevec_init(&pvec_putback, 0); ++ + /* Phase 1: page isolation */ + spin_lock_irq(&zone->lru_lock); + for (i = 0; i < nr; i++) { +@@ -330,18 +332,21 @@ skip_munlock: + /* + * We won't be munlocking this page in the next phase + * but we still need to release the follow_page_mask() +- * pin. ++ * pin. We cannot do it under lru_lock however. If it's ++ * the last pin, __page_cache_release would deadlock. 
+ */ ++ pagevec_add(&pvec_putback, pvec->pages[i]); + pvec->pages[i] = NULL; +- put_page(page); +- delta_munlocked++; + } + } ++ delta_munlocked = -nr + pagevec_count(&pvec_putback); + __mod_zone_page_state(zone, NR_MLOCK, delta_munlocked); + spin_unlock_irq(&zone->lru_lock); + ++ /* Now we can release pins of pages that we are not munlocking */ ++ pagevec_release(&pvec_putback); ++ + /* Phase 2: page munlock */ +- pagevec_init(&pvec_putback, 0); + for (i = 0; i < nr; i++) { + struct page *page = pvec->pages[i]; + diff --git a/queue-3.12/mm-numa-guarantee-that-tlb_flush_pending-updates-are-visible-before-page-table-updates.patch b/queue-3.12/mm-numa-guarantee-that-tlb_flush_pending-updates-are-visible-before-page-table-updates.patch new file mode 100644 index 00000000000..a7266433166 --- /dev/null +++ b/queue-3.12/mm-numa-guarantee-that-tlb_flush_pending-updates-are-visible-before-page-table-updates.patch @@ -0,0 +1,42 @@ +From af2c1401e6f9177483be4fad876d0073669df9df Mon Sep 17 00:00:00 2001 +From: Mel Gorman +Date: Wed, 18 Dec 2013 17:08:45 -0800 +Subject: mm: numa: guarantee that tlb_flush_pending updates are visible before page table updates + +From: Mel Gorman + +commit af2c1401e6f9177483be4fad876d0073669df9df upstream. + +According to documentation on barriers, stores issued before a LOCK can +complete after the lock implying that it's possible tlb_flush_pending +can be visible after a page table update. As per revised documentation, +this patch adds a smp_mb__before_spinlock to guarantee the correct +ordering. + +Signed-off-by: Mel Gorman +Acked-by: Paul E. McKenney +Reviewed-by: Rik van Riel +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/mm_types.h | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/include/linux/mm_types.h ++++ b/include/linux/mm_types.h +@@ -478,7 +478,12 @@ static inline bool mm_tlb_flush_pending( + static inline void set_tlb_flush_pending(struct mm_struct *mm) + { + mm->tlb_flush_pending = true; +- barrier(); ++ ++ /* ++ * Guarantee that the tlb_flush_pending store does not leak into the ++ * critical section updating the page tables ++ */ ++ smp_mb__before_spinlock(); + } + /* Clearing is done after a TLB flush, which also provides a barrier. */ + static inline void clear_tlb_flush_pending(struct mm_struct *mm) diff --git a/queue-3.12/mm-page_alloc-revert-numa-aspect-of-fair-allocation-policy.patch b/queue-3.12/mm-page_alloc-revert-numa-aspect-of-fair-allocation-policy.patch new file mode 100644 index 00000000000..921f6b368db --- /dev/null +++ b/queue-3.12/mm-page_alloc-revert-numa-aspect-of-fair-allocation-policy.patch @@ -0,0 +1,84 @@ +From fff4068cba484e6b0abe334ed6b15d5a215a3b25 Mon Sep 17 00:00:00 2001 +From: Johannes Weiner +Date: Fri, 20 Dec 2013 14:54:12 +0000 +Subject: mm: page_alloc: revert NUMA aspect of fair allocation policy + +From: Johannes Weiner + +commit fff4068cba484e6b0abe334ed6b15d5a215a3b25 upstream. + +Commit 81c0a2bb515f ("mm: page_alloc: fair zone allocator policy") meant +to bring aging fairness among zones in system, but it was overzealous +and badly regressed basic workloads on NUMA systems. + +Due to the way kswapd and page allocator interacts, we still want to +make sure that all zones in any given node are used equally for all +allocations to maximize memory utilization and prevent thrashing on the +highest zone in the node. 
+ +While the same principle applies to NUMA nodes - memory utilization is +obviously improved by spreading allocations throughout all nodes - +remote references can be costly and so many workloads prefer locality +over memory utilization. The original change assumed that +zone_reclaim_mode would be a good enough predictor for that, but it +turned out to be as indicative as a coin flip. + +Revert the NUMA aspect of the fairness until we can find a proper way to +make it configurable and agree on a sane default. + +Signed-off-by: Johannes Weiner +Reviewed-by: Michal Hocko +Signed-off-by: Mel Gorman +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/page_alloc.c | 19 +++++++++---------- + 1 file changed, 9 insertions(+), 10 deletions(-) + +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -1822,7 +1822,7 @@ static void zlc_clear_zones_full(struct + + static bool zone_local(struct zone *local_zone, struct zone *zone) + { +- return node_distance(local_zone->node, zone->node) == LOCAL_DISTANCE; ++ return local_zone->node == zone->node; + } + + static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone) +@@ -1919,18 +1919,17 @@ zonelist_scan: + * page was allocated in should have no effect on the + * time the page has in memory before being reclaimed. + * +- * When zone_reclaim_mode is enabled, try to stay in +- * local zones in the fastpath. If that fails, the +- * slowpath is entered, which will do another pass +- * starting with the local zones, but ultimately fall +- * back to remote zones that do not partake in the +- * fairness round-robin cycle of this zonelist. ++ * Try to stay in local zones in the fastpath. If ++ * that fails, the slowpath is entered, which will do ++ * another pass starting with the local zones, but ++ * ultimately fall back to remote zones that do not ++ * partake in the fairness round-robin cycle of this ++ * zonelist. + */ + if (alloc_flags & ALLOC_WMARK_LOW) { + if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0) + continue; +- if (zone_reclaim_mode && +- !zone_local(preferred_zone, zone)) ++ if (!zone_local(preferred_zone, zone)) + continue; + } + /* +@@ -2396,7 +2395,7 @@ static void prepare_slowpath(gfp_t gfp_m + * thrash fairness information for zones that are not + * actually part of this zonelist's round-robin cycle. 
+ */ +- if (zone_reclaim_mode && !zone_local(preferred_zone, zone)) ++ if (!zone_local(preferred_zone, zone)) + continue; + mod_zone_page_state(zone, NR_ALLOC_BATCH, + high_wmark_pages(zone) - diff --git a/queue-3.12/series b/queue-3.12/series index 71b8e3e8b13..af1ccee57d2 100644 --- a/queue-3.12/series +++ b/queue-3.12/series @@ -110,3 +110,14 @@ revert-of-address-handle-address-cells-2-specially.patch mm-clear-pmd_numa-before-invalidating.patch mm-numa-ensure-anon_vma-is-locked-to-prevent-parallel-thp-splits.patch mm-numa-avoid-unnecessary-work-on-the-failure-path.patch +mm-fix-tlb-flush-race-between-migration-and-change_protection_range.patch +mm-numa-guarantee-that-tlb_flush_pending-updates-are-visible-before-page-table-updates.patch +mm-mempolicy-correct-putback-method-for-isolate-pages-if-failed.patch +mm-compaction-respect-ignore_skip_hint-in-update_pageblock_skip.patch +mm-memory-failure.c-recheck-pagehuge-after-hugetlb-page-migrate-successfully.patch +mm-hugetlb-check-for-pte-null-pointer-in-__page_check_address.patch +mm-page_alloc-revert-numa-aspect-of-fair-allocation-policy.patch +mm-munlock-fix-a-bug-where-thp-tail-page-is-encountered.patch +mm-munlock-fix-deadlock-in-__munlock_pagevec.patch +mm-fix-use-after-free-in-sys_remap_file_pages.patch +mm-memory-failure.c-transfer-page-count-from-head-page-to-tail-page-after-split-thp.patch
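
For readers reviewing the queue, the core of mm-fix-tlb-flush-race-between-migration-and-change_protection_range.patch is the tlb_flush_pending handshake it adds to struct mm_struct. Below is a minimal, standalone C sketch of that handshake only; the reduced mm_struct, the barrier() macro, and the main() driver are stand-ins invented for illustration and are not part of the queued patch or of the kernel headers.

/*
 * Standalone sketch of the tlb_flush_pending handshake added by the
 * TLB-flush-race patch above.  The struct below is a reduced stand-in
 * for the kernel's mm_struct; barrier() mirrors the compiler barrier
 * used in include/linux/mm_types.h.
 */
#include <stdbool.h>
#include <stdio.h>

#define barrier() __asm__ __volatile__("" ::: "memory")

struct mm_struct {
	bool tlb_flush_pending;	/* set while change_protection_range() runs */
};

static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
{
	/* In the kernel, ordering comes from the page table lock;
	   barrier() only prevents compiler reordering around it. */
	barrier();
	return mm->tlb_flush_pending;
}

static inline void set_tlb_flush_pending(struct mm_struct *mm)
{
	/* Publish the pending flush before the page tables are rewritten. */
	mm->tlb_flush_pending = true;
	barrier();
}

static inline void clear_tlb_flush_pending(struct mm_struct *mm)
{
	/* Clearing is done after the TLB flush, which is itself a barrier. */
	barrier();
	mm->tlb_flush_pending = false;
}

int main(void)
{
	struct mm_struct mm = { .tlb_flush_pending = false };

	/* change_protection_range() side: announce the pending flush,
	   rewrite PTEs, flush the TLB, then clear the flag. */
	set_tlb_flush_pending(&mm);

	/* Migration side (x86 pte_accessible()): a PROT_NONE/NUMA pte is
	   still treated as accessible while a flush is pending, so remote
	   TLBs get flushed before the page is migrated away. */
	printf("flush pending: %d\n", mm_tlb_flush_pending(&mm));

	/* flush_tlb_range() would run here, then: */
	clear_tlb_flush_pending(&mm);
	printf("flush pending: %d\n", mm_tlb_flush_pending(&mm));
	return 0;
}

As in the patch, the accessors are deliberately asymmetric: the setter publishes the flag before the page-table rewrite, while the clear relies on the preceding TLB flush for its ordering.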