From 56c9cc1e619f4c21689877c460616fa1ac8ae7f0 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 14 Jun 2013 14:40:46 -0700
Subject: [PATCH] 3.4-stable patches

added patches:
	md-raid1-consider-write-as-successful-only-if-at-least-one-non-faulty-and-non-rebuilding-drive-completed-it.patch
	mm-migration-add-migrate_entry_wait_huge.patch
	x86-fix-typo-in-kexec-register-clearing.patch
---
 ...nd-non-rebuilding-drive-completed-it.patch |  88 ++++++++++++++
 ...igration-add-migrate_entry_wait_huge.patch | 109 ++++++++++++++++++
 queue-3.4/series                              |   3 +
 ...-fix-typo-in-kexec-register-clearing.patch |  33 ++++++
 4 files changed, 233 insertions(+)
 create mode 100644 queue-3.4/md-raid1-consider-write-as-successful-only-if-at-least-one-non-faulty-and-non-rebuilding-drive-completed-it.patch
 create mode 100644 queue-3.4/mm-migration-add-migrate_entry_wait_huge.patch
 create mode 100644 queue-3.4/x86-fix-typo-in-kexec-register-clearing.patch

diff --git a/queue-3.4/md-raid1-consider-write-as-successful-only-if-at-least-one-non-faulty-and-non-rebuilding-drive-completed-it.patch b/queue-3.4/md-raid1-consider-write-as-successful-only-if-at-least-one-non-faulty-and-non-rebuilding-drive-completed-it.patch
new file mode 100644
index 00000000000..865430e8bff
--- /dev/null
+++ b/queue-3.4/md-raid1-consider-write-as-successful-only-if-at-least-one-non-faulty-and-non-rebuilding-drive-completed-it.patch
@@ -0,0 +1,88 @@
+From 3056e3aec8d8ba61a0710fb78b2d562600aa2ea7 Mon Sep 17 00:00:00 2001
+From: Alex Lyakas <alex@zadarastorage.com>
+Date: Tue, 4 Jun 2013 20:42:21 +0300
+Subject: md/raid1: consider WRITE as successful only if at least one non-Faulty and non-rebuilding drive completed it.
+
+From: Alex Lyakas <alex@zadarastorage.com>
+
+commit 3056e3aec8d8ba61a0710fb78b2d562600aa2ea7 upstream.
+
+Without that fix, the following scenario could happen:
+
+- RAID1 with drives A and B; drive B was freshly-added and is rebuilding
+- Drive A fails
+- A WRITE request arrives at the array. It is failed by drive A, so
+r1_bio is marked as R1BIO_WriteError, but the rebuilding drive B
+succeeds in writing it, so the same r1_bio is marked as
+R1BIO_Uptodate.
+- r1_bio arrives at handle_write_finished, badblocks are disabled,
+md_error()->error() does nothing because we don't fail the last drive
+of raid1.
+- raid_end_bio_io() calls call_bio_endio()
+- As a result, in call_bio_endio():
+        if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
+                clear_bit(BIO_UPTODATE, &bio->bi_flags);
+this code doesn't clear the BIO_UPTODATE flag, and the whole master
+WRITE succeeds, back to the upper layer.
+
+So we returned success to the upper layer, even though we had written
+the data onto the rebuilding drive only. But when we want to read the
+data back, we would not read from the rebuilding drive, so this data
+is lost.
+
+[neilb - applied identical change to raid10 as well]
+
+This bug can result in lost data, so it is suitable for any
+-stable kernel.
+
+Signed-off-by: Alex Lyakas <alex@zadarastorage.com>
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/raid1.c  |   12 +++++++++++-
+ drivers/md/raid10.c |   12 +++++++++++-
+ 2 files changed, 22 insertions(+), 2 deletions(-)
+
+--- a/drivers/md/raid1.c
++++ b/drivers/md/raid1.c
+@@ -413,7 +413,17 @@ static void raid1_end_write_request(stru
+ 
+ 		r1_bio->bios[mirror] = NULL;
+ 		to_put = bio;
+-		set_bit(R1BIO_Uptodate, &r1_bio->state);
++		/*
++		 * Do not set R1BIO_Uptodate if the current device is
++		 * rebuilding or Faulty. This is because we cannot use
++		 * such device for properly reading the data back (we could
++		 * potentially use it, if the current write would have felt
++		 * before rdev->recovery_offset, but for simplicity we don't
++		 * check this here.
++		 */
++		if (test_bit(In_sync, &conf->mirrors[mirror].rdev->flags) &&
++		    !test_bit(Faulty, &conf->mirrors[mirror].rdev->flags))
++			set_bit(R1BIO_Uptodate, &r1_bio->state);
+ 
+ 		/* Maybe we can clear some bad blocks. */
+ 		if (is_badblock(conf->mirrors[mirror].rdev,
+--- a/drivers/md/raid10.c
++++ b/drivers/md/raid10.c
+@@ -452,7 +452,17 @@ static void raid10_end_write_request(str
+ 		sector_t first_bad;
+ 		int bad_sectors;
+ 
+-		set_bit(R10BIO_Uptodate, &r10_bio->state);
++		/*
++		 * Do not set R10BIO_Uptodate if the current device is
++		 * rebuilding or Faulty. This is because we cannot use
++		 * such device for properly reading the data back (we could
++		 * potentially use it, if the current write would have fallen
++		 * before rdev->recovery_offset, but for simplicity we don't
++		 * check this here).
++		 */
++		if (test_bit(In_sync, &rdev->flags) &&
++		    !test_bit(Faulty, &rdev->flags))
++			set_bit(R10BIO_Uptodate, &r10_bio->state);
+ 
+ 		/* Maybe we can clear some bad blocks. */
+ 		if (is_badblock(rdev,
diff --git a/queue-3.4/mm-migration-add-migrate_entry_wait_huge.patch b/queue-3.4/mm-migration-add-migrate_entry_wait_huge.patch
new file mode 100644
index 00000000000..41194c69506
--- /dev/null
+++ b/queue-3.4/mm-migration-add-migrate_entry_wait_huge.patch
@@ -0,0 +1,109 @@
+From 30dad30922ccc733cfdbfe232090cf674dc374dc Mon Sep 17 00:00:00 2001
+From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Date: Wed, 12 Jun 2013 14:05:04 -0700
+Subject: mm: migration: add migrate_entry_wait_huge()
+
+From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+
+commit 30dad30922ccc733cfdbfe232090cf674dc374dc upstream.
+
+When we have a page fault for an address which is backed by a hugepage
+under migration, the kernel can't wait correctly and busy-loops on the
+hugepage fault until the migration finishes.  As a result, users who try
+to kick hugepage migration (via soft offlining, for example) occasionally
+experience long delays or soft lockups.
+
+This is because pte_offset_map_lock() can't get a correct migration entry
+or a correct page table lock for hugepage.  This patch introduces
+migration_entry_wait_huge() to solve this.
+
+Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Reviewed-by: Rik van Riel <riel@redhat.com>
+Reviewed-by: Wanpeng Li <liwanp@linux.vnet.ibm.com>
+Reviewed-by: Michal Hocko <mhocko@suse.cz>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Andi Kleen <andi@firstfloor.org>
+Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/swapops.h |    3 +++
+ mm/hugetlb.c            |    2 +-
+ mm/migrate.c            |   23 ++++++++++++++++++-----
+ 3 files changed, 22 insertions(+), 6 deletions(-)
+
+--- a/include/linux/swapops.h
++++ b/include/linux/swapops.h
+@@ -137,6 +137,7 @@ static inline void make_migration_entry_
+ 
+ extern void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
+ 					unsigned long address);
++extern void migration_entry_wait_huge(struct mm_struct *mm, pte_t *pte);
+ #else
+ 
+ #define make_migration_entry(page, write) swp_entry(0, 0)
+@@ -148,6 +149,8 @@ static inline int is_migration_entry(swp
+ static inline void make_migration_entry_read(swp_entry_t *entryp) { }
+ static inline void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
+ 					 unsigned long address) { }
++static inline void migration_entry_wait_huge(struct mm_struct *mm,
++					pte_t *pte) { }
+ static inline int is_write_migration_entry(swp_entry_t entry)
+ {
+ 	return 0;
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -2768,7 +2768,7 @@ int hugetlb_fault(struct mm_struct *mm,
+ 	if (ptep) {
+ 		entry = huge_ptep_get(ptep);
+ 		if (unlikely(is_hugetlb_entry_migration(entry))) {
+-			migration_entry_wait(mm, (pmd_t *)ptep, address);
++			migration_entry_wait_huge(mm, ptep);
+ 			return 0;
+ 		} else if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
+ 			return VM_FAULT_HWPOISON_LARGE |
+--- a/mm/migrate.c
++++ b/mm/migrate.c
+@@ -180,15 +180,14 @@ static void remove_migration_ptes(struct
+  * get to the page and wait until migration is finished.
+  * When we return from this function the fault will be retried.
+  */
+-void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
+-				unsigned long address)
++static void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
++				spinlock_t *ptl)
+ {
+-	pte_t *ptep, pte;
+-	spinlock_t *ptl;
++	pte_t pte;
+ 	swp_entry_t entry;
+ 	struct page *page;
+ 
+-	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
++	spin_lock(ptl);
+ 	pte = *ptep;
+ 	if (!is_swap_pte(pte))
+ 		goto out;
+@@ -216,6 +215,20 @@ out:
+ 	pte_unmap_unlock(ptep, ptl);
+ }
+ 
++void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
++				unsigned long address)
++{
++	spinlock_t *ptl = pte_lockptr(mm, pmd);
++	pte_t *ptep = pte_offset_map(pmd, address);
++	__migration_entry_wait(mm, ptep, ptl);
++}
++
++void migration_entry_wait_huge(struct mm_struct *mm, pte_t *pte)
++{
++	spinlock_t *ptl = &(mm)->page_table_lock;
++	__migration_entry_wait(mm, pte, ptl);
++}
++
+ #ifdef CONFIG_BLOCK
+ /* Returns true if all buffers are successfully locked */
+ static bool buffer_migrate_lock_buffers(struct buffer_head *head,
diff --git a/queue-3.4/series b/queue-3.4/series
index b71916b21f6..04f5bb3401a 100644
--- a/queue-3.4/series
+++ b/queue-3.4/series
@@ -13,3 +13,6 @@ reboot-rigrate-shutdown-reboot-to-boot-cpu.patch
 cciss-fix-broken-mutex-usage-in-ioctl.patch
 drm-i915-prefer-vbt-modes-for-svdo-lvds-over-edid.patch
 swap-avoid-read_swap_cache_async-race-to-deadlock-while-waiting-on-discard-i-o-completion.patch
+md-raid1-consider-write-as-successful-only-if-at-least-one-non-faulty-and-non-rebuilding-drive-completed-it.patch
+mm-migration-add-migrate_entry_wait_huge.patch
+x86-fix-typo-in-kexec-register-clearing.patch
diff --git a/queue-3.4/x86-fix-typo-in-kexec-register-clearing.patch b/queue-3.4/x86-fix-typo-in-kexec-register-clearing.patch
new file mode 100644
index 00000000000..e7324048358
--- /dev/null
+++ b/queue-3.4/x86-fix-typo-in-kexec-register-clearing.patch
@@ -0,0 +1,33 @@
+From c8a22d19dd238ede87aa0ac4f7dbea8da039b9c1 Mon Sep 17 00:00:00 2001
+From: Kees Cook <keescook@chromium.org>
+Date: Wed, 5 Jun 2013 11:47:18 -0700
+Subject: x86: Fix typo in kexec register clearing
+
+From: Kees Cook <keescook@chromium.org>
+
+commit c8a22d19dd238ede87aa0ac4f7dbea8da039b9c1 upstream.
+
+Fixes a typo in register clearing code. Thanks to PaX Team for fixing
+this originally, and James Troup for pointing it out.
+
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Link: http://lkml.kernel.org/r/20130605184718.GA8396@www.outflux.net
+Cc: PaX Team <pageexec@freemail.hu>
+Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/relocate_kernel_64.S |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/relocate_kernel_64.S
++++ b/arch/x86/kernel/relocate_kernel_64.S
+@@ -160,7 +160,7 @@ identity_mapped:
+ 	xorq    %rbp, %rbp
+ 	xorq	%r8,  %r8
+ 	xorq	%r9,  %r9
+-	xorq	%r10, %r9
++	xorq	%r10, %r10
+ 	xorq	%r11, %r11
+ 	xorq	%r12, %r12
+ 	xorq	%r13, %r13
-- 
2.47.3