From 56c9cc1e619f4c21689877c460616fa1ac8ae7f0 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 14 Jun 2013 14:40:46 -0700 Subject: [PATCH] 3.4-stable patches added patches: md-raid1-consider-write-as-successful-only-if-at-least-one-non-faulty-and-non-rebuilding-drive-completed-it.patch mm-migration-add-migrate_entry_wait_huge.patch x86-fix-typo-in-kexec-register-clearing.patch --- ...nd-non-rebuilding-drive-completed-it.patch | 88 ++++++++++++++ ...igration-add-migrate_entry_wait_huge.patch | 109 ++++++++++++++++++ queue-3.4/series | 3 + ...-fix-typo-in-kexec-register-clearing.patch | 33 ++++++ 4 files changed, 233 insertions(+) create mode 100644 queue-3.4/md-raid1-consider-write-as-successful-only-if-at-least-one-non-faulty-and-non-rebuilding-drive-completed-it.patch create mode 100644 queue-3.4/mm-migration-add-migrate_entry_wait_huge.patch create mode 100644 queue-3.4/x86-fix-typo-in-kexec-register-clearing.patch diff --git a/queue-3.4/md-raid1-consider-write-as-successful-only-if-at-least-one-non-faulty-and-non-rebuilding-drive-completed-it.patch b/queue-3.4/md-raid1-consider-write-as-successful-only-if-at-least-one-non-faulty-and-non-rebuilding-drive-completed-it.patch new file mode 100644 index 00000000000..865430e8bff --- /dev/null +++ b/queue-3.4/md-raid1-consider-write-as-successful-only-if-at-least-one-non-faulty-and-non-rebuilding-drive-completed-it.patch @@ -0,0 +1,88 @@ +From 3056e3aec8d8ba61a0710fb78b2d562600aa2ea7 Mon Sep 17 00:00:00 2001 +From: Alex Lyakas +Date: Tue, 4 Jun 2013 20:42:21 +0300 +Subject: md/raid1: consider WRITE as successful only if at least one non-Faulty and non-rebuilding drive completed it. + +From: Alex Lyakas + +commit 3056e3aec8d8ba61a0710fb78b2d562600aa2ea7 upstream. + +Without that fix, the following scenario could happen: + +- RAID1 with drives A and B; drive B was freshly-added and is rebuilding +- Drive A fails +- WRITE request arrives to the array. It is failed by drive A, so +r1_bio is marked as R1BIO_WriteError, but the rebuilding drive B +succeeds in writing it, so the same r1_bio is marked as +R1BIO_Uptodate. +- r1_bio arrives to handle_write_finished, badblocks are disabled, +md_error()->error() does nothing because we don't fail the last drive +of raid1 +- raid_end_bio_io() calls call_bio_endio() +- As a result, in call_bio_endio(): + if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) + clear_bit(BIO_UPTODATE, &bio->bi_flags); +this code doesn't clear the BIO_UPTODATE flag, and the whole master +WRITE succeeds, back to the upper layer. + +So we returned success to the upper layer, even though we had written +the data onto the rebuilding drive only. But when we want to read the +data back, we would not read from the rebuilding drive, so this data +is lost. + +[neilb - applied identical change to raid10 as well] + +This bug can result in lost data, so it is suitable for any +-stable kernel. + +Signed-off-by: Alex Lyakas +Signed-off-by: NeilBrown +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/raid1.c | 12 +++++++++++- + drivers/md/raid10.c | 12 +++++++++++- + 2 files changed, 22 insertions(+), 2 deletions(-) + +--- a/drivers/md/raid1.c ++++ b/drivers/md/raid1.c +@@ -413,7 +413,17 @@ static void raid1_end_write_request(stru + + r1_bio->bios[mirror] = NULL; + to_put = bio; +- set_bit(R1BIO_Uptodate, &r1_bio->state); ++ /* ++ * Do not set R1BIO_Uptodate if the current device is ++ * rebuilding or Faulty. This is because we cannot use ++ * such device for properly reading the data back (we could ++ * potentially use it, if the current write would have felt ++ * before rdev->recovery_offset, but for simplicity we don't ++ * check this here. ++ */ ++ if (test_bit(In_sync, &conf->mirrors[mirror].rdev->flags) && ++ !test_bit(Faulty, &conf->mirrors[mirror].rdev->flags)) ++ set_bit(R1BIO_Uptodate, &r1_bio->state); + + /* Maybe we can clear some bad blocks. */ + if (is_badblock(conf->mirrors[mirror].rdev, +--- a/drivers/md/raid10.c ++++ b/drivers/md/raid10.c +@@ -452,7 +452,17 @@ static void raid10_end_write_request(str + sector_t first_bad; + int bad_sectors; + +- set_bit(R10BIO_Uptodate, &r10_bio->state); ++ /* ++ * Do not set R10BIO_Uptodate if the current device is ++ * rebuilding or Faulty. This is because we cannot use ++ * such device for properly reading the data back (we could ++ * potentially use it, if the current write would have felt ++ * before rdev->recovery_offset, but for simplicity we don't ++ * check this here. ++ */ ++ if (test_bit(In_sync, &rdev->flags) && ++ !test_bit(Faulty, &rdev->flags)) ++ set_bit(R10BIO_Uptodate, &r10_bio->state); + + /* Maybe we can clear some bad blocks. */ + if (is_badblock(rdev, diff --git a/queue-3.4/mm-migration-add-migrate_entry_wait_huge.patch b/queue-3.4/mm-migration-add-migrate_entry_wait_huge.patch new file mode 100644 index 00000000000..41194c69506 --- /dev/null +++ b/queue-3.4/mm-migration-add-migrate_entry_wait_huge.patch @@ -0,0 +1,109 @@ +From 30dad30922ccc733cfdbfe232090cf674dc374dc Mon Sep 17 00:00:00 2001 +From: Naoya Horiguchi +Date: Wed, 12 Jun 2013 14:05:04 -0700 +Subject: mm: migration: add migrate_entry_wait_huge() + +From: Naoya Horiguchi + +commit 30dad30922ccc733cfdbfe232090cf674dc374dc upstream. + +When we have a page fault for the address which is backed by a hugepage +under migration, the kernel can't wait correctly and do busy looping on +hugepage fault until the migration finishes. As a result, users who try +to kick hugepage migration (via soft offlining, for example) occasionally +experience long delay or soft lockup. + +This is because pte_offset_map_lock() can't get a correct migration entry +or a correct page table lock for hugepage. This patch introduces +migration_entry_wait_huge() to solve this. + +Signed-off-by: Naoya Horiguchi +Reviewed-by: Rik van Riel +Reviewed-by: Wanpeng Li +Reviewed-by: Michal Hocko +Cc: Mel Gorman +Cc: Andi Kleen +Cc: KOSAKI Motohiro +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/swapops.h | 3 +++ + mm/hugetlb.c | 2 +- + mm/migrate.c | 23 ++++++++++++++++++----- + 3 files changed, 22 insertions(+), 6 deletions(-) + +--- a/include/linux/swapops.h ++++ b/include/linux/swapops.h +@@ -137,6 +137,7 @@ static inline void make_migration_entry_ + + extern void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, + unsigned long address); ++extern void migration_entry_wait_huge(struct mm_struct *mm, pte_t *pte); + #else + + #define make_migration_entry(page, write) swp_entry(0, 0) +@@ -148,6 +149,8 @@ static inline int is_migration_entry(swp + static inline void make_migration_entry_read(swp_entry_t *entryp) { } + static inline void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, + unsigned long address) { } ++static inline void migration_entry_wait_huge(struct mm_struct *mm, ++ pte_t *pte) { } + static inline int is_write_migration_entry(swp_entry_t entry) + { + return 0; +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -2768,7 +2768,7 @@ int hugetlb_fault(struct mm_struct *mm, + if (ptep) { + entry = huge_ptep_get(ptep); + if (unlikely(is_hugetlb_entry_migration(entry))) { +- migration_entry_wait(mm, (pmd_t *)ptep, address); ++ migration_entry_wait_huge(mm, ptep); + return 0; + } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) + return VM_FAULT_HWPOISON_LARGE | +--- a/mm/migrate.c ++++ b/mm/migrate.c +@@ -180,15 +180,14 @@ static void remove_migration_ptes(struct + * get to the page and wait until migration is finished. + * When we return from this function the fault will be retried. + */ +-void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, +- unsigned long address) ++static void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep, ++ spinlock_t *ptl) + { +- pte_t *ptep, pte; +- spinlock_t *ptl; ++ pte_t pte; + swp_entry_t entry; + struct page *page; + +- ptep = pte_offset_map_lock(mm, pmd, address, &ptl); ++ spin_lock(ptl); + pte = *ptep; + if (!is_swap_pte(pte)) + goto out; +@@ -216,6 +215,20 @@ out: + pte_unmap_unlock(ptep, ptl); + } + ++void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, ++ unsigned long address) ++{ ++ spinlock_t *ptl = pte_lockptr(mm, pmd); ++ pte_t *ptep = pte_offset_map(pmd, address); ++ __migration_entry_wait(mm, ptep, ptl); ++} ++ ++void migration_entry_wait_huge(struct mm_struct *mm, pte_t *pte) ++{ ++ spinlock_t *ptl = &(mm)->page_table_lock; ++ __migration_entry_wait(mm, pte, ptl); ++} ++ + #ifdef CONFIG_BLOCK + /* Returns true if all buffers are successfully locked */ + static bool buffer_migrate_lock_buffers(struct buffer_head *head, diff --git a/queue-3.4/series b/queue-3.4/series index b71916b21f6..04f5bb3401a 100644 --- a/queue-3.4/series +++ b/queue-3.4/series @@ -13,3 +13,6 @@ reboot-rigrate-shutdown-reboot-to-boot-cpu.patch cciss-fix-broken-mutex-usage-in-ioctl.patch drm-i915-prefer-vbt-modes-for-svdo-lvds-over-edid.patch swap-avoid-read_swap_cache_async-race-to-deadlock-while-waiting-on-discard-i-o-completion.patch +md-raid1-consider-write-as-successful-only-if-at-least-one-non-faulty-and-non-rebuilding-drive-completed-it.patch +mm-migration-add-migrate_entry_wait_huge.patch +x86-fix-typo-in-kexec-register-clearing.patch diff --git a/queue-3.4/x86-fix-typo-in-kexec-register-clearing.patch b/queue-3.4/x86-fix-typo-in-kexec-register-clearing.patch new file mode 100644 index 00000000000..e7324048358 --- /dev/null +++ b/queue-3.4/x86-fix-typo-in-kexec-register-clearing.patch @@ -0,0 +1,33 @@ +From c8a22d19dd238ede87aa0ac4f7dbea8da039b9c1 Mon Sep 17 00:00:00 2001 +From: Kees Cook +Date: Wed, 5 Jun 2013 11:47:18 -0700 +Subject: x86: Fix typo in kexec register clearing + +From: Kees Cook + +commit c8a22d19dd238ede87aa0ac4f7dbea8da039b9c1 upstream. + +Fixes a typo in register clearing code. Thanks to PaX Team for fixing +this originally, and James Troup for pointing it out. + +Signed-off-by: Kees Cook +Link: http://lkml.kernel.org/r/20130605184718.GA8396@www.outflux.net +Cc: PaX Team +Signed-off-by: H. Peter Anvin +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/relocate_kernel_64.S | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/kernel/relocate_kernel_64.S ++++ b/arch/x86/kernel/relocate_kernel_64.S +@@ -160,7 +160,7 @@ identity_mapped: + xorq %rbp, %rbp + xorq %r8, %r8 + xorq %r9, %r9 +- xorq %r10, %r9 ++ xorq %r10, %r10 + xorq %r11, %r11 + xorq %r12, %r12 + xorq %r13, %r13 -- 2.47.3