--- /dev/null
+From dfd20b2b174d3a9b258ea3b7a35ead33576587b1 Mon Sep 17 00:00:00 2001
+From: Brian Behlendorf <behlendorf1@llnl.gov>
+Date: Fri, 24 May 2013 15:55:28 -0700
+Subject: drivers/block/brd.c: fix brd_lookup_page() race
+
+From: Brian Behlendorf <behlendorf1@llnl.gov>
+
+commit dfd20b2b174d3a9b258ea3b7a35ead33576587b1 upstream.
+
+The index on the page must be set before it is inserted in the radix
+tree. Otherwise there is a small race which can occur during lookup
+where the page can be found with the incorrect index. This will trigger
+the BUG_ON() in brd_lookup_page().
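+
+A sketch of the interleaving (CPU labels are illustrative; the lookup
+side runs locklessly under RCU):
+
+        CPU A (brd_insert_page)            CPU B (brd_lookup_page)
+        radix_tree_insert(tree, idx, page)
+                                           page = radix_tree_lookup(tree, idx)
+                                           BUG_ON(page && page->index != idx)
+        page->index = idx   <-- too late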
+
+Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
+Reported-by: Chris Wedgwood <cw@f00f.org>
+Cc: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/block/brd.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/block/brd.c
++++ b/drivers/block/brd.c
+@@ -117,13 +117,13 @@ static struct page *brd_insert_page(stru
+
+ spin_lock(&brd->brd_lock);
+ idx = sector >> PAGE_SECTORS_SHIFT;
++ page->index = idx;
+ if (radix_tree_insert(&brd->brd_pages, idx, page)) {
+ __free_page(page);
+ page = radix_tree_lookup(&brd->brd_pages, idx);
+ BUG_ON(!page);
+ BUG_ON(page->index != idx);
+- } else
+- page->index = idx;
++ }
+ spin_unlock(&brd->brd_lock);
+
+ radix_tree_preload_end();
--- /dev/null
+From 4b949b8af12e24b8a48fa5bb775a13b558d9f4da Mon Sep 17 00:00:00 2001
+From: Christian Gmeiner <christian.gmeiner@gmail.com>
+Date: Fri, 24 May 2013 15:55:22 -0700
+Subject: drivers/leds/leds-ot200.c: fix error caused by shifted mask
+
+From: Christian Gmeiner <christian.gmeiner@gmail.com>
+
+commit 4b949b8af12e24b8a48fa5bb775a13b558d9f4da upstream.
+
+During the development of this driver, in-house register documentation
+was used. Last week some integration tests were done and this problem
+was found: it turned out that the released register documentation is
+wrong.
+
+The fix is very simple: shift all masks by one.
+
+Signed-off-by: Christian Gmeiner <christian.gmeiner@gmail.com>
+Cc: Bryan Wu <cooloney@gmail.com>
+Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/leds/leds-ot200.c | 14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+--- a/drivers/leds/leds-ot200.c
++++ b/drivers/leds/leds-ot200.c
+@@ -47,37 +47,37 @@ static struct ot200_led leds[] = {
+ {
+ .name = "led_1",
+ .port = 0x49,
+- .mask = BIT(7),
++ .mask = BIT(6),
+ },
+ {
+ .name = "led_2",
+ .port = 0x49,
+- .mask = BIT(6),
++ .mask = BIT(5),
+ },
+ {
+ .name = "led_3",
+ .port = 0x49,
+- .mask = BIT(5),
++ .mask = BIT(4),
+ },
+ {
+ .name = "led_4",
+ .port = 0x49,
+- .mask = BIT(4),
++ .mask = BIT(3),
+ },
+ {
+ .name = "led_5",
+ .port = 0x49,
+- .mask = BIT(3),
++ .mask = BIT(2),
+ },
+ {
+ .name = "led_6",
+ .port = 0x49,
+- .mask = BIT(2),
++ .mask = BIT(1),
+ },
+ {
+ .name = "led_7",
+ .port = 0x49,
+- .mask = BIT(1),
++ .mask = BIT(0),
+ }
+ };
+
--- /dev/null
+From 99e11334dcb846f9b76fb808196c7f47aa83abb3 Mon Sep 17 00:00:00 2001
+From: Martin Michlmayr <tbm@cyrius.com>
+Date: Sun, 21 Apr 2013 17:14:00 +0100
+Subject: Kirkwood: Enable PCIe port 1 on QNAP TS-11x/TS-21x
+
+From: Martin Michlmayr <tbm@cyrius.com>
+
+commit 99e11334dcb846f9b76fb808196c7f47aa83abb3 upstream.
+
+Enable KW_PCIE1 on QNAP TS-11x/TS-21x devices as newer revisions
+(rev 1.3) have a USB 3.0 chip from Etron on PCIe port 1. Thanks
+to Marek Vasut for identifying this issue!
+
+Signed-off-by: Martin Michlmayr <tbm@cyrius.com>
+Tested-by: Marek Vasut <marex@denx.de>
+Acked-by: Andrew Lunn <andrew@lunn.ch>
+Signed-off-by: Jason Cooper <jason@lakedaemon.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/mach-kirkwood/ts219-setup.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/arm/mach-kirkwood/ts219-setup.c
++++ b/arch/arm/mach-kirkwood/ts219-setup.c
+@@ -124,7 +124,7 @@ static void __init qnap_ts219_init(void)
+ static int __init ts219_pci_init(void)
+ {
+ if (machine_is_ts219())
+- kirkwood_pcie_init(KW_PCIE0);
++ kirkwood_pcie_init(KW_PCIE1 | KW_PCIE0);
+
+ return 0;
+ }
--- /dev/null
+From ac5a2962b02f57dea76d314ef2521a2170b28ab6 Mon Sep 17 00:00:00 2001
+From: "wang, biao" <biao.wang@intel.com>
+Date: Thu, 16 May 2013 09:50:13 +0800
+Subject: klist: del waiter from klist_remove_waiters before waking up the waiting process
+
+From: "wang, biao" <biao.wang@intel.com>
+
+commit ac5a2962b02f57dea76d314ef2521a2170b28ab6 upstream.
+
+There is a race between klist_remove and klist_release. klist_remove
+uses a local variable, waiter, saved on its stack. When klist_release
+calls wake_up_process(waiter->process) to wake up the waiter, the
+waiter might run immediately and reuse the stack. klist_release then
+calls list_del(&waiter->list), writing to the stale wait data and
+corrupting the stack of the prior waiter thread.
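+
+A sketch of the interleaving (thread labels are illustrative):
+
+        klist_remove (A)                   klist_release (B)
+        struct klist_waiter waiter;
+        add waiter to klist_remove_waiters
+        sleep until waiter.woken
+                                           waiter->woken = 1;
+                                           mb();
+                                           wake_up_process(waiter->process);
+        wakes up, returns, stack reused
+                                           list_del(&waiter->list);
+                                           <-- writes into A's reused stack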
+
+The patch fixes it against kernel 3.9.
+
+Signed-off-by: wang, biao <biao.wang@intel.com>
+Acked-by: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ lib/klist.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/lib/klist.c
++++ b/lib/klist.c
+@@ -193,10 +193,10 @@ static void klist_release(struct kref *k
+ if (waiter->node != n)
+ continue;
+
++ list_del(&waiter->list);
+ waiter->woken = 1;
+ mb();
+ wake_up_process(waiter->process);
+- list_del(&waiter->list);
+ }
+ spin_unlock(&klist_remove_lock);
+ knode_set_klist(n, NULL);
--- /dev/null
+From c2cc499c5bcf9040a738f49e8051b42078205748 Mon Sep 17 00:00:00 2001
+From: Leonid Yegoshin <Leonid.Yegoshin@imgtec.com>
+Date: Fri, 24 May 2013 15:55:18 -0700
+Subject: mm compaction: fix improper cache flush in migration code
+
+From: Leonid Yegoshin <Leonid.Yegoshin@imgtec.com>
+
+commit c2cc499c5bcf9040a738f49e8051b42078205748 upstream.
+
+Page 'new' during MIGRATION can't be flushed with flush_cache_page().
+Using flush_cache_page(vma, addr, pfn) is justified only if the page is
+already placed in the process page table, and that is done right after
+flush_cache_page(). But without it the arch function has no knowledge
+of the process PTE and does nothing.
+
+Besides that, flush_cache_page() flushes an application cache page, but
+it is the kernel, using a different virtual address for the page, that
+dirtied it.
+
+Replace it with flush_dcache_page(new), which is the proper usage.
+
+The old page is flushed in try_to_unmap_one() before migration.
+
+This bug shows up on a SEAD-3 board with an M14Kc MIPS CPU without
+cache aliasing (but a Harvard architecture with separate I and D
+caches) in a tight memory environment (128MB), every 1-3 days on a
+SOAK test. It fails in cc1 during a kernel build (SIGILL, SIGBUS,
+SIGSEGV) if CONFIG_COMPACTION is switched on.
+
+Signed-off-by: Leonid Yegoshin <Leonid.Yegoshin@imgtec.com>
+Cc: Leonid Yegoshin <yegoshin@mips.com>
+Acked-by: Rik van Riel <riel@redhat.com>
+Cc: Michal Hocko <mhocko@suse.cz>
+Acked-by: Mel Gorman <mgorman@suse.de>
+Cc: Ralf Baechle <ralf@linux-mips.org>
+Cc: Russell King <rmk@arm.linux.org.uk>
+Cc: David Miller <davem@davemloft.net>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/migrate.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/migrate.c
++++ b/mm/migrate.c
+@@ -145,7 +145,7 @@ static int remove_migration_pte(struct p
+ if (PageHuge(new))
+ pte = pte_mkhuge(pte);
+ #endif
+- flush_cache_page(vma, addr, pte_pfn(pte));
++ flush_dcache_page(new);
+ set_pte_at(mm, addr, ptep, pte);
+
+ if (PageHuge(new)) {
--- /dev/null
+From d34883d4e35c0a994e91dd847a82b4c9e0c31d83 Mon Sep 17 00:00:00 2001
+From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
+Date: Fri, 24 May 2013 15:55:11 -0700
+Subject: mm: mmu_notifier: re-fix freed page still mapped in secondary MMU
+
+From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
+
+commit d34883d4e35c0a994e91dd847a82b4c9e0c31d83 upstream.
+
+Commit 751efd8610d3 ("mmu_notifier_unregister NULL Pointer deref and
+multiple ->release()") breaks the fix 3ad3d901bbcf ("mm: mmu_notifier:
+fix freed page still mapped in secondary MMU").
+
+Since hlist_for_each_entry_rcu() has changed in the meantime, we cannot
+revert that patch directly, so this patch reverts the commit and simply
+fixes the bug spotted by it.
+
+The bug spotted by commit 751efd8610d3 is:
+
+ There is a race condition between mmu_notifier_unregister() and
+ __mmu_notifier_release().
+
+ Assume two tasks, one calling mmu_notifier_unregister() as a result
+ of a filp_close() ->flush() callout (task A), and the other calling
+ mmu_notifier_release() from an mmput() (task B).
+
+ A B
+ t1 srcu_read_lock()
+ t2 if (!hlist_unhashed())
+ t3 srcu_read_unlock()
+ t4 srcu_read_lock()
+ t5 hlist_del_init_rcu()
+ t6 synchronize_srcu()
+ t7 srcu_read_unlock()
+ t8 hlist_del_rcu() <--- NULL pointer deref.
+
+This can be fixed by using hlist_del_init_rcu instead of hlist_del_rcu.
+
+The other issue spotted in that commit is "multiple ->release()
+callouts". We needn't care about it too much, because it is really rare
+(e.g., it cannot happen on KVM, since the mmu notifier is unregistered
+after exit_mmap()) and the later calls of ->release should be fast,
+since all the pages have already been released by the first call.
+Anyway, that issue should be fixed in a separate patch.
+
+-stable suggestions: any version that has commit 751efd8610d3 needs
+this backported. The oldest version I find with that commit is
+3.0-stable.
+
+[akpm@linux-foundation.org: tweak comments]
+Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
+Tested-by: Robin Holt <holt@sgi.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/mmu_notifier.c | 79 ++++++++++++++++++++++++++----------------------------
+ 1 file changed, 39 insertions(+), 40 deletions(-)
+
+--- a/mm/mmu_notifier.c
++++ b/mm/mmu_notifier.c
+@@ -40,48 +40,44 @@ void __mmu_notifier_release(struct mm_st
+ int id;
+
+ /*
+- * srcu_read_lock() here will block synchronize_srcu() in
+- * mmu_notifier_unregister() until all registered
+- * ->release() callouts this function makes have
+- * returned.
++ * SRCU here will block mmu_notifier_unregister until
++ * ->release returns.
+ */
+ id = srcu_read_lock(&srcu);
++ hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist)
++ /*
++ * If ->release runs before mmu_notifier_unregister it must be
++ * handled, as it's the only way for the driver to flush all
++ * existing sptes and stop the driver from establishing any more
++ * sptes before all the pages in the mm are freed.
++ */
++ if (mn->ops->release)
++ mn->ops->release(mn, mm);
++ srcu_read_unlock(&srcu, id);
++
+ spin_lock(&mm->mmu_notifier_mm->lock);
+ while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) {
+ mn = hlist_entry(mm->mmu_notifier_mm->list.first,
+ struct mmu_notifier,
+ hlist);
+-
+ /*
+- * Unlink. This will prevent mmu_notifier_unregister()
+- * from also making the ->release() callout.
++ * We arrived before mmu_notifier_unregister so
++ * mmu_notifier_unregister will do nothing other than to wait
++ * for ->release to finish and for mmu_notifier_unregister to
++ * return.
+ */
+ hlist_del_init_rcu(&mn->hlist);
+- spin_unlock(&mm->mmu_notifier_mm->lock);
+-
+- /*
+- * Clear sptes. (see 'release' description in mmu_notifier.h)
+- */
+- if (mn->ops->release)
+- mn->ops->release(mn, mm);
+-
+- spin_lock(&mm->mmu_notifier_mm->lock);
+ }
+ spin_unlock(&mm->mmu_notifier_mm->lock);
+
+ /*
+- * All callouts to ->release() which we have done are complete.
+- * Allow synchronize_srcu() in mmu_notifier_unregister() to complete
+- */
+- srcu_read_unlock(&srcu, id);
+-
+- /*
+- * mmu_notifier_unregister() may have unlinked a notifier and may
+- * still be calling out to it. Additionally, other notifiers
+- * may have been active via vmtruncate() et. al. Block here
+- * to ensure that all notifier callouts for this mm have been
+- * completed and the sptes are really cleaned up before returning
+- * to exit_mmap().
++ * synchronize_srcu here prevents mmu_notifier_release from returning to
++ * exit_mmap (which would proceed with freeing all pages in the mm)
++ * until the ->release method returns, if it was invoked by
++ * mmu_notifier_unregister.
++ *
++ * The mmu_notifier_mm can't go away from under us because one mm_count
++ * is held by exit_mmap.
+ */
+ synchronize_srcu(&srcu);
+ }
+@@ -302,31 +298,34 @@ void mmu_notifier_unregister(struct mmu_
+ {
+ BUG_ON(atomic_read(&mm->mm_count) <= 0);
+
+- spin_lock(&mm->mmu_notifier_mm->lock);
+ if (!hlist_unhashed(&mn->hlist)) {
++ /*
++ * SRCU here will force exit_mmap to wait for ->release to
++ * finish before freeing the pages.
++ */
+ int id;
+
++ id = srcu_read_lock(&srcu);
+ /*
+- * Ensure we synchronize up with __mmu_notifier_release().
++ * exit_mmap will block in mmu_notifier_release to guarantee
++ * that ->release is called before freeing the pages.
+ */
+- id = srcu_read_lock(&srcu);
+-
+- hlist_del_rcu(&mn->hlist);
+- spin_unlock(&mm->mmu_notifier_mm->lock);
+-
+ if (mn->ops->release)
+ mn->ops->release(mn, mm);
++ srcu_read_unlock(&srcu, id);
+
++ spin_lock(&mm->mmu_notifier_mm->lock);
+ /*
+- * Allow __mmu_notifier_release() to complete.
++ * Can not use list_del_rcu() since __mmu_notifier_release
++ * can delete it before we hold the lock.
+ */
+- srcu_read_unlock(&srcu, id);
+- } else
++ hlist_del_init_rcu(&mn->hlist);
+ spin_unlock(&mm->mmu_notifier_mm->lock);
++ }
+
+ /*
+- * Wait for any running method to finish, including ->release() if it
+- * was run by __mmu_notifier_release() instead of us.
++ * Wait for any running method to finish, of course including
++ * ->release if it was run by mmu_notifier_release instead of us.
+ */
+ synchronize_srcu(&srcu);
+
--- /dev/null
+From a9ff785e4437c83d2179161e012f5bdfbd6381f0 Mon Sep 17 00:00:00 2001
+From: Cliff Wickman <cpw@sgi.com>
+Date: Fri, 24 May 2013 15:55:36 -0700
+Subject: mm/pagewalk.c: walk_page_range should avoid VM_PFNMAP areas
+
+From: Cliff Wickman <cpw@sgi.com>
+
+commit a9ff785e4437c83d2179161e012f5bdfbd6381f0 upstream.
+
+A panic can be caused by simply cat'ing /proc/<pid>/smaps while an
+application has a VM_PFNMAP range. It happened in-house when a
+benchmarker was trying to decipher the memory layout of his program.
+
+/proc/<pid>/smaps and similar walks through a user page table should not
+be looking at VM_PFNMAP areas.
+
+Certain tests in walk_page_range() (specifically split_huge_page_pmd())
+assume that all the mapped PFNs are backed with page structures. This
+is usually not true for VM_PFNMAP areas. It can result in panics on
+kernel page faults when attempting to address those page structures.
+
+There are a half dozen callers of walk_page_range() that walk through a
+task's entire page table (as N. Horiguchi pointed out). So rather than
+change all of them, this patch changes just walk_page_range() to ignore
+VM_PFNMAP areas.
+
+The logic of hugetlb_vma() is moved back into walk_page_range(), as we
+want to test any vma in the range.
+
+VM_PFNMAP areas are used by:
+- graphics memory manager gpu/drm/drm_gem.c
+- global reference unit sgi-gru/grufile.c
+- sgi special memory char/mspec.c
+- and probably several out-of-tree modules
+
+[akpm@linux-foundation.org: remove now-unused hugetlb_vma() stub]
+Signed-off-by: Cliff Wickman <cpw@sgi.com>
+Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Cc: Mel Gorman <mel@csn.ul.ie>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: David Sterba <dsterba@suse.cz>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: KOSAKI Motohiro <kosaki.motohiro@gmail.com>
+Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/pagewalk.c | 70 +++++++++++++++++++++++++++++-----------------------------
+ 1 file changed, 36 insertions(+), 34 deletions(-)
+
+--- a/mm/pagewalk.c
++++ b/mm/pagewalk.c
+@@ -127,28 +127,7 @@ static int walk_hugetlb_range(struct vm_
+ return 0;
+ }
+
+-static struct vm_area_struct* hugetlb_vma(unsigned long addr, struct mm_walk *walk)
+-{
+- struct vm_area_struct *vma;
+-
+- /* We don't need vma lookup at all. */
+- if (!walk->hugetlb_entry)
+- return NULL;
+-
+- VM_BUG_ON(!rwsem_is_locked(&walk->mm->mmap_sem));
+- vma = find_vma(walk->mm, addr);
+- if (vma && vma->vm_start <= addr && is_vm_hugetlb_page(vma))
+- return vma;
+-
+- return NULL;
+-}
+-
+ #else /* CONFIG_HUGETLB_PAGE */
+-static struct vm_area_struct* hugetlb_vma(unsigned long addr, struct mm_walk *walk)
+-{
+- return NULL;
+-}
+-
+ static int walk_hugetlb_range(struct vm_area_struct *vma,
+ unsigned long addr, unsigned long end,
+ struct mm_walk *walk)
+@@ -199,30 +178,53 @@ int walk_page_range(unsigned long addr,
+ if (!walk->mm)
+ return -EINVAL;
+
++ VM_BUG_ON(!rwsem_is_locked(&walk->mm->mmap_sem));
++
+ pgd = pgd_offset(walk->mm, addr);
+ do {
+- struct vm_area_struct *vma;
++ struct vm_area_struct *vma = NULL;
+
+ next = pgd_addr_end(addr, end);
+
+ /*
+- * handle hugetlb vma individually because pagetable walk for
+- * the hugetlb page is dependent on the architecture and
+- * we can't handled it in the same manner as non-huge pages.
++ * This function was not intended to be vma based.
++ * But there are vma special cases to be handled:
++ * - hugetlb vma's
++ * - VM_PFNMAP vma's
+ */
+- vma = hugetlb_vma(addr, walk);
++ vma = find_vma(walk->mm, addr);
+ if (vma) {
+- if (vma->vm_end < next)
++ /*
++ * There are no page structures backing a VM_PFNMAP
++ * range, so do not allow split_huge_page_pmd().
++ */
++ if ((vma->vm_start <= addr) &&
++ (vma->vm_flags & VM_PFNMAP)) {
+ next = vma->vm_end;
++ pgd = pgd_offset(walk->mm, next);
++ continue;
++ }
+ /*
+- * Hugepage is very tightly coupled with vma, so
+- * walk through hugetlb entries within a given vma.
++ * Handle hugetlb vma individually because pagetable
++ * walk for the hugetlb page is dependent on the
++ * architecture and we can't handle it in the same
++ * manner as non-huge pages.
+ */
+- err = walk_hugetlb_range(vma, addr, next, walk);
+- if (err)
+- break;
+- pgd = pgd_offset(walk->mm, next);
+- continue;
++ if (walk->hugetlb_entry && (vma->vm_start <= addr) &&
++ is_vm_hugetlb_page(vma)) {
++ if (vma->vm_end < next)
++ next = vma->vm_end;
++ /*
++ * Hugepage is very tightly coupled with vma,
++ * so walk through hugetlb entries within a
++ * given vma.
++ */
++ err = walk_hugetlb_range(vma, addr, next, walk);
++ if (err)
++ break;
++ pgd = pgd_offset(walk->mm, next);
++ continue;
++ }
+ }
+
+ if (pgd_none_or_clear_bad(pgd)) {
--- /dev/null
+From 7c3425123ddfdc5f48e7913ff59d908789712b18 Mon Sep 17 00:00:00 2001
+From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
+Date: Fri, 24 May 2013 15:55:21 -0700
+Subject: mm/THP: use pmd_populate() to update the pmd with pgtable_t pointer
+
+From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
+
+commit 7c3425123ddfdc5f48e7913ff59d908789712b18 upstream.
+
+We should not use set_pmd_at to update a pmd_t with a pgtable_t
+pointer. set_pmd_at is used to set a pmd with huge pte entries, and
+architectures like ppc64 clear a few flags from the pte when saving a
+new entry. Without this change we observe bad pte errors like the one
+below on ppc64 with THP enabled.
+
+ BUG: Bad page map in process ld mm=0xc000001ee39f4780 pte:7fc3f37848000001 pmd:c000001ec0000000
+
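+A minimal sketch of the distinction (hedged illustration; helper names
+such as mk_huge_pmd() are assumed, not taken from the patched code):
+
+	/* huge pte entry: set_pmd_at() may filter pte flags */
+	set_pmd_at(mm, address, pmd, mk_huge_pmd(page, vma));
+
+	/* pointer to a regular page table: use pmd_populate() */
+	pmd_populate(mm, pmd, pmd_pgtable(_pmd));
+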
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/huge_memory.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -1949,7 +1949,12 @@ static void collapse_huge_page(struct mm
+ pte_unmap(pte);
+ spin_lock(&mm->page_table_lock);
+ BUG_ON(!pmd_none(*pmd));
+- set_pmd_at(mm, address, pmd, _pmd);
++ /*
++ * We can only use set_pmd_at when establishing
++ * hugepmds and never for establishing regular pmds that
++ * point to regular pagetables. Use pmd_populate for that.
++ */
++ pmd_populate(mm, pmd, pmd_pgtable(_pmd));
+ spin_unlock(&mm->page_table_lock);
+ anon_vma_unlock(vma->anon_vma);
+ goto out;
--- /dev/null
+From 136e8770cd5d1fe38b3c613100dd6dc4db6d4fa6 Mon Sep 17 00:00:00 2001
+From: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
+Date: Fri, 24 May 2013 15:55:29 -0700
+Subject: nilfs2: fix issue of nilfs_set_page_dirty() for page at EOF boundary
+
+From: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
+
+commit 136e8770cd5d1fe38b3c613100dd6dc4db6d4fa6 upstream.
+
+DESCRIPTION:
+ There are use-cases in which a NILFS2 file system (formatted with a
+block size smaller than 4 KB) can be remounted in RO mode because of
+encountering the "broken bmap" issue.
+
+The issue was reported by Anthony Doggett <Anthony2486@interfaces.org.uk>:
+ "The machine I've been trialling nilfs on is running Debian Testing,
+ Linux version 3.2.0-4-686-pae (debian-kernel@lists.debian.org) (gcc
+ version 4.6.3 (Debian 4.6.3-14) ) #1 SMP Debian 3.2.35-2), but I've
+ also reproduced it (identically) with Debian Unstable amd64 and Debian
+ Experimental (using the 3.8-trunk kernel). The problematic partitions
+ were formatted with "mkfs.nilfs2 -b 1024 -B 8192"."
+
+SYMPTOMS:
+(1) The system log contains error messages like the following:
+
+ [63102.496756] nilfs_direct_assign: invalid pointer: 0
+ [63102.496786] NILFS error (device dm-17): nilfs_bmap_assign: broken bmap (inode number=28)
+ [63102.496798]
+ [63102.524403] Remounting filesystem read-only
+
+(2) The NILFS2 file system is remounted in RO mode.
+
+REPRODUCING PATH:
+(1) Create volume group with name "unencrypted" by means of vgcreate utility.
+(2) Run script (prepared by Anthony Doggett <Anthony2486@interfaces.org.uk>):
+
+----------------[BEGIN SCRIPT]--------------------
+
+VG=unencrypted
+lvcreate --size 2G --name ntest $VG
+mkfs.nilfs2 -b 1024 -B 8192 /dev/mapper/$VG-ntest
+mkdir /var/tmp/n
+mkdir /var/tmp/n/ntest
+mount /dev/mapper/$VG-ntest /var/tmp/n/ntest
+mkdir /var/tmp/n/ntest/thedir
+cd /var/tmp/n/ntest/thedir
+sleep 2
+date
+darcs init
+sleep 2
+dmesg|tail -n 5
+date
+darcs whatsnew || true
+date
+sleep 2
+dmesg|tail -n 5
+----------------[END SCRIPT]--------------------
+
+REPRODUCIBILITY: 100%
+
+INVESTIGATION:
+As was discovered, the issue takes place during segment construction
+after executing the following sequence of user-space operations:
+
+ open("_darcs/index", O_RDWR|O_CREAT|O_NOCTTY, 0666) = 7
+ fstat(7, {st_mode=S_IFREG|0644, st_size=0, ...}) = 0
+ ftruncate(7, 60)
+
+The error message "NILFS error (device dm-17): nilfs_bmap_assign: broken
+bmap (inode number=28)" occurs while trying to get the block number for
+the third block of the file, at logical offset 3072 bytes. As can be
+seen from the output above, the whole file is 60 bytes in size, so
+allocating one block (1 KB in size) is enough for the whole file.
+Operating on several blocks instead of one happens because several
+dirty buffers are discovered for this file in the
+nilfs_segctor_scan_file() method.
+
+The root cause of this issue is in the nilfs_set_page_dirty() function,
+which is called just before writing to an mmapped page.
+
+When the nilfs_page_mkwrite() function handles a page at the EOF
+boundary, it fills hole blocks only inside EOF through
+__block_page_mkwrite().
+
+The __block_page_mkwrite() function calls set_page_dirty() after
+filling hole blocks, and thus the nilfs_set_page_dirty() function (=
+a_ops->set_page_dirty) is called. However, the current implementation
+of nilfs_set_page_dirty() wrongly marks all buffers dirty, even for a
+page at the EOF boundary.
+
+As a result, buffers outside EOF are inconsistently marked dirty and
+queued for write even though they are not mapped with the
+nilfs_get_block function.
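+
+For illustration, assuming a 4 KB page over the reported 1 KB block
+size: the 60-byte file occupies only the first of the page's four
+buffers, yet all four were marked dirty, so the segment constructor
+later tried to assign a block number to the unmapped buffer at offset
+3072 and hit the "broken bmap" error.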
+
+FIX:
+This modifies nilfs_set_page_dirty() not to mark hole blocks dirty.
+
+Thanks to Vyacheslav Dubeyko for his effort on analysis and proposals
+for this issue.
+
+Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
+Reported-by: Anthony Doggett <Anthony2486@interfaces.org.uk>
+Reported-by: Vyacheslav Dubeyko <slava@dubeyko.com>
+Cc: Vyacheslav Dubeyko <slava@dubeyko.com>
+Tested-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/nilfs2/inode.c | 27 +++++++++++++++++++++++----
+ 1 file changed, 23 insertions(+), 4 deletions(-)
+
+--- a/fs/nilfs2/inode.c
++++ b/fs/nilfs2/inode.c
+@@ -195,13 +195,32 @@ static int nilfs_writepage(struct page *
+
+ static int nilfs_set_page_dirty(struct page *page)
+ {
+- int ret = __set_page_dirty_buffers(page);
++ int ret = __set_page_dirty_nobuffers(page);
+
+- if (ret) {
++ if (page_has_buffers(page)) {
+ struct inode *inode = page->mapping->host;
+- unsigned nr_dirty = 1 << (PAGE_SHIFT - inode->i_blkbits);
++ unsigned nr_dirty = 0;
++ struct buffer_head *bh, *head;
+
+- nilfs_set_file_dirty(inode, nr_dirty);
++ /*
++ * This page is locked by callers, and no other thread
++ * concurrently marks its buffers dirty since they are
++ * only dirtied through routines in fs/buffer.c in
++ * which call sites of mark_buffer_dirty are protected
++ * by page lock.
++ */
++ bh = head = page_buffers(page);
++ do {
++ /* Do not mark hole blocks dirty */
++ if (buffer_dirty(bh) || !buffer_mapped(bh))
++ continue;
++
++ set_buffer_dirty(bh);
++ nr_dirty++;
++ } while (bh = bh->b_this_page, bh != head);
++
++ if (nr_dirty)
++ nilfs_set_file_dirty(inode, nr_dirty);
+ }
+ return ret;
+ }
--- /dev/null
+From b4ca2b4b577c3530e34dcfaafccb2cc680ce95d1 Mon Sep 17 00:00:00 2001
+From: Joseph Qi <joseph.qi@huawei.com>
+Date: Fri, 24 May 2013 15:55:34 -0700
+Subject: ocfs2: goto out_unlock if ocfs2_get_clusters_nocache() failed in ocfs2_fiemap()
+
+From: Joseph Qi <joseph.qi@huawei.com>
+
+commit b4ca2b4b577c3530e34dcfaafccb2cc680ce95d1 upstream.
+
+Last time we found a lock/unlock bug in ocfs2_file_aio_write, and then
+we did a thorough search for all lock resources in ocfs2_inode_info,
+including the rw, inode and open lockres, and found this bug. My
+kernel version is 3.0.13, and the bug is also in the latest version,
+3.9. In ocfs2_fiemap, once ocfs2_get_clusters_nocache() fails, it
+should goto out_unlock instead of out, because we need to release the
+buffer head, up the read alloc sem and unlock the inode.
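+
+A sketch of the cleanup the out_unlock path performs, as described
+above (the exact statements and identifiers are assumed for
+illustration, not copied from fs/ocfs2/extent_map.c):
+
+out_unlock:
+	brelse(di_bh);                          /* release buffer head */
+	up_read(&OCFS2_I(inode)->ip_alloc_sem); /* up read alloc sem */
+	ocfs2_inode_unlock(inode, 0);           /* unlock inode */
+out:
+	return ret;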
+
+Signed-off-by: Joseph Qi <joseph.qi@huawei.com>
+Reviewed-by: Jie Liu <jeff.liu@oracle.com>
+Cc: Mark Fasheh <mfasheh@suse.com>
+Cc: Joel Becker <jlbec@evilplan.org>
+Acked-by: Sunil Mushran <sunil.mushran@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ocfs2/extent_map.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/ocfs2/extent_map.c
++++ b/fs/ocfs2/extent_map.c
+@@ -791,7 +791,7 @@ int ocfs2_fiemap(struct inode *inode, st
+ &hole_size, &rec, &is_last);
+ if (ret) {
+ mlog_errno(ret);
+- goto out;
++ goto out_unlock;
+ }
+
+ if (rec.e_blkno == 0ULL) {
--- /dev/null
+From 5a1e99dd2028e00998d42029be86835d8ef4a46e Mon Sep 17 00:00:00 2001
+From: Ben Hutchings <ben@decadent.org.uk>
+Date: Mon, 20 May 2013 14:45:26 +0000
+Subject: perf: net_dropmonitor: Fix symbol-relative addresses
+
+From: Ben Hutchings <ben@decadent.org.uk>
+
+commit 5a1e99dd2028e00998d42029be86835d8ef4a46e upstream.
+
+The comparison between traced and symbol addresses is backwards: if
+the traced address doesn't exactly match a symbol (which we don't
+expect it to), we'll show the next symbol and the offset to it,
+whereas we should show the previous symbol and the offset from it.
+
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ tools/perf/scripts/python/net_dropmonitor.py | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/tools/perf/scripts/python/net_dropmonitor.py
++++ b/tools/perf/scripts/python/net_dropmonitor.py
+@@ -40,9 +40,9 @@ def get_kallsyms_table():
+
+ def get_sym(sloc):
+ loc = int(sloc)
+- for i in kallsyms:
+- if (i['loc'] >= loc):
+- return (i['name'], i['loc']-loc)
++ for i in kallsyms[::-1]:
++ if loc >= i['loc']:
++ return (i['name'], loc - i['loc'])
+ return (None, 0)
+
+ def print_drop_table():
--- /dev/null
+From 140c3c6a2bcd2c31e2f7f5a8d59689724776c8e5 Mon Sep 17 00:00:00 2001
+From: Ben Hutchings <ben@decadent.org.uk>
+Date: Mon, 20 May 2013 14:44:43 +0000
+Subject: perf: net_dropmonitor: Fix trace parameter order
+
+From: Ben Hutchings <ben@decadent.org.uk>
+
+commit 140c3c6a2bcd2c31e2f7f5a8d59689724776c8e5 upstream.
+
+This works much better if we don't treat protocol numbers as addresses.
+
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ tools/perf/scripts/python/net_dropmonitor.py | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/tools/perf/scripts/python/net_dropmonitor.py
++++ b/tools/perf/scripts/python/net_dropmonitor.py
+@@ -64,7 +64,7 @@ def trace_end():
+
+ # called from perf, when it finds a correspoinding event
+ def skb__kfree_skb(name, context, cpu, sec, nsec, pid, comm,
+- skbaddr, protocol, location):
++ skbaddr, location, protocol):
+ slocation = str(location)
+ try:
+ drop_log[slocation] = drop_log[slocation] + 1
usb-uhci-fix-for-suspend-of-virtual-hp-controller.patch
cifs-only-set-ops-for-inodes-in-i_new-state.patch
fat-fix-possible-overflow-for-fat_clusters.patch
+tg3-fix-data-corruption-on-5725-with-tso.patch
+perf-net_dropmonitor-fix-trace-parameter-order.patch
+perf-net_dropmonitor-fix-symbol-relative-addresses.patch
+ocfs2-goto-out_unlock-if-ocfs2_get_clusters_nocache-failed-in-ocfs2_fiemap.patch
+kirkwood-enable-pcie-port-1-on-qnap-ts-11x-ts-21x.patch
+drivers-leds-leds-ot200.c-fix-error-caused-by-shifted-mask.patch
+mm-compaction-fix-of-improper-cache-flush-in-migration-code.patch
+klist-del-waiter-from-klist_remove_waiters-before-wakeup-waitting-process.patch
+wait-fix-false-timeouts-when-using-wait_event_timeout.patch
+nilfs2-fix-issue-of-nilfs_set_page_dirty-for-page-at-eof-boundary.patch
+mm-mmu_notifier-re-fix-freed-page-still-mapped-in-secondary-mmu.patch
+drivers-block-brd.c-fix-brd_lookup_page-race.patch
+mm-pagewalk.c-walk_page_range-should-avoid-vm_pfnmap-areas.patch
+mm-thp-use-pmd_populate-to-update-the-pmd-with-pgtable_t-pointer.patch
--- /dev/null
+From 0f0d15100a8ac875bdd408324c473e16d73d3557 Mon Sep 17 00:00:00 2001
+From: Michael Chan <mchan@broadcom.com>
+Date: Mon, 13 May 2013 11:04:16 +0000
+Subject: tg3: Fix data corruption on 5725 with TSO
+
+From: Michael Chan <mchan@broadcom.com>
+
+commit 0f0d15100a8ac875bdd408324c473e16d73d3557 upstream.
+
+The 5725 family of devices (asic rev 5762) corrupts TSO packets where
+the buffer is within MSS bytes of a 4G boundary (4G, 8G etc.). Detect
+this condition and trigger the workaround path.
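+
+A worked example with assumed values: for a 0x200-byte buffer mapped at
+0xfffffa00 with an MSS of 1448 (0x5a8), the 32-bit sum 0xfffffa00 +
+0x200 + 0x5a8 wraps to 0x1a8, which is less than the base, so the
+buffer ends within MSS bytes of the 4G boundary and the workaround
+path is taken.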
+
+Signed-off-by: Michael Chan <mchan@broadcom.com>
+Signed-off-by: Nithin Nayak Sujir <nsujir@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/ethernet/broadcom/tg3.c | 17 +++++++++++++++++
+ 1 file changed, 17 insertions(+)
+
+--- a/drivers/net/ethernet/broadcom/tg3.c
++++ b/drivers/net/ethernet/broadcom/tg3.c
+@@ -6622,6 +6622,20 @@ static inline int tg3_4g_overflow_test(d
+ return (base > 0xffffdcc0) && (base + len + 8 < base);
+ }
+
++/* Test for TSO DMA buffers that cross into regions which are within MSS bytes
++ * of any 4GB boundaries: 4G, 8G, etc
++ */
++static inline int tg3_4g_tso_overflow_test(struct tg3 *tp, dma_addr_t mapping,
++ u32 len, u32 mss)
++{
++ if (tg3_asic_rev(tp) == ASIC_REV_5762 && mss) {
++ u32 base = (u32) mapping & 0xffffffff;
++
++ return ((base + len + (mss & 0x3fff)) < base);
++ }
++ return 0;
++}
++
+ /* Test for DMA addresses > 40-bit */
+ static inline int tg3_40bit_overflow_test(struct tg3 *tp, dma_addr_t mapping,
+ int len)
+@@ -6658,6 +6672,9 @@ static bool tg3_tx_frag_set(struct tg3_n
+ if (tg3_4g_overflow_test(map, len))
+ hwbug = true;
+
++ if (tg3_4g_tso_overflow_test(tp, map, len, mss))
++ hwbug = true;
++
+ if (tg3_40bit_overflow_test(tp, map, len))
+ hwbug = true;
+
--- /dev/null
+From 4c663cfc523a88d97a8309b04a089c27dc57fd7e Mon Sep 17 00:00:00 2001
+From: Imre Deak <imre.deak@intel.com>
+Date: Fri, 24 May 2013 15:55:09 -0700
+Subject: wait: fix false timeouts when using wait_event_timeout()
+
+From: Imre Deak <imre.deak@intel.com>
+
+commit 4c663cfc523a88d97a8309b04a089c27dc57fd7e upstream.
+
+Many callers of wait_event_timeout() and
+wait_event_interruptible_timeout() expect that the return value will be
+positive if the specified condition becomes true before the timeout
+elapses. However, at the moment this isn't guaranteed. If the wake-up
+handler is delayed enough, the time remaining until timeout will be
+calculated as 0 - and passed back as a return value - even if the
+condition became true before the timeout has passed.
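+
+A minimal sketch of the caller pattern this breaks (names are
+illustrative):
+
+	long ret = wait_event_timeout(wq, done, HZ);
+	if (!ret)
+		/* Treated as a timeout, but before this fix the remaining
+		 * time could round down to 0 even though 'done' became
+		 * true in time. */
+		return -ETIMEDOUT;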
+
+Fix this by returning at least 1 if the condition becomes true. This
+semantic is in line with what wait_for_completion_timeout() does; see
+commit bb10ed09 ("sched: fix wait_for_completion_timeout() spurious
+failure under heavy load").
+
+Daniel said "We have 3 instances of this bug in drm/i915. One case even
+where we switch between the interruptible and not interruptible
+wait_event_timeout variants, foolishly presuming they have the same
+semantics. I very much like this."
+
+One such bug is reported at
+ https://bugs.freedesktop.org/show_bug.cgi?id=64133
+
+Signed-off-by: Imre Deak <imre.deak@intel.com>
+Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
+Acked-by: David Howells <dhowells@redhat.com>
+Acked-by: Jens Axboe <axboe@kernel.dk>
+Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
+Cc: Dave Jones <davej@redhat.com>
+Cc: Lukas Czerner <lczerner@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/wait.h | 16 +++++++++++-----
+ 1 file changed, 11 insertions(+), 5 deletions(-)
+
+--- a/include/linux/wait.h
++++ b/include/linux/wait.h
+@@ -233,6 +233,8 @@ do { \
+ if (!ret) \
+ break; \
+ } \
++ if (!ret && (condition)) \
++ ret = 1; \
+ finish_wait(&wq, &__wait); \
+ } while (0)
+
+@@ -249,8 +251,9 @@ do { \
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+- * The function returns 0 if the @timeout elapsed, and the remaining
+- * jiffies if the condition evaluated to true before the timeout elapsed.
++ * The function returns 0 if the @timeout elapsed, or the remaining
++ * jiffies (at least 1) if the @condition evaluated to %true before
++ * the @timeout elapsed.
+ */
+ #define wait_event_timeout(wq, condition, timeout) \
+ ({ \
+@@ -318,6 +321,8 @@ do { \
+ ret = -ERESTARTSYS; \
+ break; \
+ } \
++ if (!ret && (condition)) \
++ ret = 1; \
+ finish_wait(&wq, &__wait); \
+ } while (0)
+
+@@ -334,9 +339,10 @@ do { \
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+- * The function returns 0 if the @timeout elapsed, -ERESTARTSYS if it
+- * was interrupted by a signal, and the remaining jiffies otherwise
+- * if the condition evaluated to true before the timeout elapsed.
++ * Returns:
++ * 0 if the @timeout elapsed, -%ERESTARTSYS if it was interrupted by
++ * a signal, or the remaining jiffies (at least 1) if the @condition
++ * evaluated to %true before the @timeout elapsed.
+ */
+ #define wait_event_interruptible_timeout(wq, condition, timeout) \
+ ({ \