]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.4-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 26 May 2013 00:24:30 +0000 (09:24 +0900)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 26 May 2013 00:24:30 +0000 (09:24 +0900)
added patches:
drivers-block-brd.c-fix-brd_lookup_page-race.patch
drivers-leds-leds-ot200.c-fix-error-caused-by-shifted-mask.patch
kirkwood-enable-pcie-port-1-on-qnap-ts-11x-ts-21x.patch
klist-del-waiter-from-klist_remove_waiters-before-wakeup-waitting-process.patch
mm-compaction-fix-of-improper-cache-flush-in-migration-code.patch
mm-mmu_notifier-re-fix-freed-page-still-mapped-in-secondary-mmu.patch
mm-pagewalk.c-walk_page_range-should-avoid-vm_pfnmap-areas.patch
mm-thp-use-pmd_populate-to-update-the-pmd-with-pgtable_t-pointer.patch
nilfs2-fix-issue-of-nilfs_set_page_dirty-for-page-at-eof-boundary.patch
ocfs2-goto-out_unlock-if-ocfs2_get_clusters_nocache-failed-in-ocfs2_fiemap.patch
perf-net_dropmonitor-fix-symbol-relative-addresses.patch
perf-net_dropmonitor-fix-trace-parameter-order.patch
tg3-fix-data-corruption-on-5725-with-tso.patch
wait-fix-false-timeouts-when-using-wait_event_timeout.patch

15 files changed:
queue-3.4/drivers-block-brd.c-fix-brd_lookup_page-race.patch [new file with mode: 0644]
queue-3.4/drivers-leds-leds-ot200.c-fix-error-caused-by-shifted-mask.patch [new file with mode: 0644]
queue-3.4/kirkwood-enable-pcie-port-1-on-qnap-ts-11x-ts-21x.patch [new file with mode: 0644]
queue-3.4/klist-del-waiter-from-klist_remove_waiters-before-wakeup-waitting-process.patch [new file with mode: 0644]
queue-3.4/mm-compaction-fix-of-improper-cache-flush-in-migration-code.patch [new file with mode: 0644]
queue-3.4/mm-mmu_notifier-re-fix-freed-page-still-mapped-in-secondary-mmu.patch [new file with mode: 0644]
queue-3.4/mm-pagewalk.c-walk_page_range-should-avoid-vm_pfnmap-areas.patch [new file with mode: 0644]
queue-3.4/mm-thp-use-pmd_populate-to-update-the-pmd-with-pgtable_t-pointer.patch [new file with mode: 0644]
queue-3.4/nilfs2-fix-issue-of-nilfs_set_page_dirty-for-page-at-eof-boundary.patch [new file with mode: 0644]
queue-3.4/ocfs2-goto-out_unlock-if-ocfs2_get_clusters_nocache-failed-in-ocfs2_fiemap.patch [new file with mode: 0644]
queue-3.4/perf-net_dropmonitor-fix-symbol-relative-addresses.patch [new file with mode: 0644]
queue-3.4/perf-net_dropmonitor-fix-trace-parameter-order.patch [new file with mode: 0644]
queue-3.4/series
queue-3.4/tg3-fix-data-corruption-on-5725-with-tso.patch [new file with mode: 0644]
queue-3.4/wait-fix-false-timeouts-when-using-wait_event_timeout.patch [new file with mode: 0644]

diff --git a/queue-3.4/drivers-block-brd.c-fix-brd_lookup_page-race.patch b/queue-3.4/drivers-block-brd.c-fix-brd_lookup_page-race.patch
new file mode 100644 (file)
index 0000000..1d5c97f
--- /dev/null
@@ -0,0 +1,43 @@
+From dfd20b2b174d3a9b258ea3b7a35ead33576587b1 Mon Sep 17 00:00:00 2001
+From: Brian Behlendorf <behlendorf1@llnl.gov>
+Date: Fri, 24 May 2013 15:55:28 -0700
+Subject: drivers/block/brd.c: fix brd_lookup_page() race
+
+From: Brian Behlendorf <behlendorf1@llnl.gov>
+
+commit dfd20b2b174d3a9b258ea3b7a35ead33576587b1 upstream.
+
+The index on the page must be set before it is inserted in the radix
+tree.  Otherwise there is a small race which can occur during lookup
+where the page can be found with the incorrect index.  This will trigger
+the BUG_ON() in brd_lookup_page().
+
+Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
+Reported-by: Chris Wedgwood <cw@f00f.org>
+Cc: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/block/brd.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/block/brd.c
++++ b/drivers/block/brd.c
+@@ -117,13 +117,13 @@ static struct page *brd_insert_page(stru
+       spin_lock(&brd->brd_lock);
+       idx = sector >> PAGE_SECTORS_SHIFT;
++      page->index = idx;
+       if (radix_tree_insert(&brd->brd_pages, idx, page)) {
+               __free_page(page);
+               page = radix_tree_lookup(&brd->brd_pages, idx);
+               BUG_ON(!page);
+               BUG_ON(page->index != idx);
+-      } else
+-              page->index = idx;
++      }
+       spin_unlock(&brd->brd_lock);
+       radix_tree_preload_end();
diff --git a/queue-3.4/drivers-leds-leds-ot200.c-fix-error-caused-by-shifted-mask.patch b/queue-3.4/drivers-leds-leds-ot200.c-fix-error-caused-by-shifted-mask.patch
new file mode 100644 (file)
index 0000000..2a43085
--- /dev/null
@@ -0,0 +1,74 @@
+From 4b949b8af12e24b8a48fa5bb775a13b558d9f4da Mon Sep 17 00:00:00 2001
+From: Christian Gmeiner <christian.gmeiner@gmail.com>
+Date: Fri, 24 May 2013 15:55:22 -0700
+Subject: drivers/leds/leds-ot200.c: fix error caused by shifted mask
+
+From: Christian Gmeiner <christian.gmeiner@gmail.com>
+
+commit 4b949b8af12e24b8a48fa5bb775a13b558d9f4da upstream.
+
+During the development of this driver an in-house register documentation
+was used.  The last week some integration tests were done and this
+problem was found.  It turned out that the released register
+documentation is wrong.
+
+The fix is very simple: shift all masks by one.
+
+Signed-off-by: Christian Gmeiner <christian.gmeiner@gmail.com>
+Cc: Bryan Wu <cooloney@gmail.com>
+Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/leds/leds-ot200.c |   14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+--- a/drivers/leds/leds-ot200.c
++++ b/drivers/leds/leds-ot200.c
+@@ -47,37 +47,37 @@ static struct ot200_led leds[] = {
+       {
+               .name = "led_1",
+               .port = 0x49,
+-              .mask = BIT(7),
++              .mask = BIT(6),
+       },
+       {
+               .name = "led_2",
+               .port = 0x49,
+-              .mask = BIT(6),
++              .mask = BIT(5),
+       },
+       {
+               .name = "led_3",
+               .port = 0x49,
+-              .mask = BIT(5),
++              .mask = BIT(4),
+       },
+       {
+               .name = "led_4",
+               .port = 0x49,
+-              .mask = BIT(4),
++              .mask = BIT(3),
+       },
+       {
+               .name = "led_5",
+               .port = 0x49,
+-              .mask = BIT(3),
++              .mask = BIT(2),
+       },
+       {
+               .name = "led_6",
+               .port = 0x49,
+-              .mask = BIT(2),
++              .mask = BIT(1),
+       },
+       {
+               .name = "led_7",
+               .port = 0x49,
+-              .mask = BIT(1),
++              .mask = BIT(0),
+       }
+ };
diff --git a/queue-3.4/kirkwood-enable-pcie-port-1-on-qnap-ts-11x-ts-21x.patch b/queue-3.4/kirkwood-enable-pcie-port-1-on-qnap-ts-11x-ts-21x.patch
new file mode 100644 (file)
index 0000000..4913bb5
--- /dev/null
@@ -0,0 +1,34 @@
+From 99e11334dcb846f9b76fb808196c7f47aa83abb3 Mon Sep 17 00:00:00 2001
+From: Martin Michlmayr <tbm@cyrius.com>
+Date: Sun, 21 Apr 2013 17:14:00 +0100
+Subject: Kirkwood: Enable PCIe port 1 on QNAP TS-11x/TS-21x
+
+From: Martin Michlmayr <tbm@cyrius.com>
+
+commit 99e11334dcb846f9b76fb808196c7f47aa83abb3 upstream.
+
+Enable KW_PCIE1 on QNAP TS-11x/TS-21x devices as newer revisions
+(rev 1.3) have a USB 3.0 chip from Etron on PCIe port 1.  Thanks
+to Marek Vasut for identifying this issue!
+
+Signed-off-by: Martin Michlmayr <tbm@cyrius.com>
+Tested-by: Marek Vasut <marex@denx.de>
+Acked-by: Andrew Lunn <andrew@lunn.ch>
+Signed-off-by: Jason Cooper <jason@lakedaemon.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/mach-kirkwood/ts219-setup.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/arm/mach-kirkwood/ts219-setup.c
++++ b/arch/arm/mach-kirkwood/ts219-setup.c
+@@ -124,7 +124,7 @@ static void __init qnap_ts219_init(void)
+ static int __init ts219_pci_init(void)
+ {
+       if (machine_is_ts219())
+-              kirkwood_pcie_init(KW_PCIE0);
++              kirkwood_pcie_init(KW_PCIE1 | KW_PCIE0);
+       return 0;
+ }
diff --git a/queue-3.4/klist-del-waiter-from-klist_remove_waiters-before-wakeup-waitting-process.patch b/queue-3.4/klist-del-waiter-from-klist_remove_waiters-before-wakeup-waitting-process.patch
new file mode 100644 (file)
index 0000000..9545f0a
--- /dev/null
@@ -0,0 +1,40 @@
+From ac5a2962b02f57dea76d314ef2521a2170b28ab6 Mon Sep 17 00:00:00 2001
+From: "wang, biao" <biao.wang@intel.com>
+Date: Thu, 16 May 2013 09:50:13 +0800
+Subject: klist: del waiter from klist_remove_waiters before wakeup waitting process
+
+From: "wang, biao" <biao.wang@intel.com>
+
+commit ac5a2962b02f57dea76d314ef2521a2170b28ab6 upstream.
+
+There is a race between klist_remove and klist_release. klist_remove
+uses a local var waiter saved on stack. When klist_release calls
+wake_up_process(waiter->process) to wake up the waiter, waiter might run
+immediately and reuse the stack. Then, klist_release calls
+list_del(&waiter->list) to change previous
+wait data and cause prior waiter thread corrupt.
+
+The patch fixes it against kernel 3.9.
+
+Signed-off-by: wang, biao <biao.wang@intel.com>
+Acked-by: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ lib/klist.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/lib/klist.c
++++ b/lib/klist.c
+@@ -193,10 +193,10 @@ static void klist_release(struct kref *k
+               if (waiter->node != n)
+                       continue;
++              list_del(&waiter->list);
+               waiter->woken = 1;
+               mb();
+               wake_up_process(waiter->process);
+-              list_del(&waiter->list);
+       }
+       spin_unlock(&klist_remove_lock);
+       knode_set_klist(n, NULL);
diff --git a/queue-3.4/mm-compaction-fix-of-improper-cache-flush-in-migration-code.patch b/queue-3.4/mm-compaction-fix-of-improper-cache-flush-in-migration-code.patch
new file mode 100644 (file)
index 0000000..2fb2282
--- /dev/null
@@ -0,0 +1,55 @@
+From c2cc499c5bcf9040a738f49e8051b42078205748 Mon Sep 17 00:00:00 2001
+From: Leonid Yegoshin <Leonid.Yegoshin@imgtec.com>
+Date: Fri, 24 May 2013 15:55:18 -0700
+Subject: mm compaction: fix of improper cache flush in migration code
+
+From: Leonid Yegoshin <Leonid.Yegoshin@imgtec.com>
+
+commit c2cc499c5bcf9040a738f49e8051b42078205748 upstream.
+
+Page 'new' during MIGRATION can't be flushed with flush_cache_page().
+Using flush_cache_page(vma, addr, pfn) is justified only if the page is
+already placed in process page table, and that is done right after
+flush_cache_page().  But without it the arch function has no knowledge
+of process PTE and does nothing.
+
+Besides that, flush_cache_page() flushes an application cache page, but
+the kernel has a different page virtual address and dirtied it.
+
+Replace it with flush_dcache_page(new) which is the proper usage.
+
+The old page is flushed in try_to_unmap_one() before migration.
+
+This bug takes place in Sead3 board with M14Kc MIPS CPU without cache
+aliasing (but Harvard arch - separate I and D cache) in tight memory
+environment (128MB) each 1-3days on SOAK test.  It fails in cc1 during
+kernel build (SIGILL, SIGBUS, SIGSEG) if CONFIG_COMPACTION is switched
+ON.
+
+Signed-off-by: Leonid Yegoshin <Leonid.Yegoshin@imgtec.com>
+Cc: Leonid Yegoshin <yegoshin@mips.com>
+Acked-by: Rik van Riel <riel@redhat.com>
+Cc: Michal Hocko <mhocko@suse.cz>
+Acked-by: Mel Gorman <mgorman@suse.de>
+Cc: Ralf Baechle <ralf@linux-mips.org>
+Cc: Russell King <rmk@arm.linux.org.uk>
+Cc: David Miller <davem@davemloft.net>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/migrate.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/migrate.c
++++ b/mm/migrate.c
+@@ -145,7 +145,7 @@ static int remove_migration_pte(struct p
+       if (PageHuge(new))
+               pte = pte_mkhuge(pte);
+ #endif
+-      flush_cache_page(vma, addr, pte_pfn(pte));
++      flush_dcache_page(new);
+       set_pte_at(mm, addr, ptep, pte);
+       if (PageHuge(new)) {
diff --git a/queue-3.4/mm-mmu_notifier-re-fix-freed-page-still-mapped-in-secondary-mmu.patch b/queue-3.4/mm-mmu_notifier-re-fix-freed-page-still-mapped-in-secondary-mmu.patch
new file mode 100644 (file)
index 0000000..0b7d220
--- /dev/null
@@ -0,0 +1,181 @@
+From d34883d4e35c0a994e91dd847a82b4c9e0c31d83 Mon Sep 17 00:00:00 2001
+From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
+Date: Fri, 24 May 2013 15:55:11 -0700
+Subject: mm: mmu_notifier: re-fix freed page still mapped in secondary MMU
+
+From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
+
+commit d34883d4e35c0a994e91dd847a82b4c9e0c31d83 upstream.
+
+Commit 751efd8610d3 ("mmu_notifier_unregister NULL Pointer deref and
+multiple ->release()") breaks the fix 3ad3d901bbcf ("mm: mmu_notifier:
+fix freed page still mapped in secondary MMU").
+
+Since hlist_for_each_entry_rcu() is changed now, we can not revert that
+patch directly, so this patch reverts the commit and simply fix the bug
+spotted by that patch
+
+This bug spotted by commit 751efd8610d3 is:
+
+    There is a race condition between mmu_notifier_unregister() and
+    __mmu_notifier_release().
+
+    Assume two tasks, one calling mmu_notifier_unregister() as a result
+    of a filp_close() ->flush() callout (task A), and the other calling
+    mmu_notifier_release() from an mmput() (task B).
+
+                        A                               B
+    t1                                            srcu_read_lock()
+    t2            if (!hlist_unhashed())
+    t3                                            srcu_read_unlock()
+    t4            srcu_read_lock()
+    t5                                            hlist_del_init_rcu()
+    t6                                            synchronize_srcu()
+    t7            srcu_read_unlock()
+    t8            hlist_del_rcu()  <--- NULL pointer deref.
+
+This can be fixed by using hlist_del_init_rcu instead of hlist_del_rcu.
+
+The another issue spotted in the commit is "multiple ->release()
+callouts", we needn't care it too much because it is really rare (e.g,
+can not happen on kvm since mmu-notify is unregistered after
+exit_mmap()) and the later call of multiple ->release should be fast
+since all the pages have already been released by the first call.
+Anyway, this issue should be fixed in a separate patch.
+
+-stable suggestions: Any version that has commit 751efd8610d3 need to be
+backported.  I find the oldest version has this commit is 3.0-stable.
+
+[akpm@linux-foundation.org: tweak comments]
+Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
+Tested-by: Robin Holt <holt@sgi.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/mmu_notifier.c |   79 ++++++++++++++++++++++++++----------------------------
+ 1 file changed, 39 insertions(+), 40 deletions(-)
+
+--- a/mm/mmu_notifier.c
++++ b/mm/mmu_notifier.c
+@@ -40,48 +40,44 @@ void __mmu_notifier_release(struct mm_st
+       int id;
+       /*
+-       * srcu_read_lock() here will block synchronize_srcu() in
+-       * mmu_notifier_unregister() until all registered
+-       * ->release() callouts this function makes have
+-       * returned.
++       * SRCU here will block mmu_notifier_unregister until
++       * ->release returns.
+        */
+       id = srcu_read_lock(&srcu);
++      hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist)
++              /*
++               * If ->release runs before mmu_notifier_unregister it must be
++               * handled, as it's the only way for the driver to flush all
++               * existing sptes and stop the driver from establishing any more
++               * sptes before all the pages in the mm are freed.
++               */
++              if (mn->ops->release)
++                      mn->ops->release(mn, mm);
++      srcu_read_unlock(&srcu, id);
++
+       spin_lock(&mm->mmu_notifier_mm->lock);
+       while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) {
+               mn = hlist_entry(mm->mmu_notifier_mm->list.first,
+                                struct mmu_notifier,
+                                hlist);
+-
+               /*
+-               * Unlink.  This will prevent mmu_notifier_unregister()
+-               * from also making the ->release() callout.
++               * We arrived before mmu_notifier_unregister so
++               * mmu_notifier_unregister will do nothing other than to wait
++               * for ->release to finish and for mmu_notifier_unregister to
++               * return.
+                */
+               hlist_del_init_rcu(&mn->hlist);
+-              spin_unlock(&mm->mmu_notifier_mm->lock);
+-
+-              /*
+-               * Clear sptes. (see 'release' description in mmu_notifier.h)
+-               */
+-              if (mn->ops->release)
+-                      mn->ops->release(mn, mm);
+-
+-              spin_lock(&mm->mmu_notifier_mm->lock);
+       }
+       spin_unlock(&mm->mmu_notifier_mm->lock);
+       /*
+-       * All callouts to ->release() which we have done are complete.
+-       * Allow synchronize_srcu() in mmu_notifier_unregister() to complete
+-       */
+-      srcu_read_unlock(&srcu, id);
+-
+-      /*
+-       * mmu_notifier_unregister() may have unlinked a notifier and may
+-       * still be calling out to it.  Additionally, other notifiers
+-       * may have been active via vmtruncate() et. al. Block here
+-       * to ensure that all notifier callouts for this mm have been
+-       * completed and the sptes are really cleaned up before returning
+-       * to exit_mmap().
++       * synchronize_srcu here prevents mmu_notifier_release from returning to
++       * exit_mmap (which would proceed with freeing all pages in the mm)
++       * until the ->release method returns, if it was invoked by
++       * mmu_notifier_unregister.
++       *
++       * The mmu_notifier_mm can't go away from under us because one mm_count
++       * is held by exit_mmap.
+        */
+       synchronize_srcu(&srcu);
+ }
+@@ -302,31 +298,34 @@ void mmu_notifier_unregister(struct mmu_
+ {
+       BUG_ON(atomic_read(&mm->mm_count) <= 0);
+-      spin_lock(&mm->mmu_notifier_mm->lock);
+       if (!hlist_unhashed(&mn->hlist)) {
++              /*
++               * SRCU here will force exit_mmap to wait for ->release to
++               * finish before freeing the pages.
++               */
+               int id;
++              id = srcu_read_lock(&srcu);
+               /*
+-               * Ensure we synchronize up with __mmu_notifier_release().
++               * exit_mmap will block in mmu_notifier_release to guarantee
++               * that ->release is called before freeing the pages.
+                */
+-              id = srcu_read_lock(&srcu);
+-
+-              hlist_del_rcu(&mn->hlist);
+-              spin_unlock(&mm->mmu_notifier_mm->lock);
+-
+               if (mn->ops->release)
+                       mn->ops->release(mn, mm);
++              srcu_read_unlock(&srcu, id);
++              spin_lock(&mm->mmu_notifier_mm->lock);
+               /*
+-               * Allow __mmu_notifier_release() to complete.
++               * Can not use list_del_rcu() since __mmu_notifier_release
++               * can delete it before we hold the lock.
+                */
+-              srcu_read_unlock(&srcu, id);
+-      } else
++              hlist_del_init_rcu(&mn->hlist);
+               spin_unlock(&mm->mmu_notifier_mm->lock);
++      }
+       /*
+-       * Wait for any running method to finish, including ->release() if it
+-       * was run by __mmu_notifier_release() instead of us.
++       * Wait for any running method to finish, of course including
++       * ->release if it was run by mmu_notifier_relase instead of us.
+        */
+       synchronize_srcu(&srcu);
diff --git a/queue-3.4/mm-pagewalk.c-walk_page_range-should-avoid-vm_pfnmap-areas.patch b/queue-3.4/mm-pagewalk.c-walk_page_range-should-avoid-vm_pfnmap-areas.patch
new file mode 100644 (file)
index 0000000..4be7e2d
--- /dev/null
@@ -0,0 +1,151 @@
+From a9ff785e4437c83d2179161e012f5bdfbd6381f0 Mon Sep 17 00:00:00 2001
+From: Cliff Wickman <cpw@sgi.com>
+Date: Fri, 24 May 2013 15:55:36 -0700
+Subject: mm/pagewalk.c: walk_page_range should avoid VM_PFNMAP areas
+
+From: Cliff Wickman <cpw@sgi.com>
+
+commit a9ff785e4437c83d2179161e012f5bdfbd6381f0 upstream.
+
+A panic can be caused by simply cat'ing /proc/<pid>/smaps while an
+application has a VM_PFNMAP range.  It happened in-house when a
+benchmarker was trying to decipher the memory layout of his program.
+
+/proc/<pid>/smaps and similar walks through a user page table should not
+be looking at VM_PFNMAP areas.
+
+Certain tests in walk_page_range() (specifically split_huge_page_pmd())
+assume that all the mapped PFN's are backed with page structures.  And
+this is not usually true for VM_PFNMAP areas.  This can result in panics
+on kernel page faults when attempting to address those page structures.
+
+There are a half dozen callers of walk_page_range() that walk through a
+task's entire page table (as N.  Horiguchi pointed out).  So rather than
+change all of them, this patch changes just walk_page_range() to ignore
+VM_PFNMAP areas.
+
+The logic of hugetlb_vma() is moved back into walk_page_range(), as we
+want to test any vma in the range.
+
+VM_PFNMAP areas are used by:
+- graphics memory manager   gpu/drm/drm_gem.c
+- global reference unit     sgi-gru/grufile.c
+- sgi special memory        char/mspec.c
+- and probably several out-of-tree modules
+
+[akpm@linux-foundation.org: remove now-unused hugetlb_vma() stub]
+Signed-off-by: Cliff Wickman <cpw@sgi.com>
+Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Cc: Mel Gorman <mel@csn.ul.ie>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: David Sterba <dsterba@suse.cz>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: KOSAKI Motohiro <kosaki.motohiro@gmail.com>
+Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/pagewalk.c |   70 +++++++++++++++++++++++++++++-----------------------------
+ 1 file changed, 36 insertions(+), 34 deletions(-)
+
+--- a/mm/pagewalk.c
++++ b/mm/pagewalk.c
+@@ -127,28 +127,7 @@ static int walk_hugetlb_range(struct vm_
+       return 0;
+ }
+-static struct vm_area_struct* hugetlb_vma(unsigned long addr, struct mm_walk *walk)
+-{
+-      struct vm_area_struct *vma;
+-
+-      /* We don't need vma lookup at all. */
+-      if (!walk->hugetlb_entry)
+-              return NULL;
+-
+-      VM_BUG_ON(!rwsem_is_locked(&walk->mm->mmap_sem));
+-      vma = find_vma(walk->mm, addr);
+-      if (vma && vma->vm_start <= addr && is_vm_hugetlb_page(vma))
+-              return vma;
+-
+-      return NULL;
+-}
+-
+ #else /* CONFIG_HUGETLB_PAGE */
+-static struct vm_area_struct* hugetlb_vma(unsigned long addr, struct mm_walk *walk)
+-{
+-      return NULL;
+-}
+-
+ static int walk_hugetlb_range(struct vm_area_struct *vma,
+                             unsigned long addr, unsigned long end,
+                             struct mm_walk *walk)
+@@ -199,30 +178,53 @@ int walk_page_range(unsigned long addr,
+       if (!walk->mm)
+               return -EINVAL;
++      VM_BUG_ON(!rwsem_is_locked(&walk->mm->mmap_sem));
++
+       pgd = pgd_offset(walk->mm, addr);
+       do {
+-              struct vm_area_struct *vma;
++              struct vm_area_struct *vma = NULL;
+               next = pgd_addr_end(addr, end);
+               /*
+-               * handle hugetlb vma individually because pagetable walk for
+-               * the hugetlb page is dependent on the architecture and
+-               * we can't handled it in the same manner as non-huge pages.
++               * This function was not intended to be vma based.
++               * But there are vma special cases to be handled:
++               * - hugetlb vma's
++               * - VM_PFNMAP vma's
+                */
+-              vma = hugetlb_vma(addr, walk);
++              vma = find_vma(walk->mm, addr);
+               if (vma) {
+-                      if (vma->vm_end < next)
++                      /*
++                       * There are no page structures backing a VM_PFNMAP
++                       * range, so do not allow split_huge_page_pmd().
++                       */
++                      if ((vma->vm_start <= addr) &&
++                          (vma->vm_flags & VM_PFNMAP)) {
+                               next = vma->vm_end;
++                              pgd = pgd_offset(walk->mm, next);
++                              continue;
++                      }
+                       /*
+-                       * Hugepage is very tightly coupled with vma, so
+-                       * walk through hugetlb entries within a given vma.
++                       * Handle hugetlb vma individually because pagetable
++                       * walk for the hugetlb page is dependent on the
++                       * architecture and we can't handled it in the same
++                       * manner as non-huge pages.
+                        */
+-                      err = walk_hugetlb_range(vma, addr, next, walk);
+-                      if (err)
+-                              break;
+-                      pgd = pgd_offset(walk->mm, next);
+-                      continue;
++                      if (walk->hugetlb_entry && (vma->vm_start <= addr) &&
++                          is_vm_hugetlb_page(vma)) {
++                              if (vma->vm_end < next)
++                                      next = vma->vm_end;
++                              /*
++                               * Hugepage is very tightly coupled with vma,
++                               * so walk through hugetlb entries within a
++                               * given vma.
++                               */
++                              err = walk_hugetlb_range(vma, addr, next, walk);
++                              if (err)
++                                      break;
++                              pgd = pgd_offset(walk->mm, next);
++                              continue;
++                      }
+               }
+               if (pgd_none_or_clear_bad(pgd)) {
diff --git a/queue-3.4/mm-thp-use-pmd_populate-to-update-the-pmd-with-pgtable_t-pointer.patch b/queue-3.4/mm-thp-use-pmd_populate-to-update-the-pmd-with-pgtable_t-pointer.patch
new file mode 100644 (file)
index 0000000..9267e61
--- /dev/null
@@ -0,0 +1,45 @@
+From 7c3425123ddfdc5f48e7913ff59d908789712b18 Mon Sep 17 00:00:00 2001
+From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
+Date: Fri, 24 May 2013 15:55:21 -0700
+Subject: mm/THP: use pmd_populate() to update the pmd with pgtable_t pointer
+
+From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
+
+commit 7c3425123ddfdc5f48e7913ff59d908789712b18 upstream.
+
+We should not use set_pmd_at to update pmd_t with pgtable_t pointer.
+set_pmd_at is used to set pmd with huge pte entries and architectures
+like ppc64, clear few flags from the pte when saving a new entry.
+Without this change we observe bad pte errors like below on ppc64 with
+THP enabled.
+
+  BUG: Bad page map in process ld mm=0xc000001ee39f4780 pte:7fc3f37848000001 pmd:c000001ec0000000
+
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/huge_memory.c |    7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -1949,7 +1949,12 @@ static void collapse_huge_page(struct mm
+               pte_unmap(pte);
+               spin_lock(&mm->page_table_lock);
+               BUG_ON(!pmd_none(*pmd));
+-              set_pmd_at(mm, address, pmd, _pmd);
++              /*
++               * We can only use set_pmd_at when establishing
++               * hugepmds and never for establishing regular pmds that
++               * points to regular pagetables. Use pmd_populate for that
++               */
++              pmd_populate(mm, pmd, pmd_pgtable(_pmd));
+               spin_unlock(&mm->page_table_lock);
+               anon_vma_unlock(vma->anon_vma);
+               goto out;
diff --git a/queue-3.4/nilfs2-fix-issue-of-nilfs_set_page_dirty-for-page-at-eof-boundary.patch b/queue-3.4/nilfs2-fix-issue-of-nilfs_set_page_dirty-for-page-at-eof-boundary.patch
new file mode 100644 (file)
index 0000000..172dadf
--- /dev/null
@@ -0,0 +1,153 @@
+From 136e8770cd5d1fe38b3c613100dd6dc4db6d4fa6 Mon Sep 17 00:00:00 2001
+From: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
+Date: Fri, 24 May 2013 15:55:29 -0700
+Subject: nilfs2: fix issue of nilfs_set_page_dirty() for page at EOF boundary
+
+From: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
+
+commit 136e8770cd5d1fe38b3c613100dd6dc4db6d4fa6 upstream.
+
+nilfs2: fix issue of nilfs_set_page_dirty for page at EOF boundary
+
+DESCRIPTION:
+ There are use-cases when NILFS2 file system (formatted with block size
+lesser than 4 KB) can be remounted in RO mode because of encountering of
+"broken bmap" issue.
+
+The issue was reported by Anthony Doggett <Anthony2486@interfaces.org.uk>:
+ "The machine I've been trialling nilfs on is running Debian Testing,
+  Linux version 3.2.0-4-686-pae (debian-kernel@lists.debian.org) (gcc
+  version 4.6.3 (Debian 4.6.3-14) ) #1 SMP Debian 3.2.35-2), but I've
+  also reproduced it (identically) with Debian Unstable amd64 and Debian
+  Experimental (using the 3.8-trunk kernel).  The problematic partitions
+  were formatted with "mkfs.nilfs2 -b 1024 -B 8192"."
+
+SYMPTOMS:
+(1) System log contains error messages likewise:
+
+    [63102.496756] nilfs_direct_assign: invalid pointer: 0
+    [63102.496786] NILFS error (device dm-17): nilfs_bmap_assign: broken bmap (inode number=28)
+    [63102.496798]
+    [63102.524403] Remounting filesystem read-only
+
+(2) The NILFS2 file system is remounted in RO mode.
+
+REPRODUCING PATH:
+(1) Create volume group with name "unencrypted" by means of vgcreate utility.
+(2) Run script (prepared by Anthony Doggett <Anthony2486@interfaces.org.uk>):
+
+----------------[BEGIN SCRIPT]--------------------
+
+VG=unencrypted
+lvcreate --size 2G --name ntest $VG
+mkfs.nilfs2 -b 1024 -B 8192 /dev/mapper/$VG-ntest
+mkdir /var/tmp/n
+mkdir /var/tmp/n/ntest
+mount /dev/mapper/$VG-ntest /var/tmp/n/ntest
+mkdir /var/tmp/n/ntest/thedir
+cd /var/tmp/n/ntest/thedir
+sleep 2
+date
+darcs init
+sleep 2
+dmesg|tail -n 5
+date
+darcs whatsnew || true
+date
+sleep 2
+dmesg|tail -n 5
+----------------[END SCRIPT]--------------------
+
+REPRODUCIBILITY: 100%
+
+INVESTIGATION:
+As it was discovered, the issue takes place during segment
+construction after executing such sequence of user-space operations:
+
+  open("_darcs/index", O_RDWR|O_CREAT|O_NOCTTY, 0666) = 7
+  fstat(7, {st_mode=S_IFREG|0644, st_size=0, ...}) = 0
+  ftruncate(7, 60)
+
+The error message "NILFS error (device dm-17): nilfs_bmap_assign: broken
+bmap (inode number=28)" takes place because of trying to get block
+number for third block of the file with logical offset #3072 bytes.  As
+it is possible to see from above output, the file has 60 bytes of the
+whole size.  So, it is enough one block (1 KB in size) allocation for
+the whole file.  Trying to operate with several blocks instead of one
+takes place because of discovering several dirty buffers for this file
+in nilfs_segctor_scan_file() method.
+
+The root cause of this issue is in nilfs_set_page_dirty function which
+is called just before writing to an mmapped page.
+
+When nilfs_page_mkwrite function handles a page at EOF boundary, it
+fills hole blocks only inside EOF through __block_page_mkwrite().
+
+The __block_page_mkwrite() function calls set_page_dirty() after filling
+hole blocks, thus nilfs_set_page_dirty function (=
+a_ops->set_page_dirty) is called.  However, the current implementation
+of nilfs_set_page_dirty() wrongly marks all buffers dirty even for page
+at EOF boundary.
+
+As a result, buffers outside EOF are inconsistently marked dirty and
+queued for write even though they are not mapped with nilfs_get_block
+function.
+
+FIX:
+This modifies nilfs_set_page_dirty() not to mark hole blocks dirty.
+
+Thanks to Vyacheslav Dubeyko for his effort on analysis and proposals
+for this issue.
+
+Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
+Reported-by: Anthony Doggett <Anthony2486@interfaces.org.uk>
+Reported-by: Vyacheslav Dubeyko <slava@dubeyko.com>
+Cc: Vyacheslav Dubeyko <slava@dubeyko.com>
+Tested-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/nilfs2/inode.c |   27 +++++++++++++++++++++++----
+ 1 file changed, 23 insertions(+), 4 deletions(-)
+
+--- a/fs/nilfs2/inode.c
++++ b/fs/nilfs2/inode.c
+@@ -195,13 +195,32 @@ static int nilfs_writepage(struct page *
+ static int nilfs_set_page_dirty(struct page *page)
+ {
+-      int ret = __set_page_dirty_buffers(page);
++      int ret = __set_page_dirty_nobuffers(page);
+-      if (ret) {
++      if (page_has_buffers(page)) {
+               struct inode *inode = page->mapping->host;
+-              unsigned nr_dirty = 1 << (PAGE_SHIFT - inode->i_blkbits);
++              unsigned nr_dirty = 0;
++              struct buffer_head *bh, *head;
+-              nilfs_set_file_dirty(inode, nr_dirty);
++              /*
++               * This page is locked by callers, and no other thread
++               * concurrently marks its buffers dirty since they are
++               * only dirtied through routines in fs/buffer.c in
++               * which call sites of mark_buffer_dirty are protected
++               * by page lock.
++               */
++              bh = head = page_buffers(page);
++              do {
++                      /* Do not mark hole blocks dirty */
++                      if (buffer_dirty(bh) || !buffer_mapped(bh))
++                              continue;
++
++                      set_buffer_dirty(bh);
++                      nr_dirty++;
++              } while (bh = bh->b_this_page, bh != head);
++
++              if (nr_dirty)
++                      nilfs_set_file_dirty(inode, nr_dirty);
+       }
+       return ret;
+ }
diff --git a/queue-3.4/ocfs2-goto-out_unlock-if-ocfs2_get_clusters_nocache-failed-in-ocfs2_fiemap.patch b/queue-3.4/ocfs2-goto-out_unlock-if-ocfs2_get_clusters_nocache-failed-in-ocfs2_fiemap.patch
new file mode 100644 (file)
index 0000000..1458c2f
--- /dev/null
@@ -0,0 +1,41 @@
+From b4ca2b4b577c3530e34dcfaafccb2cc680ce95d1 Mon Sep 17 00:00:00 2001
+From: Joseph Qi <joseph.qi@huawei.com>
+Date: Fri, 24 May 2013 15:55:34 -0700
+Subject: ocfs2: goto out_unlock if ocfs2_get_clusters_nocache() failed in ocfs2_fiemap()
+
+From: Joseph Qi <joseph.qi@huawei.com>
+
+commit b4ca2b4b577c3530e34dcfaafccb2cc680ce95d1 upstream.
+
+Last time we found there is lock/unlock bug in ocfs2_file_aio_write, and
+then we did a thorough search for all lock resources in
+ocfs2_inode_info, including rw, inode and open lockres and found this
+bug.  My kernel version is 3.0.13, and it is also in the lastest version
+3.9.  In ocfs2_fiemap, once ocfs2_get_clusters_nocache failed, it should
+goto out_unlock instead of out, because we need release buffer head, up
+read alloc sem and unlock inode.
+
+Signed-off-by: Joseph Qi <joseph.qi@huawei.com>
+Reviewed-by: Jie Liu <jeff.liu@oracle.com>
+Cc: Mark Fasheh <mfasheh@suse.com>
+Cc: Joel Becker <jlbec@evilplan.org>
+Acked-by: Sunil Mushran <sunil.mushran@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ocfs2/extent_map.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/ocfs2/extent_map.c
++++ b/fs/ocfs2/extent_map.c
+@@ -791,7 +791,7 @@ int ocfs2_fiemap(struct inode *inode, st
+                                                &hole_size, &rec, &is_last);
+               if (ret) {
+                       mlog_errno(ret);
+-                      goto out;
++                      goto out_unlock;
+               }
+               if (rec.e_blkno == 0ULL) {
diff --git a/queue-3.4/perf-net_dropmonitor-fix-symbol-relative-addresses.patch b/queue-3.4/perf-net_dropmonitor-fix-symbol-relative-addresses.patch
new file mode 100644 (file)
index 0000000..8310e00
--- /dev/null
@@ -0,0 +1,37 @@
+From 5a1e99dd2028e00998d42029be86835d8ef4a46e Mon Sep 17 00:00:00 2001
+From: Ben Hutchings <ben@decadent.org.uk>
+Date: Mon, 20 May 2013 14:45:26 +0000
+Subject: perf: net_dropmonitor: Fix symbol-relative addresses
+
+From: Ben Hutchings <ben@decadent.org.uk>
+
+commit 5a1e99dd2028e00998d42029be86835d8ef4a46e upstream.
+
+The comparison between traced and symbol addresses is backwards: if
+the traced address doesn't exactly match a symbol (which we don't
+expect it to), we'll show the next symbol and the offset to it,
+whereas we should show the previous symbol and the offset from it.
+
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ tools/perf/scripts/python/net_dropmonitor.py |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/tools/perf/scripts/python/net_dropmonitor.py
++++ b/tools/perf/scripts/python/net_dropmonitor.py
+@@ -40,9 +40,9 @@ def get_kallsyms_table():
+ def get_sym(sloc):
+       loc = int(sloc)
+-      for i in kallsyms:
+-              if (i['loc'] >= loc):
+-                      return (i['name'], i['loc']-loc)
++      for i in kallsyms[::-1]:
++              if loc >= i['loc']:
++                      return (i['name'], loc - i['loc'])
+       return (None, 0)
+ def print_drop_table():
diff --git a/queue-3.4/perf-net_dropmonitor-fix-trace-parameter-order.patch b/queue-3.4/perf-net_dropmonitor-fix-trace-parameter-order.patch
new file mode 100644 (file)
index 0000000..ddd1952
--- /dev/null
@@ -0,0 +1,30 @@
+From 140c3c6a2bcd2c31e2f7f5a8d59689724776c8e5 Mon Sep 17 00:00:00 2001
+From: Ben Hutchings <ben@decadent.org.uk>
+Date: Mon, 20 May 2013 14:44:43 +0000
+Subject: perf: net_dropmonitor: Fix trace parameter order
+
+From: Ben Hutchings <ben@decadent.org.uk>
+
+commit 140c3c6a2bcd2c31e2f7f5a8d59689724776c8e5 upstream.
+
+This works much better if we don't treat protocol numbers as addresses.
+
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ tools/perf/scripts/python/net_dropmonitor.py |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/tools/perf/scripts/python/net_dropmonitor.py
++++ b/tools/perf/scripts/python/net_dropmonitor.py
+@@ -64,7 +64,7 @@ def trace_end():
+ # called from perf, when it finds a correspoinding event
+ def skb__kfree_skb(name, context, cpu, sec, nsec, pid, comm,
+-                      skbaddr, protocol, location):
++                 skbaddr, location, protocol):
+       slocation = str(location)
+       try:
+               drop_log[slocation] = drop_log[slocation] + 1
index 7ffddfa9be3afca204cfb1110986248d86a29231..790f22198a89d165adc3b7f530596d89bd6fb85f 100644 (file)
@@ -12,3 +12,17 @@ usb-xhci-override-bogus-bulk-wmaxpacketsize-values.patch
 usb-uhci-fix-for-suspend-of-virtual-hp-controller.patch
 cifs-only-set-ops-for-inodes-in-i_new-state.patch
 fat-fix-possible-overflow-for-fat_clusters.patch
+tg3-fix-data-corruption-on-5725-with-tso.patch
+perf-net_dropmonitor-fix-trace-parameter-order.patch
+perf-net_dropmonitor-fix-symbol-relative-addresses.patch
+ocfs2-goto-out_unlock-if-ocfs2_get_clusters_nocache-failed-in-ocfs2_fiemap.patch
+kirkwood-enable-pcie-port-1-on-qnap-ts-11x-ts-21x.patch
+drivers-leds-leds-ot200.c-fix-error-caused-by-shifted-mask.patch
+mm-compaction-fix-of-improper-cache-flush-in-migration-code.patch
+klist-del-waiter-from-klist_remove_waiters-before-wakeup-waitting-process.patch
+wait-fix-false-timeouts-when-using-wait_event_timeout.patch
+nilfs2-fix-issue-of-nilfs_set_page_dirty-for-page-at-eof-boundary.patch
+mm-mmu_notifier-re-fix-freed-page-still-mapped-in-secondary-mmu.patch
+drivers-block-brd.c-fix-brd_lookup_page-race.patch
+mm-pagewalk.c-walk_page_range-should-avoid-vm_pfnmap-areas.patch
+mm-thp-use-pmd_populate-to-update-the-pmd-with-pgtable_t-pointer.patch
diff --git a/queue-3.4/tg3-fix-data-corruption-on-5725-with-tso.patch b/queue-3.4/tg3-fix-data-corruption-on-5725-with-tso.patch
new file mode 100644 (file)
index 0000000..3baf50c
--- /dev/null
@@ -0,0 +1,55 @@
+From 0f0d15100a8ac875bdd408324c473e16d73d3557 Mon Sep 17 00:00:00 2001
+From: Michael Chan <mchan@broadcom.com>
+Date: Mon, 13 May 2013 11:04:16 +0000
+Subject: tg3: Fix data corruption on 5725 with TSO
+
+From: Michael Chan <mchan@broadcom.com>
+
+commit 0f0d15100a8ac875bdd408324c473e16d73d3557 upstream.
+
+The 5725 family of devices (asic rev 5762), corrupts TSO packets where
+the buffer is within MSS bytes of a 4G boundary (4G, 8G etc.). Detect
+this condition and trigger the workaround path.
+
+Signed-off-by: Michael Chan <mchan@broadcom.com>
+Signed-off-by: Nithin Nayak Sujir <nsujir@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/ethernet/broadcom/tg3.c |   17 +++++++++++++++++
+ 1 file changed, 17 insertions(+)
+
+--- a/drivers/net/ethernet/broadcom/tg3.c
++++ b/drivers/net/ethernet/broadcom/tg3.c
+@@ -6622,6 +6622,20 @@ static inline int tg3_4g_overflow_test(d
+       return (base > 0xffffdcc0) && (base + len + 8 < base);
+ }
++/* Test for TSO DMA buffers that cross into regions which are within MSS bytes
++ * of any 4GB boundaries: 4G, 8G, etc
++ */
++static inline int tg3_4g_tso_overflow_test(struct tg3 *tp, dma_addr_t mapping,
++                                         u32 len, u32 mss)
++{
++      if (tg3_asic_rev(tp) == ASIC_REV_5762 && mss) {
++              u32 base = (u32) mapping & 0xffffffff;
++
++              return ((base + len + (mss & 0x3fff)) < base);
++      }
++      return 0;
++}
++
+ /* Test for DMA addresses > 40-bit */
+ static inline int tg3_40bit_overflow_test(struct tg3 *tp, dma_addr_t mapping,
+                                         int len)
+@@ -6658,6 +6672,9 @@ static bool tg3_tx_frag_set(struct tg3_n
+       if (tg3_4g_overflow_test(map, len))
+               hwbug = true;
++      if (tg3_4g_tso_overflow_test(tp, map, len, mss))
++              hwbug = true;
++
+       if (tg3_40bit_overflow_test(tp, map, len))
+               hwbug = true;
diff --git a/queue-3.4/wait-fix-false-timeouts-when-using-wait_event_timeout.patch b/queue-3.4/wait-fix-false-timeouts-when-using-wait_event_timeout.patch
new file mode 100644 (file)
index 0000000..6029a0a
--- /dev/null
@@ -0,0 +1,91 @@
+From 4c663cfc523a88d97a8309b04a089c27dc57fd7e Mon Sep 17 00:00:00 2001
+From: Imre Deak <imre.deak@intel.com>
+Date: Fri, 24 May 2013 15:55:09 -0700
+Subject: wait: fix false timeouts when using wait_event_timeout()
+
+From: Imre Deak <imre.deak@intel.com>
+
+commit 4c663cfc523a88d97a8309b04a089c27dc57fd7e upstream.
+
+Many callers of the wait_event_timeout() and
+wait_event_interruptible_timeout() expect that the return value will be
+positive if the specified condition becomes true before the timeout
+elapses.  However, at the moment this isn't guaranteed.  If the wake-up
+handler is delayed enough, the time remaining until timeout will be
+calculated as 0 - and passed back as a return value - even if the
+condition became true before the timeout has passed.
+
+Fix this by returning at least 1 if the condition becomes true.  This
+semantic is in line with what wait_for_condition_timeout() does; see
+commit bb10ed09 ("sched: fix wait_for_completion_timeout() spurious
+failure under heavy load").
+
+Daniel said "We have 3 instances of this bug in drm/i915.  One case even
+where we switch between the interruptible and not interruptible
+wait_event_timeout variants, foolishly presuming they have the same
+semantics.  I very much like this."
+
+One such bug is reported at
+  https://bugs.freedesktop.org/show_bug.cgi?id=64133
+
+Signed-off-by: Imre Deak <imre.deak@intel.com>
+Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
+Acked-by: David Howells <dhowells@redhat.com>
+Acked-by: Jens Axboe <axboe@kernel.dk>
+Cc: "Paul E.  McKenney" <paulmck@linux.vnet.ibm.com>
+Cc: Dave Jones <davej@redhat.com>
+Cc: Lukas Czerner <lczerner@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/wait.h |   16 +++++++++++-----
+ 1 file changed, 11 insertions(+), 5 deletions(-)
+
+--- a/include/linux/wait.h
++++ b/include/linux/wait.h
+@@ -233,6 +233,8 @@ do {                                                                       \
+               if (!ret)                                               \
+                       break;                                          \
+       }                                                               \
++      if (!ret && (condition))                                        \
++              ret = 1;                                                \
+       finish_wait(&wq, &__wait);                                      \
+ } while (0)
+@@ -249,8 +251,9 @@ do {                                                                       \
+  * wake_up() has to be called after changing any variable that could
+  * change the result of the wait condition.
+  *
+- * The function returns 0 if the @timeout elapsed, and the remaining
+- * jiffies if the condition evaluated to true before the timeout elapsed.
++ * The function returns 0 if the @timeout elapsed, or the remaining
++ * jiffies (at least 1) if the @condition evaluated to %true before
++ * the @timeout elapsed.
+  */
+ #define wait_event_timeout(wq, condition, timeout)                    \
+ ({                                                                    \
+@@ -318,6 +321,8 @@ do {                                                                       \
+               ret = -ERESTARTSYS;                                     \
+               break;                                                  \
+       }                                                               \
++      if (!ret && (condition))                                        \
++              ret = 1;                                                \
+       finish_wait(&wq, &__wait);                                      \
+ } while (0)
+@@ -334,9 +339,10 @@ do {                                                                      \
+  * wake_up() has to be called after changing any variable that could
+  * change the result of the wait condition.
+  *
+- * The function returns 0 if the @timeout elapsed, -ERESTARTSYS if it
+- * was interrupted by a signal, and the remaining jiffies otherwise
+- * if the condition evaluated to true before the timeout elapsed.
++ * Returns:
++ * 0 if the @timeout elapsed, -%ERESTARTSYS if it was interrupted by
++ * a signal, or the remaining jiffies (at least 1) if the @condition
++ * evaluated to %true before the @timeout elapsed.
+  */
+ #define wait_event_interruptible_timeout(wq, condition, timeout)      \
+ ({                                                                    \