git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.14-stable patches
author     Greg Kroah-Hartman <gregkh@linuxfoundation.org>
           Sun, 14 Aug 2016 14:49:16 +0000 (16:49 +0200)
committer  Greg Kroah-Hartman <gregkh@linuxfoundation.org>
           Sun, 14 Aug 2016 14:49:16 +0000 (16:49 +0200)
added patches:
mm-migrate-dirty-page-without-clear_page_dirty_for_io-etc.patch
printk-do-cond_resched-between-lines-while-outputting-to-consoles.patch
sctp-prevent-soft-lockup-when-sctp_accept-is-called-during-a-timeout-event.patch
usb-fix-invalid-memory-access-in-hub_activate.patch
x86-mm-add-barriers-and-document-switch_mm-vs-flush-synchronization.patch
x86-mm-improve-switch_mm-barrier-comments.patch

queue-3.14/mm-migrate-dirty-page-without-clear_page_dirty_for_io-etc.patch [new file with mode: 0644]
queue-3.14/printk-do-cond_resched-between-lines-while-outputting-to-consoles.patch [new file with mode: 0644]
queue-3.14/sctp-prevent-soft-lockup-when-sctp_accept-is-called-during-a-timeout-event.patch [new file with mode: 0644]
queue-3.14/series [new file with mode: 0644]
queue-3.14/usb-fix-invalid-memory-access-in-hub_activate.patch [new file with mode: 0644]
queue-3.14/x86-mm-add-barriers-and-document-switch_mm-vs-flush-synchronization.patch [new file with mode: 0644]
queue-3.14/x86-mm-improve-switch_mm-barrier-comments.patch [new file with mode: 0644]

diff --git a/queue-3.14/mm-migrate-dirty-page-without-clear_page_dirty_for_io-etc.patch b/queue-3.14/mm-migrate-dirty-page-without-clear_page_dirty_for_io-etc.patch
new file mode 100644 (file)
index 0000000..3e9ad2d
--- /dev/null
@@ -0,0 +1,158 @@
+From 42cb14b110a5698ccf26ce59c4441722605a3743 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Thu, 5 Nov 2015 18:50:05 -0800
+Subject: mm: migrate dirty page without clear_page_dirty_for_io etc
+
+From: Hugh Dickins <hughd@google.com>
+
+commit 42cb14b110a5698ccf26ce59c4441722605a3743 upstream.
+
+clear_page_dirty_for_io() has accumulated writeback and memcg subtleties
+since v2.6.16 first introduced page migration; and the set_page_dirty()
+which completed its migration of PageDirty, later had to be moderated to
+__set_page_dirty_nobuffers(); then PageSwapBacked had to skip that too.
+
+No actual problems have been seen with this procedure recently, but if
+you look into what clear_page_dirty_for_io(page)+set_page_dirty(newpage)
+is actually achieving, it turns out to be nothing more than moving the
+PageDirty flag, and its NR_FILE_DIRTY stat, from one zone to another.
+
+It would be good to avoid a pile of irrelevant decrementations and
+incrementations, and improper event counting, and unnecessary descent of
+the radix_tree under tree_lock (to set the PAGECACHE_TAG_DIRTY which
+radix_tree_replace_slot() left in place anyway).
+
+Do the NR_FILE_DIRTY movement, like the other stats movements, while
+interrupts are still disabled in migrate_page_move_mapping(); and don't
+even bother if the zone is the same.  Do the PageDirty movement there
+under tree_lock too, where the old page is frozen and the newpage is not
+yet visible: bearing in mind that as soon as the newpage becomes visible
+in the radix_tree, an un-page-locked set_page_dirty() might interfere (or
+perhaps that's just not possible: anything doing so should already hold
+an additional reference to the old page, preventing its migration; but
+play safe).
+
+But we do still need to transfer PageDirty in migrate_page_copy(), for
+those who don't go the mapping route through migrate_page_move_mapping().
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Cc: Christoph Lameter <cl@linux.com>
+Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Davidlohr Bueso <dave@stgolabs.net>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Cc: Sasha Levin <sasha.levin@oracle.com>
+Cc: Dmitry Vyukov <dvyukov@google.com>
+Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+[ciwillia@brocade.com: backported to 3.14: adjusted context]
+Signed-off-by: Charles (Chas) Williams <ciwillia@brocade.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/migrate.c |   51 +++++++++++++++++++++++++++++++--------------------
+ 1 file changed, 31 insertions(+), 20 deletions(-)
+
+--- a/mm/migrate.c
++++ b/mm/migrate.c
+@@ -30,6 +30,7 @@
+ #include <linux/mempolicy.h>
+ #include <linux/vmalloc.h>
+ #include <linux/security.h>
++#include <linux/backing-dev.h>
+ #include <linux/memcontrol.h>
+ #include <linux/syscalls.h>
+ #include <linux/hugetlb.h>
+@@ -344,6 +345,8 @@ int migrate_page_move_mapping(struct add
+               struct buffer_head *head, enum migrate_mode mode,
+               int extra_count)
+ {
++      struct zone *oldzone, *newzone;
++      int dirty;
+       int expected_count = 1 + extra_count;
+       void **pslot;
+@@ -354,6 +357,9 @@ int migrate_page_move_mapping(struct add
+               return MIGRATEPAGE_SUCCESS;
+       }
++      oldzone = page_zone(page);
++      newzone = page_zone(newpage);
++
+       spin_lock_irq(&mapping->tree_lock);
+       pslot = radix_tree_lookup_slot(&mapping->page_tree,
+@@ -394,6 +400,13 @@ int migrate_page_move_mapping(struct add
+               set_page_private(newpage, page_private(page));
+       }
++      /* Move dirty while page refs frozen and newpage not yet exposed */
++      dirty = PageDirty(page);
++      if (dirty) {
++              ClearPageDirty(page);
++              SetPageDirty(newpage);
++      }
++
+       radix_tree_replace_slot(pslot, newpage);
+       /*
+@@ -403,6 +416,9 @@ int migrate_page_move_mapping(struct add
+        */
+       page_unfreeze_refs(page, expected_count - 1);
++      spin_unlock(&mapping->tree_lock);
++      /* Leave irq disabled to prevent preemption while updating stats */
++
+       /*
+        * If moved to a different zone then also account
+        * the page for that zone. Other VM counters will be
+@@ -413,13 +429,19 @@ int migrate_page_move_mapping(struct add
+        * via NR_FILE_PAGES and NR_ANON_PAGES if they
+        * are mapped to swap space.
+        */
+-      __dec_zone_page_state(page, NR_FILE_PAGES);
+-      __inc_zone_page_state(newpage, NR_FILE_PAGES);
+-      if (!PageSwapCache(page) && PageSwapBacked(page)) {
+-              __dec_zone_page_state(page, NR_SHMEM);
+-              __inc_zone_page_state(newpage, NR_SHMEM);
++      if (newzone != oldzone) {
++              __dec_zone_state(oldzone, NR_FILE_PAGES);
++              __inc_zone_state(newzone, NR_FILE_PAGES);
++              if (PageSwapBacked(page) && !PageSwapCache(page)) {
++                      __dec_zone_state(oldzone, NR_SHMEM);
++                      __inc_zone_state(newzone, NR_SHMEM);
++              }
++              if (dirty && mapping_cap_account_dirty(mapping)) {
++                      __dec_zone_state(oldzone, NR_FILE_DIRTY);
++                      __inc_zone_state(newzone, NR_FILE_DIRTY);
++              }
+       }
+-      spin_unlock_irq(&mapping->tree_lock);
++      local_irq_enable();
+       return MIGRATEPAGE_SUCCESS;
+ }
+@@ -544,20 +566,9 @@ void migrate_page_copy(struct page *newp
+       if (PageMappedToDisk(page))
+               SetPageMappedToDisk(newpage);
+-      if (PageDirty(page)) {
+-              clear_page_dirty_for_io(page);
+-              /*
+-               * Want to mark the page and the radix tree as dirty, and
+-               * redo the accounting that clear_page_dirty_for_io undid,
+-               * but we can't use set_page_dirty because that function
+-               * is actually a signal that all of the page has become dirty.
+-               * Whereas only part of our page may be dirty.
+-               */
+-              if (PageSwapBacked(page))
+-                      SetPageDirty(newpage);
+-              else
+-                      __set_page_dirty_nobuffers(newpage);
+-      }
++      /* Move dirty on pages not done by migrate_page_move_mapping() */
++      if (PageDirty(page))
++              SetPageDirty(newpage);
+       /*
+        * Copy NUMA information to the new page, to prevent over-eager
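
Taken together, the hunks above boil down to one ordering rule: move PageDirty while the page's references are frozen under tree_lock, then move the per-zone counters with the spinlock dropped but interrupts still disabled. The following condensed sketch only reuses identifiers from the hunks above for illustration; it is not itself part of the queued patch.

    spin_lock_irq(&mapping->tree_lock);

    /* Move the dirty flag while page refs are frozen and newpage is
     * not yet visible in the radix tree. */
    dirty = PageDirty(page);
    if (dirty) {
            ClearPageDirty(page);
            SetPageDirty(newpage);
    }

    radix_tree_replace_slot(pslot, newpage);
    page_unfreeze_refs(page, expected_count - 1);

    spin_unlock(&mapping->tree_lock);       /* keep irqs off for the stats */

    if (newzone != oldzone && dirty && mapping_cap_account_dirty(mapping)) {
            __dec_zone_state(oldzone, NR_FILE_DIRTY);
            __inc_zone_state(newzone, NR_FILE_DIRTY);
    }

    local_irq_enable();
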
diff --git a/queue-3.14/printk-do-cond_resched-between-lines-while-outputting-to-consoles.patch b/queue-3.14/printk-do-cond_resched-between-lines-while-outputting-to-consoles.patch
new file mode 100644 (file)
index 0000000..26666b9
--- /dev/null
@@ -0,0 +1,142 @@
+From 8d91f8b15361dfb438ab6eb3b319e2ded43458ff Mon Sep 17 00:00:00 2001
+From: Tejun Heo <tj@kernel.org>
+Date: Fri, 15 Jan 2016 16:58:24 -0800
+Subject: printk: do cond_resched() between lines while outputting to consoles
+
+From: Tejun Heo <tj@kernel.org>
+
+commit 8d91f8b15361dfb438ab6eb3b319e2ded43458ff upstream.
+
+@console_may_schedule tracks whether console_sem was acquired through
+lock or trylock.  If the former, we're inside a sleepable context and
+console_conditional_schedule() performs cond_resched().  This allows
+console drivers which use console_lock for synchronization to yield
+while performing time-consuming operations such as scrolling.
+
+However, the actual console outputting is performed while holding
+irq-safe logbuf_lock, so console_unlock() clears @console_may_schedule
+before starting outputting lines.  Also, only a few drivers call
+console_conditional_schedule() to begin with.  This means that when a
+lot of lines need to be output by console_unlock(), for example on a
+console registration, the task doing console_unlock() may not yield for
+a long time on a non-preemptible kernel.
+
+If this happens with a slow console device, for example a serial
+console, the outputting task may occupy the CPU for a very long time:
+long enough to trigger softlockup and/or RCU stall warnings, which in
+turn pile up more messages, sometimes enough to trigger the next cycle
+of warnings and incapacitate the system.
+
+Fix it by making console_unlock() insert cond_resched() between lines if
+@console_may_schedule.
+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Reported-by: Calvin Owens <calvinowens@fb.com>
+Acked-by: Jan Kara <jack@suse.com>
+Cc: Dave Jones <davej@codemonkey.org.uk>
+Cc: Kyle McMartin <kyle@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+[ciwillia@brocade.com: adjust context for 3.14.y]
+Signed-off-by: Chas Williams <ciwillia@brocade.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/console.h |    1 +
+ kernel/panic.c          |    3 +++
+ kernel/printk/printk.c  |   35 ++++++++++++++++++++++++++++++++++-
+ 3 files changed, 38 insertions(+), 1 deletion(-)
+
+--- a/include/linux/console.h
++++ b/include/linux/console.h
+@@ -150,6 +150,7 @@ extern int console_trylock(void);
+ extern void console_unlock(void);
+ extern void console_conditional_schedule(void);
+ extern void console_unblank(void);
++extern void console_flush_on_panic(void);
+ extern struct tty_driver *console_device(int *);
+ extern void console_stop(struct console *);
+ extern void console_start(struct console *);
+--- a/kernel/panic.c
++++ b/kernel/panic.c
+@@ -23,6 +23,7 @@
+ #include <linux/sysrq.h>
+ #include <linux/init.h>
+ #include <linux/nmi.h>
++#include <linux/console.h>
+ #define PANIC_TIMER_STEP 100
+ #define PANIC_BLINK_SPD 18
+@@ -133,6 +134,8 @@ void panic(const char *fmt, ...)
+       bust_spinlocks(0);
++      console_flush_on_panic();
++
+       if (!panic_blink)
+               panic_blink = no_blink;
+--- a/kernel/printk/printk.c
++++ b/kernel/printk/printk.c
+@@ -2011,13 +2011,24 @@ void console_unlock(void)
+       static u64 seen_seq;
+       unsigned long flags;
+       bool wake_klogd = false;
+-      bool retry;
++      bool do_cond_resched, retry;
+       if (console_suspended) {
+               up(&console_sem);
+               return;
+       }
++      /*
++       * Console drivers are called under logbuf_lock, so
++       * @console_may_schedule should be cleared before; however, we may
++       * end up dumping a lot of lines, for example, if called from
++       * console registration path, and should invoke cond_resched()
++       * between lines if allowable.  Not doing so can cause a very long
++       * scheduling stall on a slow console leading to RCU stall and
++       * softlockup warnings which exacerbate the issue with more
++       * messages practically incapacitating the system.
++       */
++      do_cond_resched = console_may_schedule;
+       console_may_schedule = 0;
+       /* flush buffered message fragment immediately to console */
+@@ -2074,6 +2085,9 @@ skip:
+               call_console_drivers(level, text, len);
+               start_critical_timings();
+               local_irq_restore(flags);
++
++              if (do_cond_resched)
++                      cond_resched();
+       }
+       console_locked = 0;
+       mutex_release(&console_lock_dep_map, 1, _RET_IP_);
+@@ -2142,6 +2156,25 @@ void console_unblank(void)
+       console_unlock();
+ }
++/**
++ * console_flush_on_panic - flush console content on panic
++ *
++ * Immediately output all pending messages no matter what.
++ */
++void console_flush_on_panic(void)
++{
++      /*
++       * If someone else is holding the console lock, trylock will fail
++       * and may_schedule may be set.  Ignore and proceed to unlock so
++       * that messages are flushed out.  As this can be called from any
++       * context and we don't want to get preempted while flushing,
++       * ensure may_schedule is cleared.
++       */
++      console_trylock();
++      console_may_schedule = 0;
++      console_unlock();
++}
++
+ /*
+  * Return the console tty driver structure and its associated index
+  */
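
The essence of the printk change is a two-step pattern: snapshot console_may_schedule before it is cleared, then yield between lines only when that snapshot says sleeping is allowed. A minimal sketch of the resulting console_unlock() loop follows; the loop condition is a placeholder and the sketch is illustrative, not a drop-in replacement for the code in the hunks above.

    do_cond_resched = console_may_schedule;   /* remember before it is cleared */
    console_may_schedule = 0;

    while (more_records_pending) {            /* placeholder loop condition */
            /* format one record and push it out with irqs disabled ... */
            call_console_drivers(level, text, len);
            start_critical_timings();
            local_irq_restore(flags);

            if (do_cond_resched)              /* console_sem was taken via console_lock() */
                    cond_resched();
    }
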
diff --git a/queue-3.14/sctp-prevent-soft-lockup-when-sctp_accept-is-called-during-a-timeout-event.patch b/queue-3.14/sctp-prevent-soft-lockup-when-sctp_accept-is-called-during-a-timeout-event.patch
new file mode 100644 (file)
index 0000000..aec8a32
--- /dev/null
@@ -0,0 +1,195 @@
+From 635682a14427d241bab7bbdeebb48a7d7b91638e Mon Sep 17 00:00:00 2001
+From: Karl Heiss <kheiss@gmail.com>
+Date: Thu, 24 Sep 2015 12:15:07 -0400
+Subject: sctp: Prevent soft lockup when sctp_accept() is called during a timeout event
+
+From: Karl Heiss <kheiss@gmail.com>
+
+commit 635682a14427d241bab7bbdeebb48a7d7b91638e upstream.
+
+A case can occur when sctp_accept() is called by the user during
+a heartbeat timeout event after the 4-way handshake.  Since
+sctp_assoc_migrate() changes both assoc->base.sk and assoc->ep, the
+bh_sock_lock in sctp_generate_heartbeat_event() will be taken with
+the listening socket but released with the new association socket.
+The result is a deadlock on any future attempts to take the listening
+socket lock.
+
+Note that this race can occur with other SCTP timeouts that take
+the bh_lock_sock() in the event sctp_accept() is called.
+
+ BUG: soft lockup - CPU#9 stuck for 67s! [swapper:0]
+ ...
+ RIP: 0010:[<ffffffff8152d48e>]  [<ffffffff8152d48e>] _spin_lock+0x1e/0x30
+ RSP: 0018:ffff880028323b20  EFLAGS: 00000206
+ RAX: 0000000000000002 RBX: ffff880028323b20 RCX: 0000000000000000
+ RDX: 0000000000000000 RSI: ffff880028323be0 RDI: ffff8804632c4b48
+ RBP: ffffffff8100bb93 R08: 0000000000000000 R09: 0000000000000000
+ R10: ffff880610662280 R11: 0000000000000100 R12: ffff880028323aa0
+ R13: ffff8804383c3880 R14: ffff880028323a90 R15: ffffffff81534225
+ FS:  0000000000000000(0000) GS:ffff880028320000(0000) knlGS:0000000000000000
+ CS:  0010 DS: 0018 ES: 0018 CR0: 000000008005003b
+ CR2: 00000000006df528 CR3: 0000000001a85000 CR4: 00000000000006e0
+ DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+ DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
+ Process swapper (pid: 0, threadinfo ffff880616b70000, task ffff880616b6cab0)
+ Stack:
+ ffff880028323c40 ffffffffa01c2582 ffff880614cfb020 0000000000000000
+ <d> 0100000000000000 00000014383a6c44 ffff8804383c3880 ffff880614e93c00
+ <d> ffff880614e93c00 0000000000000000 ffff8804632c4b00 ffff8804383c38b8
+ Call Trace:
+ <IRQ>
+ [<ffffffffa01c2582>] ? sctp_rcv+0x492/0xa10 [sctp]
+ [<ffffffff8148c559>] ? nf_iterate+0x69/0xb0
+ [<ffffffff814974a0>] ? ip_local_deliver_finish+0x0/0x2d0
+ [<ffffffff8148c716>] ? nf_hook_slow+0x76/0x120
+ [<ffffffff814974a0>] ? ip_local_deliver_finish+0x0/0x2d0
+ [<ffffffff8149757d>] ? ip_local_deliver_finish+0xdd/0x2d0
+ [<ffffffff81497808>] ? ip_local_deliver+0x98/0xa0
+ [<ffffffff81496ccd>] ? ip_rcv_finish+0x12d/0x440
+ [<ffffffff81497255>] ? ip_rcv+0x275/0x350
+ [<ffffffff8145cfeb>] ? __netif_receive_skb+0x4ab/0x750
+ ...
+
+With lockdep debugging:
+
+ =====================================
+ [ BUG: bad unlock balance detected! ]
+ -------------------------------------
+ CslRx/12087 is trying to release lock (slock-AF_INET) at:
+ [<ffffffffa01bcae0>] sctp_generate_timeout_event+0x40/0xe0 [sctp]
+ but there are no more locks to release!
+
+ other info that might help us debug this:
+ 2 locks held by CslRx/12087:
+ #0:  (&asoc->timers[i]){+.-...}, at: [<ffffffff8108ce1f>] run_timer_softirq+0x16f/0x3e0
+ #1:  (slock-AF_INET){+.-...}, at: [<ffffffffa01bcac3>] sctp_generate_timeout_event+0x23/0xe0 [sctp]
+
+Ensure that the socket which is locked is also the one that is unlocked,
+by saving a copy of the socket pointer before entering the timeout event
+critical section.
+
+Signed-off-by: Karl Heiss <kheiss@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Cc: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Luis Henriques <luis.henriques@canonical.com>
+(cherry picked from commit 013dd9e038723bbd2aa67be51847384b75be8253)
+Signed-off-by: Chas Williams <3chas3@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/sm_sideeffect.c |   42 +++++++++++++++++++++++-------------------
+ 1 file changed, 23 insertions(+), 19 deletions(-)
+
+--- a/net/sctp/sm_sideeffect.c
++++ b/net/sctp/sm_sideeffect.c
+@@ -244,12 +244,13 @@ void sctp_generate_t3_rtx_event(unsigned
+       int error;
+       struct sctp_transport *transport = (struct sctp_transport *) peer;
+       struct sctp_association *asoc = transport->asoc;
+-      struct net *net = sock_net(asoc->base.sk);
++      struct sock *sk = asoc->base.sk;
++      struct net *net = sock_net(sk);
+       /* Check whether a task is in the sock.  */
+-      bh_lock_sock(asoc->base.sk);
+-      if (sock_owned_by_user(asoc->base.sk)) {
++      bh_lock_sock(sk);
++      if (sock_owned_by_user(sk)) {
+               pr_debug("%s: sock is busy\n", __func__);
+               /* Try again later.  */
+@@ -272,10 +273,10 @@ void sctp_generate_t3_rtx_event(unsigned
+                          transport, GFP_ATOMIC);
+       if (error)
+-              asoc->base.sk->sk_err = -error;
++              sk->sk_err = -error;
+ out_unlock:
+-      bh_unlock_sock(asoc->base.sk);
++      bh_unlock_sock(sk);
+       sctp_transport_put(transport);
+ }
+@@ -285,11 +286,12 @@ out_unlock:
+ static void sctp_generate_timeout_event(struct sctp_association *asoc,
+                                       sctp_event_timeout_t timeout_type)
+ {
+-      struct net *net = sock_net(asoc->base.sk);
++      struct sock *sk = asoc->base.sk;
++      struct net *net = sock_net(sk);
+       int error = 0;
+-      bh_lock_sock(asoc->base.sk);
+-      if (sock_owned_by_user(asoc->base.sk)) {
++      bh_lock_sock(sk);
++      if (sock_owned_by_user(sk)) {
+               pr_debug("%s: sock is busy: timer %d\n", __func__,
+                        timeout_type);
+@@ -312,10 +314,10 @@ static void sctp_generate_timeout_event(
+                          (void *)timeout_type, GFP_ATOMIC);
+       if (error)
+-              asoc->base.sk->sk_err = -error;
++              sk->sk_err = -error;
+ out_unlock:
+-      bh_unlock_sock(asoc->base.sk);
++      bh_unlock_sock(sk);
+       sctp_association_put(asoc);
+ }
+@@ -365,10 +367,11 @@ void sctp_generate_heartbeat_event(unsig
+       int error = 0;
+       struct sctp_transport *transport = (struct sctp_transport *) data;
+       struct sctp_association *asoc = transport->asoc;
+-      struct net *net = sock_net(asoc->base.sk);
++      struct sock *sk = asoc->base.sk;
++      struct net *net = sock_net(sk);
+-      bh_lock_sock(asoc->base.sk);
+-      if (sock_owned_by_user(asoc->base.sk)) {
++      bh_lock_sock(sk);
++      if (sock_owned_by_user(sk)) {
+               pr_debug("%s: sock is busy\n", __func__);
+               /* Try again later.  */
+@@ -389,10 +392,10 @@ void sctp_generate_heartbeat_event(unsig
+                          transport, GFP_ATOMIC);
+        if (error)
+-               asoc->base.sk->sk_err = -error;
++              sk->sk_err = -error;
+ out_unlock:
+-      bh_unlock_sock(asoc->base.sk);
++      bh_unlock_sock(sk);
+       sctp_transport_put(transport);
+ }
+@@ -403,10 +406,11 @@ void sctp_generate_proto_unreach_event(u
+ {
+       struct sctp_transport *transport = (struct sctp_transport *) data;
+       struct sctp_association *asoc = transport->asoc;
+-      struct net *net = sock_net(asoc->base.sk);
++      struct sock *sk = asoc->base.sk;
++      struct net *net = sock_net(sk);
+-      bh_lock_sock(asoc->base.sk);
+-      if (sock_owned_by_user(asoc->base.sk)) {
++      bh_lock_sock(sk);
++      if (sock_owned_by_user(sk)) {
+               pr_debug("%s: sock is busy\n", __func__);
+               /* Try again later.  */
+@@ -427,7 +431,7 @@ void sctp_generate_proto_unreach_event(u
+                  asoc->state, asoc->ep, asoc, transport, GFP_ATOMIC);
+ out_unlock:
+-      bh_unlock_sock(asoc->base.sk);
++      bh_unlock_sock(sk);
+       sctp_association_put(asoc);
+ }
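
Every timer handler touched above applies the same rule: snapshot asoc->base.sk into a local variable once, and use only that local pointer for both bh_lock_sock() and bh_unlock_sock(), so a concurrent sctp_assoc_migrate() cannot change which socket gets unlocked. Schematically (illustrative sketch, not part of the queued patch):

    struct sock *sk = asoc->base.sk;   /* snapshot before any side effects */

    bh_lock_sock(sk);
    if (sock_owned_by_user(sk)) {
            /* sock is busy: rearm the timer and try again later */
            goto out_unlock;
    }

    /* run the timeout side effects; asoc->base.sk may be migrated to a
     * new socket here, but the local 'sk' still names the one we locked */

out_unlock:
    bh_unlock_sock(sk);                /* same socket that was locked */
    sctp_association_put(asoc);
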
diff --git a/queue-3.14/series b/queue-3.14/series
new file mode 100644 (file)
index 0000000..7cdb5a5
--- /dev/null
@@ -0,0 +1,6 @@
+usb-fix-invalid-memory-access-in-hub_activate.patch
+mm-migrate-dirty-page-without-clear_page_dirty_for_io-etc.patch
+printk-do-cond_resched-between-lines-while-outputting-to-consoles.patch
+x86-mm-add-barriers-and-document-switch_mm-vs-flush-synchronization.patch
+sctp-prevent-soft-lockup-when-sctp_accept-is-called-during-a-timeout-event.patch
+x86-mm-improve-switch_mm-barrier-comments.patch
diff --git a/queue-3.14/usb-fix-invalid-memory-access-in-hub_activate.patch b/queue-3.14/usb-fix-invalid-memory-access-in-hub_activate.patch
new file mode 100644 (file)
index 0000000..cc9d554
--- /dev/null
@@ -0,0 +1,94 @@
+From e50293ef9775c5f1cf3fcc093037dd6a8c5684ea Mon Sep 17 00:00:00 2001
+From: Alan Stern <stern@rowland.harvard.edu>
+Date: Wed, 16 Dec 2015 13:32:38 -0500
+Subject: USB: fix invalid memory access in hub_activate()
+
+From: Alan Stern <stern@rowland.harvard.edu>
+
+commit e50293ef9775c5f1cf3fcc093037dd6a8c5684ea upstream.
+
+Commit 8520f38099cc ("USB: change hub initialization sleeps to
+delayed_work") changed the hub_activate() routine to make part of it
+run in a workqueue.  However, the commit failed to take a reference to
+the usb_hub structure or to lock the hub interface while doing so.  As
+a result, if a hub is plugged in and quickly unplugged before the work
+routine can run, the routine will try to access memory that has been
+deallocated.  Or, if the hub is unplugged while the routine is
+running, the memory may be deallocated while it is in active use.
+
+This patch fixes the problem by taking a reference to the usb_hub at
+the start of hub_activate() and releasing it at the end (when the work
+is finished), and by locking the hub interface while the work routine
+is running.  It also adds a check at the start of the routine to see
+if the hub has already been disconnected, in which case nothing should
+be done.
+
+Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
+Reported-by: Alexandru Cornea <alexandru.cornea@intel.com>
+Tested-by: Alexandru Cornea <alexandru.cornea@intel.com>
+Fixes: 8520f38099cc ("USB: change hub initialization sleeps to delayed_work")
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+[ luis: backported to 3.16:
+  - Added forward declaration of hub_release() which mainline had with commit
+    32a6958998c5 ("usb: hub: convert khubd into workqueue") ]
+Signed-off-by: Luis Henriques <luis.henriques@canonical.com>
+Signed-off-by: Charles (Chas) Williams <ciwillia@brocade.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/usb/core/hub.c |   23 ++++++++++++++++++++---
+ 1 file changed, 20 insertions(+), 3 deletions(-)
+
+--- a/drivers/usb/core/hub.c
++++ b/drivers/usb/core/hub.c
+@@ -106,6 +106,7 @@ EXPORT_SYMBOL_GPL(ehci_cf_port_reset_rws
+ #define HUB_DEBOUNCE_STEP       25
+ #define HUB_DEBOUNCE_STABLE    100
++static void hub_release(struct kref *kref);
+ static int usb_reset_and_verify_device(struct usb_device *udev);
+ static inline char *portspeed(struct usb_hub *hub, int portstatus)
+@@ -1023,10 +1024,20 @@ static void hub_activate(struct usb_hub
+       unsigned delay;
+       /* Continue a partial initialization */
+-      if (type == HUB_INIT2)
+-              goto init2;
+-      if (type == HUB_INIT3)
++      if (type == HUB_INIT2 || type == HUB_INIT3) {
++              device_lock(hub->intfdev);
++
++              /* Was the hub disconnected while we were waiting? */
++              if (hub->disconnected) {
++                      device_unlock(hub->intfdev);
++                      kref_put(&hub->kref, hub_release);
++                      return;
++              }
++              if (type == HUB_INIT2)
++                      goto init2;
+               goto init3;
++      }
++      kref_get(&hub->kref);
+       /* The superspeed hub except for root hub has to use Hub Depth
+        * value as an offset into the route string to locate the bits
+@@ -1220,6 +1231,7 @@ static void hub_activate(struct usb_hub
+                       PREPARE_DELAYED_WORK(&hub->init_work, hub_init_func3);
+                       schedule_delayed_work(&hub->init_work,
+                                       msecs_to_jiffies(delay));
++                      device_unlock(hub->intfdev);
+                       return;         /* Continues at init3: below */
+               } else {
+                       msleep(delay);
+@@ -1240,6 +1252,11 @@ static void hub_activate(struct usb_hub
+       /* Allow autosuspend if it was suppressed */
+       if (type <= HUB_INIT3)
+               usb_autopm_put_interface_async(to_usb_interface(hub->intfdev));
++
++      if (type == HUB_INIT2 || type == HUB_INIT3)
++              device_unlock(hub->intfdev);
++
++      kref_put(&hub->kref, hub_release);
+ }
+ /* Implement the continuations for the delays above */
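
The lifetime rule the hub fix enforces is the standard one for deferred work: pin the object with a reference before the work can outlive its caller, re-take the owning interface lock when the deferred pass runs, and bail out if the device has since been disconnected. In outline (condensed from the hunks above; need_to_defer_again is a placeholder, not a real flag):

    /* Initial pass (HUB_INIT): pin the hub for the whole sequence. */
    kref_get(&hub->kref);

    /* Deferred passes (HUB_INIT2/HUB_INIT3) begin with: */
    device_lock(hub->intfdev);
    if (hub->disconnected) {            /* unplugged while the work was queued */
            device_unlock(hub->intfdev);
            kref_put(&hub->kref, hub_release);
            return;
    }

    /* ... initialization steps ... */

    if (need_to_defer_again) {          /* placeholder for the HUB_INIT2 case */
            schedule_delayed_work(&hub->init_work, msecs_to_jiffies(delay));
            device_unlock(hub->intfdev);
            return;                     /* reference stays held for the next pass */
    }

    /* Final pass: release lock and reference. */
    device_unlock(hub->intfdev);
    kref_put(&hub->kref, hub_release);  /* balances the kref_get() above */
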
diff --git a/queue-3.14/x86-mm-add-barriers-and-document-switch_mm-vs-flush-synchronization.patch b/queue-3.14/x86-mm-add-barriers-and-document-switch_mm-vs-flush-synchronization.patch
new file mode 100644 (file)
index 0000000..0009321
--- /dev/null
@@ -0,0 +1,151 @@
+From 71b3c126e61177eb693423f2e18a1914205b165e Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Wed, 6 Jan 2016 12:21:01 -0800
+Subject: x86/mm: Add barriers and document switch_mm()-vs-flush synchronization
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 71b3c126e61177eb693423f2e18a1914205b165e upstream.
+
+When switch_mm() activates a new PGD, it also sets a bit that
+tells other CPUs that the PGD is in use so that TLB flush IPIs
+will be sent.  In order for that to work correctly, the bit
+needs to be visible prior to loading the PGD and therefore
+starting to fill the local TLB.
+
+Document all the barriers that make this work correctly and add
+a couple that were missing.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-mm@kvack.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+[ luis: backported to 3.16:
+  - dropped N/A comment in flush_tlb_mm_range()
+  - adjusted context ]
+Signed-off-by: Luis Henriques <luis.henriques@canonical.com>
+[ciwillia@brocade.com: backported to 3.14: adjusted context]
+Signed-off-by: Charles (Chas) Williams <ciwillia@brocade.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/mmu_context.h |   32 +++++++++++++++++++++++++++++++-
+ arch/x86/mm/tlb.c                  |   25 ++++++++++++++++++++++---
+ 2 files changed, 53 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/include/asm/mmu_context.h
++++ b/arch/x86/include/asm/mmu_context.h
+@@ -42,7 +42,32 @@ static inline void switch_mm(struct mm_s
+ #endif
+               cpumask_set_cpu(cpu, mm_cpumask(next));
+-              /* Re-load page tables */
++              /*
++               * Re-load page tables.
++               *
++               * This logic has an ordering constraint:
++               *
++               *  CPU 0: Write to a PTE for 'next'
++               *  CPU 0: load bit 1 in mm_cpumask.  if nonzero, send IPI.
++               *  CPU 1: set bit 1 in next's mm_cpumask
++               *  CPU 1: load from the PTE that CPU 0 writes (implicit)
++               *
++               * We need to prevent an outcome in which CPU 1 observes
++               * the new PTE value and CPU 0 observes bit 1 clear in
++               * mm_cpumask.  (If that occurs, then the IPI will never
++               * be sent, and CPU 0's TLB will contain a stale entry.)
++               *
++               * The bad outcome can occur if either CPU's load is
++               * reordered before that CPU's store, so both CPUs much
++               * execute full barriers to prevent this from happening.
++               *
++               * Thus, switch_mm needs a full barrier between the
++               * store to mm_cpumask and any operation that could load
++               * from next->pgd.  This barrier synchronizes with
++               * remote TLB flushers.  Fortunately, load_cr3 is
++               * serializing and thus acts as a full barrier.
++               *
++               */
+               load_cr3(next->pgd);
+               /* Stop flush ipis for the previous mm */
+@@ -65,10 +90,15 @@ static inline void switch_mm(struct mm_s
+                        * schedule, protecting us from simultaneous changes.
+                        */
+                       cpumask_set_cpu(cpu, mm_cpumask(next));
++
+                       /*
+                        * We were in lazy tlb mode and leave_mm disabled
+                        * tlb flush IPI delivery. We must reload CR3
+                        * to make sure to use no freed page tables.
++                       *
++                       * As above, this is a barrier that forces
++                       * TLB repopulation to be ordered after the
++                       * store to mm_cpumask.
+                        */
+                       load_cr3(next->pgd);
+                       load_LDT_nolock(&next->context);
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -152,7 +152,10 @@ void flush_tlb_current_task(void)
+       preempt_disable();
+       count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
++
++      /* This is an implicit full barrier that synchronizes with switch_mm. */
+       local_flush_tlb();
++
+       if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
+               flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
+       preempt_enable();
+@@ -166,11 +169,19 @@ void flush_tlb_mm_range(struct mm_struct
+       unsigned long nr_base_pages;
+       preempt_disable();
+-      if (current->active_mm != mm)
++      if (current->active_mm != mm) {
++              /* Synchronize with switch_mm. */
++              smp_mb();
++
+               goto flush_all;
++      }
+       if (!current->mm) {
+               leave_mm(smp_processor_id());
++
++              /* Synchronize with switch_mm. */
++              smp_mb();
++
+               goto flush_all;
+       }
+@@ -222,10 +233,18 @@ void flush_tlb_page(struct vm_area_struc
+       preempt_disable();
+       if (current->active_mm == mm) {
+-              if (current->mm)
++              if (current->mm) {
++                      /*
++                       * Implicit full barrier (INVLPG) that synchronizes
++                       * with switch_mm.
++                       */
+                       __flush_tlb_one(start);
+-              else
++              } else {
+                       leave_mm(smp_processor_id());
++
++                      /* Synchronize with switch_mm. */
++                      smp_mb();
++              }
+       }
+       if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
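
The barriers being added pair up across two CPUs: the CPU switching mm must publish its mm_cpumask bit before any TLB fill can read through the new page tables, and the CPU flushing must publish its PTE write before it reads mm_cpumask to decide whether an IPI is needed. On the switch side the serializing load_cr3() provides the barrier for free; the flush side needs an explicit smp_mb() (or an implicitly serializing instruction such as INVLPG) on the paths that lack one. A schematic of the pairing, using only the calls from the hunks above:

    /* Switching side (switch_mm): store, then full barrier, then loads. */
    cpumask_set_cpu(cpu, mm_cpumask(next));
    load_cr3(next->pgd);            /* serializing: acts as the full barrier */
    /* ... TLB fills may now load PTEs for 'next' ... */

    /* Flushing side, on paths with no implicit barrier: store, barrier, load. */
    /* ... write the PTE ... */
    smp_mb();                       /* pairs with the barrier in switch_mm() */
    if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
            flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
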
diff --git a/queue-3.14/x86-mm-improve-switch_mm-barrier-comments.patch b/queue-3.14/x86-mm-improve-switch_mm-barrier-comments.patch
new file mode 100644 (file)
index 0000000..d9b3fbd
--- /dev/null
@@ -0,0 +1,68 @@
+From 4eaffdd5a5fe6ff9f95e1ab4de1ac904d5e0fa8b Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Tue, 12 Jan 2016 12:47:40 -0800
+Subject: x86/mm: Improve switch_mm() barrier comments
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 4eaffdd5a5fe6ff9f95e1ab4de1ac904d5e0fa8b upstream.
+
+My previous comments were still a bit confusing and there was a
+typo. Fix it up.
+
+Reported-by: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org
+Fixes: 71b3c126e611 ("x86/mm: Add barriers and document switch_mm()-vs-flush synchronization")
+Link: http://lkml.kernel.org/r/0a0b43cdcdd241c5faaaecfbcc91a155ddedc9a1.1452631609.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/mmu_context.h |   15 ++++++++-------
+ 1 file changed, 8 insertions(+), 7 deletions(-)
+
+--- a/arch/x86/include/asm/mmu_context.h
++++ b/arch/x86/include/asm/mmu_context.h
+@@ -58,14 +58,16 @@ static inline void switch_mm(struct mm_s
+                * be sent, and CPU 0's TLB will contain a stale entry.)
+                *
+                * The bad outcome can occur if either CPU's load is
+-               * reordered before that CPU's store, so both CPUs much
++               * reordered before that CPU's store, so both CPUs must
+                * execute full barriers to prevent this from happening.
+                *
+                * Thus, switch_mm needs a full barrier between the
+                * store to mm_cpumask and any operation that could load
+-               * from next->pgd.  This barrier synchronizes with
+-               * remote TLB flushers.  Fortunately, load_cr3 is
+-               * serializing and thus acts as a full barrier.
++               * from next->pgd.  TLB fills are special and can happen
++               * due to instruction fetches or for no reason at all,
++               * and neither LOCK nor MFENCE orders them.
++               * Fortunately, load_cr3() is serializing and gives the
++               * ordering guarantee we need.
+                *
+                */
+               load_cr3(next->pgd);
+@@ -96,9 +98,8 @@ static inline void switch_mm(struct mm_s
+                        * tlb flush IPI delivery. We must reload CR3
+                        * to make sure to use no freed page tables.
+                        *
+-                       * As above, this is a barrier that forces
+-                       * TLB repopulation to be ordered after the
+-                       * store to mm_cpumask.
++                       * As above, load_cr3() is serializing and orders TLB
++                       * fills with respect to the mm_cpumask write.
+                        */
+                       load_cr3(next->pgd);
+                       load_LDT_nolock(&next->context);