From: Greg Kroah-Hartman
Date: Sat, 4 Jul 2015 03:04:12 +0000 (-0700)
Subject: 3.14-stable patches
X-Git-Tag: v4.0.8~16
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=9aaa3852dd8f2342287799c053cd7a06da7c022e;p=thirdparty%2Fkernel%2Fstable-queue.git

3.14-stable patches

added patches:
	bridge-fix-br_stp_set_bridge_priority-race-conditions.patch
	bridge-fix-multicast-router-rlist-endless-loop.patch
	neigh-do-not-modify-unlinked-entries.patch
	net-don-t-wait-for-order-3-page-allocation.patch
	net-phy-fix-phy-link-up-when-limiting-speed-via-device-tree.patch
	packet-avoid-out-of-bounds-read-in-round-robin-fanout.patch
	packet-read-num_members-once-in-packet_rcv_fanout.patch
	sctp-fix-asconf-list-handling.patch
	sctp-fix-race-between-ootb-responce-and-route-removal.patch
	sparc-Use-GFP_ATOMIC-in-ldc_alloc_exp_dring-as-it-can-be-called-in-softirq-context.patch
	tcp-do-not-call-tcp_fastopen_reset_cipher-from-interrupt-context.patch
---

diff --git a/queue-3.14/bridge-fix-br_stp_set_bridge_priority-race-conditions.patch b/queue-3.14/bridge-fix-br_stp_set_bridge_priority-race-conditions.patch
new file mode 100644
index 00000000000..f0502badf15
--- /dev/null
+++ b/queue-3.14/bridge-fix-br_stp_set_bridge_priority-race-conditions.patch
@@ -0,0 +1,68 @@
+From foo@baz Fri Jul 3 19:59:07 PDT 2015
+From: Nikolay Aleksandrov
+Date: Mon, 15 Jun 2015 20:28:51 +0300
+Subject: bridge: fix br_stp_set_bridge_priority race conditions
+
+From: Nikolay Aleksandrov
+
+[ Upstream commit 2dab80a8b486f02222a69daca6859519e05781d9 ]
+
+After the ->set() spinlocks were removed, br_stp_set_bridge_priority
+was left running without any protection when used via sysfs. It can
+race with port add/del and could result in use-after-free cases and
+corrupted lists. Tested by running port add/del in a loop with stp
+enabled while setting priority in a loop; crashes are easily
+reproducible.
+The spinlocks around sysfs ->set() were removed in commit:
+14f98f258f19 ("bridge: range check STP parameters")
+There's also a race condition in the netlink priority support that is
+fixed by this change, but it was introduced recently and the fixes tag
+covers it. Just in case it's needed, the commit is:
+af615762e972 ("bridge: add ageing_time, stp_state, priority over netlink")
+
+Signed-off-by: Nikolay Aleksandrov
+Fixes: 14f98f258f19 ("bridge: range check STP parameters")
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/bridge/br_ioctl.c | 2 --
+ net/bridge/br_stp_if.c | 4 +++-
+ 2 files changed, 3 insertions(+), 3 deletions(-)
+
+--- a/net/bridge/br_ioctl.c
++++ b/net/bridge/br_ioctl.c
+@@ -247,9 +247,7 @@ static int old_dev_ioctl(struct net_devi
+ if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
+
+- spin_lock_bh(&br->lock);
+ br_stp_set_bridge_priority(br, args[1]);
+- spin_unlock_bh(&br->lock);
+ return 0;
+
+ case BRCTL_SET_PORT_PRIORITY:
+--- a/net/bridge/br_stp_if.c
++++ b/net/bridge/br_stp_if.c
+@@ -243,12 +243,13 @@ bool br_stp_recalculate_bridge_id(struct
+ return true;
+ }
+
+-/* called under bridge lock */
++/* Acquires and releases bridge lock */
+ void br_stp_set_bridge_priority(struct net_bridge *br, u16 newprio)
+ {
+ struct net_bridge_port *p;
+ int wasroot;
+
++ spin_lock_bh(&br->lock);
+ wasroot = br_is_root_bridge(br);
+
+ list_for_each_entry(p, &br->port_list, list) {
+@@ -266,6 +267,7 @@ void br_stp_set_bridge_priority(struct n
+ br_port_state_selection(br);
+ if (br_is_root_bridge(br) && !wasroot)
+ br_become_root_bridge(br);
++ spin_unlock_bh(&br->lock);
+ }
+
+ /* called under bridge lock */
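The fix above moves lock acquisition from the callers into br_stp_set_bridge_priority() itself, so every entry point (ioctl, sysfs, netlink) is serialized. A minimal userspace sketch of that pattern, with a pthread mutex standing in for the bridge spinlock (all names here are illustrative, not kernel APIs):

#include <pthread.h>
#include <stdio.h>

struct bridge {
	pthread_mutex_t lock;
	int priority;
};

/* Acquires and releases the lock itself, like the patched
 * br_stp_set_bridge_priority(), so no call path can forget it. */
static void set_bridge_priority(struct bridge *br, int newprio)
{
	pthread_mutex_lock(&br->lock);
	br->priority = newprio;	/* the port list walk would happen here */
	pthread_mutex_unlock(&br->lock);
}

int main(void)
{
	struct bridge br = { PTHREAD_MUTEX_INITIALIZER, 32768 };

	set_bridge_priority(&br, 4096);	/* safe from any caller */
	printf("priority=%d\n", br.priority);
	return 0;
}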
diff --git a/queue-3.14/bridge-fix-multicast-router-rlist-endless-loop.patch b/queue-3.14/bridge-fix-multicast-router-rlist-endless-loop.patch
new file mode 100644
index 00000000000..120466ccbe1
--- /dev/null
+++ b/queue-3.14/bridge-fix-multicast-router-rlist-endless-loop.patch
@@ -0,0 +1,57 @@
+From foo@baz Fri Jul 3 19:59:07 PDT 2015
+From: Nikolay Aleksandrov
+Date: Tue, 9 Jun 2015 10:23:57 -0700
+Subject: bridge: fix multicast router rlist endless loop
+
+From: Nikolay Aleksandrov
+
+[ Upstream commit 1a040eaca1a22f8da8285ceda6b5e4a2cb704867 ]
+
+Since the addition of sysfs multicast router support, if one sets
+multicast_router to "2" more than once, then the port would be added to
+the hlist every time and could end up linking to itself, thus causing an
+endless loop for rlist walkers.
+So to reproduce just do:
+echo 2 > multicast_router; echo 2 > multicast_router;
+in a bridge port and let some igmp traffic flow; for me it hangs up
+in br_multicast_flood().
+Fix this by adding a check in br_multicast_add_router() if the port is
+already linked.
+The reason this didn't happen before the addition of the
+multicast_router sysfs entries is that there's a !hlist_unhashed check
+that prevents it.
+
+Signed-off-by: Nikolay Aleksandrov
+Fixes: 0909e11758bd ("bridge: Add multicast_router sysfs entries")
+Acked-by: Herbert Xu
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/bridge/br_multicast.c | 7 +++----
+ 1 file changed, 3 insertions(+), 4 deletions(-)
+
+--- a/net/bridge/br_multicast.c
++++ b/net/bridge/br_multicast.c
+@@ -1086,6 +1086,9 @@ static void br_multicast_add_router(stru
+ struct net_bridge_port *p;
+ struct hlist_node *slot = NULL;
+
++ if (!hlist_unhashed(&port->rlist))
++ return;
++
+ hlist_for_each_entry(p, &br->router_list, rlist) {
+ if ((unsigned long) port >= (unsigned long) p)
+ break;
+@@ -1113,12 +1116,8 @@ static void br_multicast_mark_router(str
+ if (port->multicast_router != 1)
+ return;
+
+- if (!hlist_unhashed(&port->rlist))
+- goto timer;
+-
+ br_multicast_add_router(br, port);
+
+-timer:
+ mod_timer(&port->multicast_router_timer,
+ now + br->multicast_querier_interval);
+ }
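The bug here is a non-idempotent list insert: adding an already-linked node can make it point at itself, so every subsequent walker spins forever. A small userspace sketch of the guard the patch adds, where a "linked" flag plays the role of !hlist_unhashed() (illustrative code, not the kernel hlist API):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct node {
	struct node *next;
	bool linked;	/* stand-in for !hlist_unhashed() */
};

static void add_node(struct node **head, struct node *n)
{
	if (n->linked)	/* already on the list: do nothing */
		return;
	n->next = *head;
	*head = n;
	n->linked = true;
}

int main(void)
{
	struct node *head = NULL, port = { NULL, false };

	add_node(&head, &port);
	add_node(&head, &port);	/* second add is now a no-op */

	/* without the guard, port.next == &port and this would spin */
	for (struct node *p = head; p; p = p->next)
		printf("node %p\n", (void *)p);
	return 0;
}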
diff --git a/queue-3.14/neigh-do-not-modify-unlinked-entries.patch b/queue-3.14/neigh-do-not-modify-unlinked-entries.patch
new file mode 100644
index 00000000000..a6726cac96f
--- /dev/null
+++ b/queue-3.14/neigh-do-not-modify-unlinked-entries.patch
@@ -0,0 +1,83 @@
+From foo@baz Fri Jul 3 19:59:07 PDT 2015
+From: Julian Anastasov
+Date: Tue, 16 Jun 2015 22:56:39 +0300
+Subject: neigh: do not modify unlinked entries
+
+From: Julian Anastasov
+
+[ Upstream commit 2c51a97f76d20ebf1f50fef908b986cb051fdff9 ]
+
+The lockless lookups can return an entry that is unlinked.
+Sometimes they get a reference before the last neigh_cleanup_and_release,
+sometimes they do not need a reference. Later, any
+modification attempts may result in the following problems:
+
+1. entry is not destroyed immediately because neigh_update
+can start the timer for a dead entry, eg. on change to NUD_REACHABLE
+state. As a result, the entry lives for some time but is invisible
+and out of control.
+
+2. __neigh_event_send can run in parallel with neigh_destroy
+while refcnt=0, but if the timer is started and expires, refcnt can
+reach 0 a second time, leading to a second neigh_destroy and a
+possible crash.
+
+Thanks to Eric Dumazet and Ying Xue for their work and analysis
+of the __neigh_event_send change.
+
+Fixes: 767e97e1e0db ("neigh: RCU conversion of struct neighbour")
+Fixes: a263b3093641 ("ipv4: Make neigh lookups directly in output packet path.")
+Fixes: 6fd6ce2056de ("ipv6: Do not depend on rt->n in ip6_finish_output2().")
+Cc: Eric Dumazet
+Cc: Ying Xue
+Signed-off-by: Julian Anastasov
+Acked-by: Eric Dumazet
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/core/neighbour.c | 13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+--- a/net/core/neighbour.c
++++ b/net/core/neighbour.c
+@@ -976,6 +976,8 @@ int __neigh_event_send(struct neighbour
+ rc = 0;
+ if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
+ goto out_unlock_bh;
++ if (neigh->dead)
++ goto out_dead;
+
+ if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
+ if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
+@@ -1032,6 +1034,13 @@ out_unlock_bh:
+ write_unlock(&neigh->lock);
+ local_bh_enable();
+ return rc;
++
++out_dead:
++ if (neigh->nud_state & NUD_STALE)
++ goto out_unlock_bh;
++ write_unlock_bh(&neigh->lock);
++ kfree_skb(skb);
++ return 1;
+ }
+ EXPORT_SYMBOL(__neigh_event_send);
+
+@@ -1095,6 +1104,8 @@ int neigh_update(struct neighbour *neigh
+ if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
+ (old & (NUD_NOARP | NUD_PERMANENT)))
+ goto out;
++ if (neigh->dead)
++ goto out;
+
+ if (!(new & NUD_VALID)) {
+ neigh_del_timer(neigh);
+@@ -1244,6 +1255,8 @@ EXPORT_SYMBOL(neigh_update);
+ */
+ void __neigh_set_probe_once(struct neighbour *neigh)
+ {
++ if (neigh->dead)
++ return;
+ neigh->updated = jiffies;
+ if (!(neigh->nud_state & NUD_FAILED))
+ return;
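The pattern behind all three hunks is the same: once an entry has been marked dead (unlinked) under its lock, a late updater must back off instead of re-arming timers or reviving state. A hedged userspace sketch, with a pthread mutex standing in for the neighbour lock and illustrative field names:

#include <pthread.h>
#include <stdbool.h>

struct entry {
	pthread_mutex_t lock;
	bool dead;	/* set when unlinked from the table */
	int state;
};

/* Returns false if the entry was already unlinked, mirroring the
 * "if (neigh->dead) goto out;" checks added by the patch. */
static bool entry_update(struct entry *e, int new_state)
{
	bool ok = true;

	pthread_mutex_lock(&e->lock);
	if (e->dead)
		ok = false;	/* never revive an unlinked entry */
	else
		e->state = new_state;	/* a timer re-arm would go here */
	pthread_mutex_unlock(&e->lock);
	return ok;
}

int main(void)
{
	struct entry e = { PTHREAD_MUTEX_INITIALIZER, false, 0 };

	entry_update(&e, 1);	/* succeeds */
	e.dead = true;		/* entry gets unlinked */
	return entry_update(&e, 2);	/* refused: returns false (0) */
}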
diff --git a/queue-3.14/net-don-t-wait-for-order-3-page-allocation.patch b/queue-3.14/net-don-t-wait-for-order-3-page-allocation.patch
new file mode 100644
index 00000000000..1b5322d15f4
--- /dev/null
+++ b/queue-3.14/net-don-t-wait-for-order-3-page-allocation.patch
@@ -0,0 +1,73 @@
+From foo@baz Fri Jul 3 19:59:07 PDT 2015
+From: Shaohua Li
+Date: Thu, 11 Jun 2015 16:50:48 -0700
+Subject: net: don't wait for order-3 page allocation
+
+From: Shaohua Li
+
+[ Upstream commit fb05e7a89f500cfc06ae277bdc911b281928995d ]
+
+We saw excessive direct memory compaction triggered by skb_page_frag_refill.
+This causes performance issues and adds latency. Commit 5640f7685831e0
+introduces the order-3 allocation. According to the changelog, the order-3
+allocation isn't a must-have but is meant to improve performance. But direct
+memory compaction has high overhead. The benefit of the order-3 allocation
+can't compensate for the overhead of direct memory compaction.
+
+This patch makes the order-3 page allocation atomic. If there is no memory
+pressure and memory isn't fragmented, the allocation will still succeed, so we
+don't sacrifice the order-3 benefit here. If the atomic allocation fails,
+direct memory compaction will not be triggered, and skb_page_frag_refill will
+fall back to order-0 immediately, hence the direct memory compaction overhead
+is avoided. In the allocation failure case, kswapd is woken up and does
+compaction, so chances are the allocation will succeed next time.
+
+alloc_skb_with_frags is the same.
+
+The mellanox driver does a similar thing; if this is accepted, we must fix
+the driver too.
+
+V3: fix the same issue in alloc_skb_with_frags as pointed out by Eric
+V2: make the changelog clearer
+
+Cc: Eric Dumazet
+Cc: Chris Mason
+Cc: Debabrata Banerjee
+Signed-off-by: Shaohua Li
+Acked-by: Eric Dumazet
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/core/skbuff.c | 4 +++-
+ net/core/sock.c | 4 +++-
+ 2 files changed, 6 insertions(+), 2 deletions(-)
+
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -368,9 +368,11 @@ refill:
+ for (order = NETDEV_FRAG_PAGE_MAX_ORDER; ;) {
+ gfp_t gfp = gfp_mask;
+
+- if (order)
++ if (order) {
+ gfp |= __GFP_COMP | __GFP_NOWARN |
+ __GFP_NOMEMALLOC;
++ gfp &= ~__GFP_WAIT;
++ }
+ nc->frag.page = alloc_pages(gfp, order);
+ if (likely(nc->frag.page))
+ break;
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1914,8 +1914,10 @@ bool skb_page_frag_refill(unsigned int s
+ do {
+ gfp_t gfp = prio;
+
+- if (order)
++ if (order) {
+ gfp |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY;
++ gfp &= ~__GFP_WAIT;
++ }
+ pfrag->page = alloc_pages(gfp, order);
+ if (likely(pfrag->page)) {
+ pfrag->offset = 0;
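The strategy is to attempt the large allocation only opportunistically (the patch expresses "don't work hard" by clearing __GFP_WAIT, so direct compaction is never triggered) and fall back to the small size at once. A rough userspace analogue — malloc cannot model compaction, so this only shows the try-large-else-small shape, with illustrative sizes:

#include <stdio.h>
#include <stdlib.h>

#define SMALL_SZ 4096		/* order-0 page */
#define LARGE_SZ (8 * 4096)	/* order-3 block */

static void *frag_refill(size_t *got)
{
	void *buf = malloc(LARGE_SZ);	/* one cheap attempt, no retry */

	if (buf) {
		*got = LARGE_SZ;
		return buf;
	}
	*got = SMALL_SZ;		/* immediate order-0 fallback */
	return malloc(SMALL_SZ);
}

int main(void)
{
	size_t got;
	void *buf = frag_refill(&got);

	if (buf)
		printf("got %zu bytes\n", got);
	free(buf);
	return 0;
}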
diff --git a/queue-3.14/net-phy-fix-phy-link-up-when-limiting-speed-via-device-tree.patch b/queue-3.14/net-phy-fix-phy-link-up-when-limiting-speed-via-device-tree.patch
new file mode 100644
index 00000000000..d7fe3258b4e
--- /dev/null
+++ b/queue-3.14/net-phy-fix-phy-link-up-when-limiting-speed-via-device-tree.patch
@@ -0,0 +1,40 @@
+From foo@baz Fri Jul 3 19:59:07 PDT 2015
+From: Mugunthan V N
+Date: Thu, 25 Jun 2015 22:21:02 +0530
+Subject: net: phy: fix phy link up when limiting speed via device tree
+
+From: Mugunthan V N
+
+[ Upstream commit eb686231fce3770299760f24fdcf5ad041f44153 ]
+
+When limiting the phy link speed to 100Mbps or less via the "max-speed"
+device tree property on a gigabit phy, the phy never completes
+auto-negotiation and the phy state machine is held in PHY_AN. Fix this
+by comparing the gigabit advertisement even though phydev->supported
+doesn't contain it, since the phy has BMSR_ESTATEN set. That way
+auto-negotiation is restarted whenever the old and new advertisements
+differ, and the link comes up fine.
+
+Signed-off-by: Mugunthan V N
+Reviewed-by: Florian Fainelli
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/net/phy/phy_device.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/phy/phy_device.c
++++ b/drivers/net/phy/phy_device.c
+@@ -765,10 +765,11 @@ static int genphy_config_advert(struct p
+ if (phydev->supported & (SUPPORTED_1000baseT_Half |
+ SUPPORTED_1000baseT_Full)) {
+ adv |= ethtool_adv_to_mii_ctrl1000_t(advertise);
+- if (adv != oldadv)
+- changed = 1;
+ }
+
++ if (adv != oldadv)
++ changed = 1;
++
+ err = phy_write(phydev, MII_CTRL1000, adv);
+ if (err < 0)
+ return err;
diff --git a/queue-3.14/packet-avoid-out-of-bounds-read-in-round-robin-fanout.patch b/queue-3.14/packet-avoid-out-of-bounds-read-in-round-robin-fanout.patch
new file mode 100644
index 00000000000..d5a7a00134f
--- /dev/null
+++ b/queue-3.14/packet-avoid-out-of-bounds-read-in-round-robin-fanout.patch
@@ -0,0 +1,62 @@
+From foo@baz Fri Jul 3 19:59:07 PDT 2015
+From: Willem de Bruijn
+Date: Wed, 17 Jun 2015 15:59:34 -0400
+Subject: packet: avoid out of bounds read in round robin fanout
+
+From: Willem de Bruijn
+
+[ Upstream commit 468479e6043c84f5a65299cc07cb08a22a28c2b1 ]
+
+PACKET_FANOUT_LB computes f->rr_cur such that it is modulo
+f->num_members. It returns the old value unconditionally, but
+f->num_members may have changed since the last store. Ensure
+that the return value is always < num.
+
+When modifying the logic, simplify it further by replacing the loop
+with an unconditional atomic increment.
+
+Fixes: dc99f600698d ("packet: Add fanout support.")
+Suggested-by: Eric Dumazet
+Signed-off-by: Willem de Bruijn
+Acked-by: Eric Dumazet
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/packet/af_packet.c | 18 ++----------------
+ 1 file changed, 2 insertions(+), 16 deletions(-)
+
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -1264,16 +1264,6 @@ static void packet_sock_destruct(struct
+ sk_refcnt_debug_dec(sk);
+ }
+
+-static int fanout_rr_next(struct packet_fanout *f, unsigned int num)
+-{
+- int x = atomic_read(&f->rr_cur) + 1;
+-
+- if (x >= num)
+- x = 0;
+-
+- return x;
+-}
+-
+ static unsigned int fanout_demux_hash(struct packet_fanout *f,
+ struct sk_buff *skb,
+ unsigned int num)
+@@ -1285,13 +1275,9 @@ static unsigned int fanout_demux_lb(stru
+ struct sk_buff *skb,
+ unsigned int num)
+ {
+- int cur, old;
++ unsigned int val = atomic_inc_return(&f->rr_cur);
+
+- cur = atomic_read(&f->rr_cur);
+- while ((old = atomic_cmpxchg(&f->rr_cur, cur,
+- fanout_rr_next(f, num))) != cur)
+- cur = old;
+- return cur;
++ return val % num;
+ }
+
+ static unsigned int fanout_demux_cpu(struct packet_fanout *f,
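The replacement logic is easy to demonstrate with C11 atomics: an unconditional increment followed by a modulo keeps the returned index below num even if the membership count changes between the two operations. A sketch — note atomic_fetch_add returns the old value, where the kernel's atomic_inc_return returns the new one; the bound still holds either way:

#include <stdatomic.h>
#include <stdio.h>

static atomic_uint rr_cur;

static unsigned int demux_lb(unsigned int num)
{
	unsigned int val = atomic_fetch_add(&rr_cur, 1);

	return val % num;	/* always < num, whatever num is right now */
}

int main(void)
{
	for (int i = 0; i < 8; i++)
		printf("%u ", demux_lb(3));	/* prints 0 1 2 0 1 2 0 1 */
	printf("\n");
	return 0;
}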
diff --git a/queue-3.14/packet-read-num_members-once-in-packet_rcv_fanout.patch b/queue-3.14/packet-read-num_members-once-in-packet_rcv_fanout.patch
new file mode 100644
index 00000000000..f23f3c5361c
--- /dev/null
+++ b/queue-3.14/packet-read-num_members-once-in-packet_rcv_fanout.patch
@@ -0,0 +1,37 @@
+From foo@baz Fri Jul 3 19:59:07 PDT 2015
+From: Eric Dumazet
+Date: Tue, 16 Jun 2015 07:59:11 -0700
+Subject: packet: read num_members once in packet_rcv_fanout()
+
+From: Eric Dumazet
+
+[ Upstream commit f98f4514d07871da7a113dd9e3e330743fd70ae4 ]
+
+We need to tell the compiler it must not read f->num_members multiple
+times. Otherwise testing if num is not zero is flaky, and we could
+attempt an invalid divide by 0 in fanout_demux_cpu().
+
+Note the bug was present in packet_rcv_fanout_hash() and
+packet_rcv_fanout_lb(), but final 3.1 had a single location
+after commit 95ec3eb417115fb ("packet: Add 'cpu' fanout policy.")
+
+Fixes: dc99f600698dc ("packet: Add fanout support.")
+Signed-off-by: Eric Dumazet
+Cc: Willem de Bruijn
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/packet/af_packet.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -1345,7 +1345,7 @@ static int packet_rcv_fanout(struct sk_b
+ struct packet_type *pt, struct net_device *orig_dev)
+ {
+ struct packet_fanout *f = pt->af_packet_priv;
+- unsigned int num = f->num_members;
++ unsigned int num = ACCESS_ONCE(f->num_members);
+ struct packet_sock *po;
+ unsigned int idx;
+
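A sketch of why a single load matters: if the compiler is free to re-read num_members, the zero check and the later modulo can observe different values of a concurrently updated field. A C11 relaxed atomic load gives the same read-exactly-once behaviour that ACCESS_ONCE() provides in the kernel (the struct and names below are illustrative):

#include <stdatomic.h>
#include <stdio.h>

struct fanout {
	atomic_uint num_members;	/* updated concurrently by add/remove */
};

static int rcv_fanout(struct fanout *f, unsigned int hash)
{
	unsigned int num = atomic_load_explicit(&f->num_members,
						memory_order_relaxed);

	if (!num)		/* test and use the SAME snapshot */
		return -1;
	return hash % num;	/* can no longer divide by zero */
}

int main(void)
{
	struct fanout f = { 4 };

	printf("%d\n", rcv_fanout(&f, 13));
	return 0;
}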
diff --git a/queue-3.14/sctp-fix-asconf-list-handling.patch b/queue-3.14/sctp-fix-asconf-list-handling.patch
new file mode 100644
index 00000000000..f8f5daf4cef
--- /dev/null
+++ b/queue-3.14/sctp-fix-asconf-list-handling.patch
@@ -0,0 +1,184 @@
+From foo@baz Fri Jul 3 19:59:07 PDT 2015
+From: Marcelo Ricardo Leitner
+Date: Fri, 12 Jun 2015 10:16:41 -0300
+Subject: sctp: fix ASCONF list handling
+
+From: Marcelo Ricardo Leitner
+
+[ Upstream commit 2d45a02d0166caf2627fe91897c6ffc3b19514c4 ]
+
+->auto_asconf_splist is per namespace and mangled by functions like
+sctp_setsockopt_auto_asconf(), which don't guarantee any serialization.
+
+Also, the call to inet_sk_copy_descendant() was backing up
+->auto_asconf_list through the copy but was not honoring
+->do_auto_asconf, which could lead to list corruption if it
+differed between the two sockets.
+
+This commit thus fixes the list handling by using the ->addr_wq_lock
+spinlock to protect the list. Special handling is done upon socket
+creation and destruction for that. Error handling in sctp_init_sock()
+will never return an error after having initialized asconf, so
+sctp_destroy_sock() can be called without addr_wq_lock. The lock will
+now be taken in sctp_close_sock(), before locking the socket, so we
+don't take it in the inverse order compared to
+sctp_addr_wq_timeout_handler().
+
+Instead of taking the lock in sctp_sock_migrate() for copying and
+restoring the list values, it's preferable to avoid rewriting them by
+implementing sctp_copy_descendant().
+
+The issue was found with a test application that kept flipping the
+sysctl default_auto_asconf on and off, but one could trigger it by
+issuing simultaneous setsockopt() calls on multiple sockets or by
+creating/destroying sockets fast enough. This is only triggerable
+locally.
+
+Fixes: 9f7d653b67ae ("sctp: Add Auto-ASCONF support (core).")
+Reported-by: Ji Jianwen
+Suggested-by: Neil Horman
+Suggested-by: Hannes Frederic Sowa
+Acked-by: Hannes Frederic Sowa
+Signed-off-by: Marcelo Ricardo Leitner
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ include/net/netns/sctp.h | 1 +
+ include/net/sctp/structs.h | 4 ++++
+ net/sctp/socket.c | 43 ++++++++++++++++++++++++++++++++-----------
+ 3 files changed, 37 insertions(+), 11 deletions(-)
+
+--- a/include/net/netns/sctp.h
++++ b/include/net/netns/sctp.h
+@@ -31,6 +31,7 @@ struct netns_sctp {
+ struct list_head addr_waitq;
+ struct timer_list addr_wq_timer;
+ struct list_head auto_asconf_splist;
++ /* Lock that protects both addr_waitq and auto_asconf_splist */
+ spinlock_t addr_wq_lock;
+
+ /* Lock that protects the local_addr_list writers */
+--- a/include/net/sctp/structs.h
++++ b/include/net/sctp/structs.h
+@@ -219,6 +219,10 @@ struct sctp_sock {
+ atomic_t pd_mode;
+ /* Receive to here while partial delivery is in effect. */
+ struct sk_buff_head pd_lobby;
++
++ /* These must be the last fields, as they will be skipped on copies,
++ * like on accept and peeloff operations
++ */
+ struct list_head auto_asconf_list;
+ int do_auto_asconf;
+ };
+--- a/net/sctp/socket.c
++++ b/net/sctp/socket.c
+@@ -1532,8 +1532,10 @@ static void sctp_close(struct sock *sk,
+
+ /* Supposedly, no process has access to the socket, but
+ * the net layers still may.
++ * Also, sctp_destroy_sock() needs to be called with addr_wq_lock
++ * held and that should be grabbed before socket lock.
+ */
+- local_bh_disable();
++ spin_lock_bh(&net->sctp.addr_wq_lock);
+ bh_lock_sock(sk);
+
+ /* Hold the sock, since sk_common_release() will put sock_put()
+@@ -1543,7 +1545,7 @@ static void sctp_close(struct sock *sk,
+ sk_common_release(sk);
+
+ bh_unlock_sock(sk);
+- local_bh_enable();
++ spin_unlock_bh(&net->sctp.addr_wq_lock);
+
+ sock_put(sk);
+
+@@ -3511,6 +3513,7 @@ static int sctp_setsockopt_auto_asconf(s
+ if ((val && sp->do_auto_asconf) || (!val && !sp->do_auto_asconf))
+ return 0;
+
++ spin_lock_bh(&sock_net(sk)->sctp.addr_wq_lock);
+ if (val == 0 && sp->do_auto_asconf) {
+ list_del(&sp->auto_asconf_list);
+ sp->do_auto_asconf = 0;
+@@ -3519,6 +3522,7 @@ static int sctp_setsockopt_auto_asconf(s
+ &sock_net(sk)->sctp.auto_asconf_splist);
+ sp->do_auto_asconf = 1;
+ }
++ spin_unlock_bh(&sock_net(sk)->sctp.addr_wq_lock);
+ return 0;
+ }
+
+@@ -4009,18 +4013,28 @@ static int sctp_init_sock(struct sock *s
+ local_bh_disable();
+ percpu_counter_inc(&sctp_sockets_allocated);
+ sock_prot_inuse_add(net, sk->sk_prot, 1);
++
++ /* Nothing can fail after this block, otherwise
++ * sctp_destroy_sock() will be called without addr_wq_lock held
++ */
+ if (net->sctp.default_auto_asconf) {
++ spin_lock(&sock_net(sk)->sctp.addr_wq_lock);
+ list_add_tail(&sp->auto_asconf_list,
+ &net->sctp.auto_asconf_splist);
+ sp->do_auto_asconf = 1;
+- } else
++ spin_unlock(&sock_net(sk)->sctp.addr_wq_lock);
++ } else {
+ sp->do_auto_asconf = 0;
++ }
++
+ local_bh_enable();
+
+ return 0;
+ }
+
+-/* Cleanup any SCTP per socket resources. */
++/* Cleanup any SCTP per socket resources. Must be called with
++ * sock_net(sk)->sctp.addr_wq_lock held if sp->do_auto_asconf is true
++ */
+ static void sctp_destroy_sock(struct sock *sk)
+ {
+ struct sctp_sock *sp;
+@@ -6973,6 +6987,19 @@ void sctp_copy_sock(struct sock *newsk,
+ newinet->mc_list = NULL;
+ }
+
++static inline void sctp_copy_descendant(struct sock *sk_to,
++ const struct sock *sk_from)
++{
++ int ancestor_size = sizeof(struct inet_sock) +
++ sizeof(struct sctp_sock) -
++ offsetof(struct sctp_sock, auto_asconf_list);
++
++ if (sk_from->sk_family == PF_INET6)
++ ancestor_size += sizeof(struct ipv6_pinfo);
++
++ __inet_sk_copy_descendant(sk_to, sk_from, ancestor_size);
++}
++
+ /* Populate the fields of the newsk from the oldsk and migrate the assoc
+ * and its messages to the newsk.
+ */
+@@ -6987,7 +7014,6 @@ static void sctp_sock_migrate(struct soc
+ struct sk_buff *skb, *tmp;
+ struct sctp_ulpevent *event;
+ struct sctp_bind_hashbucket *head;
+- struct list_head tmplist;
+
+ /* Migrate socket buffer sizes and all the socket level options to the
+ * new socket.
+@@ -6995,12 +7021,7 @@ static void sctp_sock_migrate(struct soc
+ newsk->sk_sndbuf = oldsk->sk_sndbuf;
+ newsk->sk_rcvbuf = oldsk->sk_rcvbuf;
+ /* Brute force copy old sctp opt. */
+- if (oldsp->do_auto_asconf) {
+- memcpy(&tmplist, &newsp->auto_asconf_list, sizeof(tmplist));
+- inet_sk_copy_descendant(newsk, oldsk);
+- memcpy(&newsp->auto_asconf_list, &tmplist, sizeof(tmplist));
+- } else
+- inet_sk_copy_descendant(newsk, oldsk);
++ sctp_copy_descendant(newsk, oldsk);
+
+ /* Restore the ep value that was overwritten with the above structure
+ * copy.
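The sctp_copy_descendant() trick generalizes: keep the per-socket fields that must survive a copy at the end of the structure, then memcpy only the leading bytes, sized with offsetof(). A standalone sketch with illustrative field names:

#include <stddef.h>
#include <stdio.h>
#include <string.h>

struct sock_opts {
	int sndbuf;
	int rcvbuf;
	/* must stay last: private to each socket, never copied */
	void *auto_asconf_list;
	int do_auto_asconf;
};

static void copy_descendant(struct sock_opts *to, const struct sock_opts *from)
{
	size_t n = offsetof(struct sock_opts, auto_asconf_list);

	memcpy(to, from, n);	/* copies sndbuf/rcvbuf, skips the tail */
}

int main(void)
{
	struct sock_opts old = { 1024, 2048, (void *)&old, 1 };
	struct sock_opts new_sk = { 0 };

	copy_descendant(&new_sk, &old);
	printf("sndbuf=%d do_auto_asconf=%d\n",
	       new_sk.sndbuf, new_sk.do_auto_asconf);
	return 0;
}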
diff --git a/queue-3.14/sctp-fix-race-between-ootb-responce-and-route-removal.patch b/queue-3.14/sctp-fix-race-between-ootb-responce-and-route-removal.patch
new file mode 100644
index 00000000000..3625b35cde5
--- /dev/null
+++ b/queue-3.14/sctp-fix-race-between-ootb-responce-and-route-removal.patch
@@ -0,0 +1,135 @@
+From foo@baz Fri Jul 3 19:59:07 PDT 2015
+From: Alexander Sverdlin
+Date: Mon, 29 Jun 2015 10:41:03 +0200
+Subject: sctp: Fix race between OOTB responce and route removal
+
+From: Alexander Sverdlin
+
+[ Upstream commit 29c4afc4e98f4dc0ea9df22c631841f9c220b944 ]
+
+A NULL pointer dereference is possible during the statistics update if the
+route used for the OOTB response is removed at an unfortunate time. If the
+route exists when we receive an OOTB packet and we finally jump into
+sctp_packet_transmit() to send the ABORT, but in the meantime the route is
+removed under our feet, we take the "no_route" path and try to update stats
+with IP_INC_STATS(sock_net(asoc->base.sk), ...).
+
+But sctp_ootb_pkt_new(), used to prepare the response packet, doesn't call
+sctp_transport_set_owner() and therefore there is no asoc associated with
+this packet. Probably a temporary asoc just for OOTB responses is overkill,
+so just introduce a check like in all other places in
+sctp_packet_transmit() where "asoc" is dereferenced.
+
+To reproduce this, one needs to
+0. ensure that the sctp module is loaded (otherwise no ABORT is generated)
+1. remove the default route on the machine
+2. while true; do
+ ip route del [interface-specific route]
+ ip route add [interface-specific route]
+ done
+3. send enough OOTB packets (i.e. HB REQs) from another host to trigger the
+ ABORT response
+
+On x86_64 the crash looks like this:
+
+BUG: unable to handle kernel NULL pointer dereference at 0000000000000020
+IP: [] sctp_packet_transmit+0x63c/0x730 [sctp]
+PGD 0
+Oops: 0000 [#1] PREEMPT SMP
+Modules linked in: ...
+CPU: 0 PID: 0 Comm: swapper/0 Tainted: G O 4.0.5-1-ARCH #1
+Hardware name: ...
+task: ffffffff818124c0 ti: ffffffff81800000 task.ti: ffffffff81800000
+RIP: 0010:[] [] sctp_packet_transmit+0x63c/0x730 [sctp]
+RSP: 0018:ffff880127c037b8 EFLAGS: 00010296
+RAX: 0000000000000000 RBX: 0000000000000000 RCX: 00000015ff66b480
+RDX: 00000015ff66b400 RSI: ffff880127c17200 RDI: ffff880123403700
+RBP: ffff880127c03888 R08: 0000000000017200 R09: ffffffff814625af
+R10: ffffea00047e4680 R11: 00000000ffffff80 R12: ffff8800b0d38a28
+R13: ffff8800b0d38a28 R14: ffff8800b3e88000 R15: ffffffffa05f24e0
+FS: 0000000000000000(0000) GS:ffff880127c00000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
+CR2: 0000000000000020 CR3: 00000000c855b000 CR4: 00000000000007f0
+Stack:
+ ffff880127c03910 ffff8800b0d38a28 ffffffff8189d240 ffff88011f91b400
+ ffff880127c03828 ffffffffa05c94c5 0000000000000000 ffff8800baa1c520
+ 0000000000000000 0000000000000001 0000000000000000 0000000000000000
+Call Trace:
+
+ [] ? sctp_sf_tabort_8_4_8.isra.20+0x85/0x140 [sctp]
+ [] ? sctp_transport_put+0x52/0x80 [sctp]
+ [] sctp_do_sm+0xb8c/0x19a0 [sctp]
+ [] ? trigger_load_balance+0x90/0x210
+ [] ? update_process_times+0x59/0x60
+ [] ? timerqueue_add+0x60/0xb0
+ [] ? enqueue_hrtimer+0x29/0xa0
+ [] ? read_tsc+0x9/0x10
+ [] ? put_page+0x55/0x60
+ [] ? clockevents_program_event+0x6d/0x100
+ [] ? skb_free_head+0x58/0x80
+ [] ? chksum_update+0x1b/0x27 [crc32c_generic]
+ [] ? crypto_shash_update+0xce/0xf0
+ [] sctp_endpoint_bh_rcv+0x113/0x280 [sctp]
+ [] sctp_inq_push+0x46/0x60 [sctp]
+ [] sctp_rcv+0x880/0x910 [sctp]
+ [] ? sctp_packet_transmit_chunk+0xb0/0xb0 [sctp]
+ [] ? sctp_csum_update+0x20/0x20 [sctp]
+ [] ? ip_route_input_noref+0x235/0xd30
+ [] ? ack_ioapic_level+0x7b/0x150
+ [] ip_local_deliver_finish+0xae/0x210
+ [] ip_local_deliver+0x35/0x90
+ [] ip_rcv_finish+0xf5/0x370
+ [] ip_rcv+0x2b8/0x3a0
+ [] __netif_receive_skb_core+0x763/0xa50
+ [] __netif_receive_skb+0x18/0x60
+ [] netif_receive_skb_internal+0x40/0xd0
+ [] napi_gro_receive+0xe8/0x120
+ [] rtl8169_poll+0x2da/0x660 [r8169]
+ [] net_rx_action+0x21a/0x360
+ [] __do_softirq+0xe1/0x2d0
+ [] irq_exit+0xad/0xb0
+ [] do_IRQ+0x58/0xf0
+ [] common_interrupt+0x6d/0x6d
+
+ [] ? hrtimer_start+0x18/0x20
+ [] ? sctp_transport_destroy_rcu+0x29/0x30 [sctp]
+ [] ? mwait_idle+0x60/0xa0
+ [] arch_cpu_idle+0xf/0x20
+ [] cpu_startup_entry+0x3ec/0x480
+ [] rest_init+0x85/0x90
+ [] start_kernel+0x48b/0x4ac
+ [] ? early_idt_handlers+0x120/0x120
+ [] x86_64_start_reservations+0x2a/0x2c
+ [] x86_64_start_kernel+0x161/0x184
+Code: 90 48 8b 80 b8 00 00 00 48 89 85 70 ff ff ff 48 83 bd 70 ff ff ff 00 0f 85 cd fa ff ff 48 89 df 31 db e8 18 63 e7 e0 48 8b 45 80 <48> 8b 40 20 48 8b 40 30 48 8b 80 68 01 00 00 65 48 ff 40 78 e9
+RIP [] sctp_packet_transmit+0x63c/0x730 [sctp]
+ RSP
+CR2: 0000000000000020
+---[ end trace 5aec7fd2dc983574 ]---
+Kernel panic - not syncing: Fatal exception in interrupt
+Kernel Offset: 0x0 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffff9fffffff)
+drm_kms_helper: panic occurred, switching back to text console
+---[ end Kernel panic - not syncing: Fatal exception in interrupt
+
+Signed-off-by: Alexander Sverdlin
+Acked-by: Neil Horman
+Acked-by: Marcelo Ricardo Leitner
+Acked-by: Vlad Yasevich
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/sctp/output.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/sctp/output.c
++++ b/net/sctp/output.c
+@@ -599,7 +599,9 @@ out:
+ return err;
+ no_route:
+ kfree_skb(nskb);
+- IP_INC_STATS(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES);
++
++ if (asoc)
++ IP_INC_STATS(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES);
+
+ /* FIXME: Returning the 'err' will effect all the associations
+ * associated with a socket, although only one of the paths of the
diff --git a/queue-3.14/sparc-Use-GFP_ATOMIC-in-ldc_alloc_exp_dring-as-it-can-be-called-in-softirq-context.patch b/queue-3.14/sparc-Use-GFP_ATOMIC-in-ldc_alloc_exp_dring-as-it-can-be-called-in-softirq-context.patch
new file mode 100644
index 00000000000..f676af96d12
--- /dev/null
+++ b/queue-3.14/sparc-Use-GFP_ATOMIC-in-ldc_alloc_exp_dring-as-it-can-be-called-in-softirq-context.patch
@@ -0,0 +1,40 @@
+From patchwork Tue Apr 21 14:30:41 2015
+Content-Type: text/plain; charset="utf-8"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+Subject: sparc: Use GFP_ATOMIC in ldc_alloc_exp_dring() as it can be called in
+ softirq context
+From: Sowmini Varadhan
+X-Patchwork-Id: 463148
+Message-Id: <1429626641-199974-1-git-send-email-sowmini.varadhan@oracle.com>
+To: sowmini.varadhan@oracle.com, sparclinux@vger.kernel.org
+Cc: david.stevens@oracle.com, davem@davemloft.net
+Date: Tue, 21 Apr 2015 10:30:41 -0400
+
+From: Sowmini Varadhan
+
+Upstream commit 671d773297969bebb1732e1cdc1ec03aa53c6be2
+
+Since it is possible for vnet_event_napi to end up doing
+vnet_control_pkt_engine -> ... -> vnet_send_attr ->
+vnet_port_alloc_tx_ring -> ldc_alloc_exp_dring -> kzalloc()
+(i.e., in softirq context), kzalloc() should be called with
+GFP_ATOMIC from ldc_alloc_exp_dring.
+
+Signed-off-by: Sowmini Varadhan
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/sparc/kernel/ldc.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/sparc/kernel/ldc.c
++++ b/arch/sparc/kernel/ldc.c
+@@ -2307,7 +2307,7 @@ void *ldc_alloc_exp_dring(struct ldc_cha
+ if (len & (8UL - 1))
+ return ERR_PTR(-EINVAL);
+
+- buf = kzalloc(len, GFP_KERNEL);
++ buf = kzalloc(len, GFP_ATOMIC);
+ if (!buf)
+ return ERR_PTR(-ENOMEM);
+
diff --git a/queue-3.14/tcp-do-not-call-tcp_fastopen_reset_cipher-from-interrupt-context.patch b/queue-3.14/tcp-do-not-call-tcp_fastopen_reset_cipher-from-interrupt-context.patch
new file mode 100644
index 00000000000..d8d64dc29b6
--- /dev/null
+++ b/queue-3.14/tcp-do-not-call-tcp_fastopen_reset_cipher-from-interrupt-context.patch
@@ -0,0 +1,114 @@
+From foo@baz Fri Jul 3 19:59:07 PDT 2015
+From: Christoph Paasch
+Date: Thu, 18 Jun 2015 09:15:34 -0700
+Subject: tcp: Do not call tcp_fastopen_reset_cipher from interrupt context
+
+From: Christoph Paasch
+
+[ Upstream commit dfea2aa654243f70dc53b8648d0bbdeec55a7df1 ]
+
+tcp_fastopen_reset_cipher really cannot be called from interrupt
+context. It allocates the tcp_fastopen_context with GFP_KERNEL and
+calls crypto_alloc_cipher, which allocates all kind of stuff with
+GFP_KERNEL.
+
+Thus, we might sleep when the key-generation is triggered by an
+incoming TFO cookie-request which would then happen in interrupt-
+context, as shown by enabling CONFIG_DEBUG_ATOMIC_SLEEP:
+
+[ 36.001813] BUG: sleeping function called from invalid context at mm/slub.c:1266
+[ 36.003624] in_atomic(): 1, irqs_disabled(): 0, pid: 1016, name: packetdrill
+[ 36.004859] CPU: 1 PID: 1016 Comm: packetdrill Not tainted 4.1.0-rc7 #14
+[ 36.006085] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org 04/01/2014
+[ 36.008250] 00000000000004f2 ffff88007f8838a8 ffffffff8171d53a ffff880075a084a8
+[ 36.009630] ffff880075a08000 ffff88007f8838c8 ffffffff810967d3 ffff88007f883928
+[ 36.011076] 0000000000000000 ffff88007f8838f8 ffffffff81096892 ffff88007f89be00
+[ 36.012494] Call Trace:
+[ 36.012953] [] dump_stack+0x4f/0x6d
+[ 36.014085] [] ___might_sleep+0x103/0x170
+[ 36.015117] [] __might_sleep+0x52/0x90
+[ 36.016117] [] kmem_cache_alloc_trace+0x47/0x190
+[ 36.017266] [] ? tcp_fastopen_reset_cipher+0x42/0x130
+[ 36.018485] [] tcp_fastopen_reset_cipher+0x42/0x130
+[ 36.019679] [] tcp_fastopen_init_key_once+0x61/0x70
+[ 36.020884] [] __tcp_fastopen_cookie_gen+0x1c/0x60
+[ 36.022058] [] tcp_try_fastopen+0x58f/0x730
+[ 36.023118] [] tcp_conn_request+0x3e8/0x7b0
+[ 36.024185] [] ? __module_text_address+0x12/0x60
+[ 36.025327] [] tcp_v4_conn_request+0x51/0x60
+[ 36.026410] [] tcp_rcv_state_process+0x190/0xda0
+[ 36.027556] [] ? __inet_lookup_established+0x47/0x170
+[ 36.028784] [] tcp_v4_do_rcv+0x16d/0x3d0
+[ 36.029832] [] ? security_sock_rcv_skb+0x16/0x20
+[ 36.030936] [] tcp_v4_rcv+0x77a/0x7b0
+[ 36.031875] [] ? iptable_filter_hook+0x33/0x70
+[ 36.032953] [] ip_local_deliver_finish+0x92/0x1f0
+[ 36.034065] [] ip_local_deliver+0x9a/0xb0
+[ 36.035069] [] ? ip_rcv+0x3d0/0x3d0
+[ 36.035963] [] ip_rcv_finish+0x119/0x330
+[ 36.036950] [] ip_rcv+0x2e7/0x3d0
+[ 36.037847] [] __netif_receive_skb_core+0x552/0x930
+[ 36.038994] [] __netif_receive_skb+0x27/0x70
+[ 36.040033] [] process_backlog+0xd2/0x1f0
+[ 36.041025] [] net_rx_action+0x122/0x310
+[ 36.042007] [] __do_softirq+0x103/0x2f0
+[ 36.042978] [] do_softirq_own_stack+0x1c/0x30
+
+This patch moves the call to tcp_fastopen_init_key_once to the places
+where a listener socket creates its TFO-state, which always happens in
+user-context (either from the setsockopt, or implicitly during the
+listen()-call)
+
+Cc: Eric Dumazet
+Cc: Hannes Frederic Sowa
+Fixes: 222e83d2e0ae ("tcp: switch tcp_fastopen key generation to net_get_random_once")
+Signed-off-by: Christoph Paasch
+Acked-by: Eric Dumazet
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/ipv4/af_inet.c | 2 ++
+ net/ipv4/tcp.c | 7 +++++--
+ net/ipv4/tcp_fastopen.c | 2 --
+ 3 files changed, 7 insertions(+), 4 deletions(-)
+
+--- a/net/ipv4/af_inet.c
++++ b/net/ipv4/af_inet.c
+@@ -228,6 +228,8 @@ int inet_listen(struct socket *sock, int
+ err = 0;
+ if (err)
+ goto out;
++
++ tcp_fastopen_init_key_once(true);
+ }
+ err = inet_csk_listen_start(sk, backlog);
+ if (err)
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -2684,10 +2684,13 @@ static int do_tcp_setsockopt(struct sock
+
+ case TCP_FASTOPEN:
+ if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE |
+- TCPF_LISTEN)))
++ TCPF_LISTEN))) {
++ tcp_fastopen_init_key_once(true);
++
+ err = fastopen_init_queue(sk, val);
+- else
++ } else {
+ err = -EINVAL;
++ }
+ break;
+ case TCP_TIMESTAMP:
+ if (!tp->repair)
+--- a/net/ipv4/tcp_fastopen.c
++++ b/net/ipv4/tcp_fastopen.c
+@@ -84,8 +84,6 @@ void tcp_fastopen_cookie_gen(__be32 src,
+ __be32 path[4] = { src, dst, 0, 0 };
+ struct tcp_fastopen_context *ctx;
+
+- tcp_fastopen_init_key_once(true);
+-
+ rcu_read_lock();
+ ctx = rcu_dereference(tcp_fastopen_ctx);
+ if (ctx) {