From: Greg Kroah-Hartman
Date: Fri, 15 Sep 2017 06:21:56 +0000 (-0700)
Subject: 4.13-stable patches
X-Git-Tag: v4.9.51~28
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=49163f31289bea6d15a31e14113ac00cf5c72cb2;p=thirdparty%2Fkernel%2Fstable-queue.git

4.13-stable patches

added patches:
	gianfar-fix-tx-flow-control-deactivation.patch
	ip6_gre-update-mtu-properly-in-ip6gre_err.patch
	ip_tunnel-fix-setting-ttl-and-tos-value-in-collect_md-mode.patch
	ipv6-fix-memory-leak-with-multiple-tables-during-netns-destruction.patch
	ipv6-fix-typo-in-fib6_net_exit.patch
	revert-net-fix-percpu-memory-leaks.patch
	revert-net-use-lib-percpu_counter-api-for-fragmentation-mem-accounting.patch
	sctp-fix-missing-wake-ups-in-some-situations.patch
	tcp-fix-a-request-socket-leak.patch
	udp-drop-head-states-only-when-all-skb-references-are-gone.patch
	vhost_net-correctly-check-tx-avail-during-rx-busy-polling.patch
---
diff --git a/queue-4.13/gianfar-fix-tx-flow-control-deactivation.patch b/queue-4.13/gianfar-fix-tx-flow-control-deactivation.patch
new file mode 100644
index 00000000000..37f6bd918a2
--- /dev/null
+++ b/queue-4.13/gianfar-fix-tx-flow-control-deactivation.patch
@@ -0,0 +1,39 @@
+From foo@baz Thu Sep 14 23:20:44 PDT 2017
+From: Claudiu Manoil
+Date: Mon, 4 Sep 2017 10:45:28 +0300
+Subject: gianfar: Fix Tx flow control deactivation
+
+From: Claudiu Manoil
+
+
+[ Upstream commit 5d621672bc1a1e5090c1ac5432a18c79e0e13e03 ]
+
+The wrong register is checked for the Tx flow control bit;
+it should have been maccfg1, not maccfg2.
+This went unnoticed for so long probably because the impact is
+hardly visible, not to mention the tangled code from adjust_link().
+First, link flow control (i.e. handling of Rx/Tx link level pause frames)
+is disabled by default (needs to be enabled via 'ethtool -A').
+Secondly, maccfg2 always returns 0 for tx_flow_oldval (except for a few
+old boards), which results in Tx flow control remaining always on
+once activated.
+
+Fixes: 45b679c9a3ccd9e34f28e6ec677b812a860eb8eb ("gianfar: Implement PAUSE frame generation support")
+Signed-off-by: Claudiu Manoil
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/net/ethernet/freescale/gianfar.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/freescale/gianfar.c
++++ b/drivers/net/ethernet/freescale/gianfar.c
+@@ -3687,7 +3687,7 @@ static noinline void gfar_update_link_st
+ 	u32 tempval1 = gfar_read(&regs->maccfg1);
+ 	u32 tempval = gfar_read(&regs->maccfg2);
+ 	u32 ecntrl = gfar_read(&regs->ecntrl);
+-	u32 tx_flow_oldval = (tempval & MACCFG1_TX_FLOW);
++	u32 tx_flow_oldval = (tempval1 & MACCFG1_TX_FLOW);
+ 
+ 	if (phydev->duplex != priv->oldduplex) {
+ 		if (!(phydev->duplex))
diff --git a/queue-4.13/ip6_gre-update-mtu-properly-in-ip6gre_err.patch b/queue-4.13/ip6_gre-update-mtu-properly-in-ip6gre_err.patch
new file mode 100644
index 00000000000..201195fb34d
--- /dev/null
+++ b/queue-4.13/ip6_gre-update-mtu-properly-in-ip6gre_err.patch
@@ -0,0 +1,46 @@
+From foo@baz Thu Sep 14 23:20:44 PDT 2017
+From: Xin Long
+Date: Tue, 5 Sep 2017 17:26:33 +0800
+Subject: ip6_gre: update mtu properly in ip6gre_err
+
+From: Xin Long
+
+
+[ Upstream commit 5c25f30c93fdc5bf25e62101aeaae7a4f9b421b3 ]
+
+Now when processing ICMPV6_PKT_TOOBIG, ip6gre_err only subtracts the
+offset of the gre header from the mtu info. The expected mtu of the
+gre device should also subtract the gre header. Otherwise, subsequent
+packets still can't be sent out.
+
+Jianlin found this issue when using the topo:
+    client(ip6gre)<---->(nic1)route(nic2)<----->(ip6gre)server
+
+and reducing nic2's mtu, after which both tcp and sctp performance
+with large payloads dropped to 0.
+
+This patch fixes it by also subtracting the grehdr (tun->tun_hlen)
+from the mtu info when updating the gre device's mtu in ip6gre_err().
+It also subtracts ETH_HLEN if the gre dev's type is ARPHRD_ETHER.
+
+Reported-by: Jianlin Shi
+Signed-off-by: Xin Long
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/ipv6/ip6_gre.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/ipv6/ip6_gre.c
++++ b/net/ipv6/ip6_gre.c
+@@ -432,7 +432,9 @@ static void ip6gre_err(struct sk_buff *s
+ 		}
+ 		break;
+ 	case ICMPV6_PKT_TOOBIG:
+-		mtu = be32_to_cpu(info) - offset;
++		mtu = be32_to_cpu(info) - offset - t->tun_hlen;
++		if (t->dev->type == ARPHRD_ETHER)
++			mtu -= ETH_HLEN;
+ 		if (mtu < IPV6_MIN_MTU)
+ 			mtu = IPV6_MIN_MTU;
+ 		t->dev->mtu = mtu;
diff --git a/queue-4.13/ip_tunnel-fix-setting-ttl-and-tos-value-in-collect_md-mode.patch b/queue-4.13/ip_tunnel-fix-setting-ttl-and-tos-value-in-collect_md-mode.patch
new file mode 100644
index 00000000000..e512ac1847d
--- /dev/null
+++ b/queue-4.13/ip_tunnel-fix-setting-ttl-and-tos-value-in-collect_md-mode.patch
@@ -0,0 +1,36 @@
+From foo@baz Thu Sep 14 23:20:44 PDT 2017
+From: Haishuang Yan
+Date: Thu, 7 Sep 2017 14:08:34 +0800
+Subject: ip_tunnel: fix setting ttl and tos value in collect_md mode
+
+From: Haishuang Yan
+
+
+[ Upstream commit 0f693f1995cf002432b70f43ce73f79bf8d0b6c9 ]
+
+The ttl and tos variables are declared and assigned, but are not used
+in the iptunnel_xmit() function call.
+
+Fixes: cfc7381b3002 ("ip_tunnel: add collect_md mode to IPIP tunnel")
+Cc: Alexei Starovoitov
+Signed-off-by: Haishuang Yan
+Acked-by: Alexei Starovoitov
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/ipv4/ip_tunnel.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/ip_tunnel.c
++++ b/net/ipv4/ip_tunnel.c
+@@ -618,8 +618,8 @@ void ip_md_tunnel_xmit(struct sk_buff *s
+ 		ip_rt_put(rt);
+ 		goto tx_dropped;
+ 	}
+-	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, key->tos,
+-		      key->ttl, df, !net_eq(tunnel->net, dev_net(dev)));
++	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
++		      df, !net_eq(tunnel->net, dev_net(dev)));
+ 	return;
+ tx_error:
+ 	dev->stats.tx_errors++;
diff --git a/queue-4.13/ipv6-fix-memory-leak-with-multiple-tables-during-netns-destruction.patch b/queue-4.13/ipv6-fix-memory-leak-with-multiple-tables-during-netns-destruction.patch
new file mode 100644
index 00000000000..718748a21cb
--- /dev/null
+++ b/queue-4.13/ipv6-fix-memory-leak-with-multiple-tables-during-netns-destruction.patch
@@ -0,0 +1,76 @@
+From foo@baz Thu Sep 14 23:20:44 PDT 2017
+From: Sabrina Dubroca
+Date: Fri, 8 Sep 2017 10:26:19 +0200
+Subject: ipv6: fix memory leak with multiple tables during netns destruction
+
+From: Sabrina Dubroca
+
+
+[ Upstream commit ba1cc08d9488c94cb8d94f545305688b72a2a300 ]
+
+fib6_net_exit only frees the main and local tables. If another table was
+created with fib6_alloc_table, we leak it when the netns is destroyed.
+
+Fix this in the same way ip_fib_net_exit cleans up tables, by walking
+through the whole hashtable of fib6_tables. We can get rid of the
+special cases for local and main, since they're also part of the
+hashtable.
+
+Reproducer:
+   ip netns add x
+   ip -net x -6 rule add from 6003:1::/64 table 100
+   ip netns del x
+
+Reported-by: Jianlin Shi
+Fixes: 58f09b78b730 ("[NETNS][IPV6] ip6_fib - make it per network namespace")
+Signed-off-by: Sabrina Dubroca
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/ipv6/ip6_fib.c |   25 +++++++++++++++++++------
+ 1 file changed, 19 insertions(+), 6 deletions(-)
+
+--- a/net/ipv6/ip6_fib.c
++++ b/net/ipv6/ip6_fib.c
+@@ -198,6 +198,12 @@ static void rt6_release(struct rt6_info
+ 	}
+ }
+ 
++static void fib6_free_table(struct fib6_table *table)
++{
++	inetpeer_invalidate_tree(&table->tb6_peers);
++	kfree(table);
++}
++
+ static void fib6_link_table(struct net *net, struct fib6_table *tb)
+ {
+ 	unsigned int h;
+@@ -1915,15 +1921,22 @@ out_timer:
+ 
+ static void fib6_net_exit(struct net *net)
+ {
++	unsigned int i;
++
+ 	rt6_ifdown(net, NULL);
+ 	del_timer_sync(&net->ipv6.ip6_fib_timer);
+ 
+-#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+-	inetpeer_invalidate_tree(&net->ipv6.fib6_local_tbl->tb6_peers);
+-	kfree(net->ipv6.fib6_local_tbl);
+-#endif
+-	inetpeer_invalidate_tree(&net->ipv6.fib6_main_tbl->tb6_peers);
+-	kfree(net->ipv6.fib6_main_tbl);
++	for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
++		struct hlist_head *head = &net->ipv6.fib_table_hash[i];
++		struct hlist_node *tmp;
++		struct fib6_table *tb;
++
++		hlist_for_each_entry_safe(tb, tmp, head, tb6_hlist) {
++			hlist_del(&tb->tb6_hlist);
++			fib6_free_table(tb);
++		}
++	}
++
+ 	kfree(net->ipv6.fib_table_hash);
+ 	kfree(net->ipv6.rt6_stats);
+ }
diff --git a/queue-4.13/ipv6-fix-typo-in-fib6_net_exit.patch b/queue-4.13/ipv6-fix-typo-in-fib6_net_exit.patch
new file mode 100644
index 00000000000..759a3904d3e
--- /dev/null
+++ b/queue-4.13/ipv6-fix-typo-in-fib6_net_exit.patch
@@ -0,0 +1,31 @@
+From foo@baz Thu Sep 14 23:20:44 PDT 2017
+From: Eric Dumazet
+Date: Fri, 8 Sep 2017 15:48:47 -0700
+Subject: ipv6: fix typo in fib6_net_exit()
+
+From: Eric Dumazet
+
+
+[ Upstream commit 32a805baf0fb70b6dbedefcd7249ac7f580f9e3b ]
+
+IPv6 FIB should use FIB6_TABLE_HASHSZ, not FIB_TABLE_HASHSZ.
+
+Fixes: ba1cc08d9488 ("ipv6: fix memory leak with multiple tables during netns destruction")
+Signed-off-by: Eric Dumazet
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/ipv6/ip6_fib.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv6/ip6_fib.c
++++ b/net/ipv6/ip6_fib.c
+@@ -1926,7 +1926,7 @@ static void fib6_net_exit(struct net *ne
+ 	rt6_ifdown(net, NULL);
+ 	del_timer_sync(&net->ipv6.ip6_fib_timer);
+ 
+-	for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
++	for (i = 0; i < FIB6_TABLE_HASHSZ; i++) {
+ 		struct hlist_head *head = &net->ipv6.fib_table_hash[i];
+ 		struct hlist_node *tmp;
+ 		struct fib6_table *tb;
diff --git a/queue-4.13/revert-net-fix-percpu-memory-leaks.patch b/queue-4.13/revert-net-fix-percpu-memory-leaks.patch
new file mode 100644
index 00000000000..1fa5b71871f
--- /dev/null
+++ b/queue-4.13/revert-net-fix-percpu-memory-leaks.patch
@@ -0,0 +1,151 @@
+From foo@baz Thu Sep 14 23:20:44 PDT 2017
+From: Jesper Dangaard Brouer
+Date: Fri, 1 Sep 2017 11:26:13 +0200
+Subject: Revert "net: fix percpu memory leaks"
+
+From: Jesper Dangaard Brouer
+
+
+[ Upstream commit 5a63643e583b6a9789d7a225ae076fb4e603991c ]
+
+This reverts commit 1d6119baf0610f813eb9d9580eb4fd16de5b4ceb.
+
+After reverting commit 6d7b857d541e ("net: use lib/percpu_counter API
+for fragmentation mem accounting") there is no need for this
+fix-up patch. As percpu_counter is no longer used, it can no
+longer leak memory.
+
+Fixes: 6d7b857d541e ("net: use lib/percpu_counter API for fragmentation mem accounting")
+Fixes: 1d6119baf061 ("net: fix percpu memory leaks")
+Signed-off-by: Jesper Dangaard Brouer
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ include/net/inet_frag.h                 |    7 +------
+ net/ieee802154/6lowpan/reassembly.c     |   11 +++--------
+ net/ipv4/ip_fragment.c                  |   12 +++---------
+ net/ipv6/netfilter/nf_conntrack_reasm.c |   12 +++---------
+ net/ipv6/reassembly.c                   |   12 +++---------
+ 5 files changed, 13 insertions(+), 41 deletions(-)
+
+--- a/include/net/inet_frag.h
++++ b/include/net/inet_frag.h
+@@ -103,15 +103,10 @@ struct inet_frags {
+ int inet_frags_init(struct inet_frags *);
+ void inet_frags_fini(struct inet_frags *);
+ 
+-static inline int inet_frags_init_net(struct netns_frags *nf)
++static inline void inet_frags_init_net(struct netns_frags *nf)
+ {
+ 	atomic_set(&nf->mem, 0);
+-	return 0;
+ }
+-static inline void inet_frags_uninit_net(struct netns_frags *nf)
+-{
+-}
+-
+ void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f);
+ 
+ void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f);
+--- a/net/ieee802154/6lowpan/reassembly.c
++++ b/net/ieee802154/6lowpan/reassembly.c
+@@ -580,19 +580,14 @@ static int __net_init lowpan_frags_init_
+ {
+ 	struct netns_ieee802154_lowpan *ieee802154_lowpan =
+ 		net_ieee802154_lowpan(net);
+-	int res;
+ 
+ 	ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
+ 	ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH;
+ 	ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT;
+ 
+-	res = inet_frags_init_net(&ieee802154_lowpan->frags);
+-	if (res)
+-		return res;
+-	res = lowpan_frags_ns_sysctl_register(net);
+-	if (res)
+-		inet_frags_uninit_net(&ieee802154_lowpan->frags);
+-	return res;
++	inet_frags_init_net(&ieee802154_lowpan->frags);
++
++	return lowpan_frags_ns_sysctl_register(net);
+ }
+ 
+ static void __net_exit lowpan_frags_exit_net(struct net *net)
+--- a/net/ipv4/ip_fragment.c
++++ b/net/ipv4/ip_fragment.c
+@@ -844,8 +844,6 @@ static void __init ip4_frags_ctl_registe
+ 
+ static int __net_init ipv4_frags_init_net(struct net *net)
+ {
+-	int res;
+-
+ 	/* Fragment cache limits.
+ 	 *
+ 	 * The fragment memory accounting code, (tries to) account for
+@@ -871,13 +869,9 @@ static int __net_init ipv4_frags_init_ne
+ 
+ 	net->ipv4.frags.max_dist = 64;
+ 
+-	res = inet_frags_init_net(&net->ipv4.frags);
+-	if (res)
+-		return res;
+-	res = ip4_frags_ns_ctl_register(net);
+-	if (res)
+-		inet_frags_uninit_net(&net->ipv4.frags);
+-	return res;
++	inet_frags_init_net(&net->ipv4.frags);
++
++	return ip4_frags_ns_ctl_register(net);
+ }
+ 
+ static void __net_exit ipv4_frags_exit_net(struct net *net)
+--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
++++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
+@@ -622,18 +622,12 @@ EXPORT_SYMBOL_GPL(nf_ct_frag6_gather);
+ 
+ static int nf_ct_net_init(struct net *net)
+ {
+-	int res;
+-
+ 	net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
+ 	net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
+ 	net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT;
+-	res = inet_frags_init_net(&net->nf_frag.frags);
+-	if (res)
+-		return res;
+-	res = nf_ct_frag6_sysctl_register(net);
+-	if (res)
+-		inet_frags_uninit_net(&net->nf_frag.frags);
+-	return res;
++	inet_frags_init_net(&net->nf_frag.frags);
++
++	return nf_ct_frag6_sysctl_register(net);
+ }
+ 
+ static void nf_ct_net_exit(struct net *net)
+--- a/net/ipv6/reassembly.c
++++ b/net/ipv6/reassembly.c
+@@ -714,19 +714,13 @@ static void ip6_frags_sysctl_unregister(
+ 
+ static int __net_init ipv6_frags_init_net(struct net *net)
+ {
+-	int res;
+-
+ 	net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
+ 	net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
+ 	net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT;
+ 
+-	res = inet_frags_init_net(&net->ipv6.frags);
+-	if (res)
+-		return res;
+-	res = ip6_frags_ns_sysctl_register(net);
+-	if (res)
+-		inet_frags_uninit_net(&net->ipv6.frags);
+-	return res;
++	inet_frags_init_net(&net->ipv6.frags);
++
++	return ip6_frags_ns_sysctl_register(net);
+ }
+ 
+ static void __net_exit ipv6_frags_exit_net(struct net *net)
diff --git a/queue-4.13/revert-net-use-lib-percpu_counter-api-for-fragmentation-mem-accounting.patch b/queue-4.13/revert-net-use-lib-percpu_counter-api-for-fragmentation-mem-accounting.patch
new file mode 100644
index 00000000000..fbcd610526f
--- /dev/null
+++ b/queue-4.13/revert-net-use-lib-percpu_counter-api-for-fragmentation-mem-accounting.patch
@@ -0,0 +1,141 @@
+From foo@baz Thu Sep 14 23:20:44 PDT 2017
+From: Jesper Dangaard Brouer
+Date: Fri, 1 Sep 2017 11:26:08 +0200
+Subject: Revert "net: use lib/percpu_counter API for fragmentation mem accounting"
+
+From: Jesper Dangaard Brouer
+
+
+[ Upstream commit fb452a1aa3fd4034d7999e309c5466ff2d7005aa ]
+
+This reverts commit 6d7b857d541ecd1d9bd997c97242d4ef94b19de2.
+
+There is a bug in the fragmentation code's use of the percpu_counter
+API, which can cause issues on systems with many CPUs.
+
+The frag_mem_limit() just reads the global counter (fbc->count),
+without considering that other CPUs can hold up to the batch size
+(130K) unsubtracted. Due to the 3MBytes lower thresh limit,
+this becomes dangerous at >=24 CPUs (3*1024*1024/130000=24).
+
+The correct API usage would be to use __percpu_counter_compare(), which
+does the right thing: it takes into account the number of (online)
+CPUs and the batch size, and calls __percpu_counter_sum()
+when needed.
+
+We choose to revert the use of the lib/percpu_counter API for frag
+memory accounting for several reasons:
+
+1) On systems with CPUs > 24, the heavier fully locked
+   __percpu_counter_sum() is always invoked, which will be more
+   expensive than the atomic_t that is reverted to.
+
+Given systems with more than 24 CPUs are becoming common, this doesn't
+seem like a good option. To mitigate this, the batch size could be
+decreased and the thresh increased.
+
+2) The add_frag_mem_limit+sub_frag_mem_limit pairs happen on the RX
+   CPU, before SKBs are pushed into sockets on remote CPUs. Given
+   NICs can only hash on L2 part of the IP-header, the NIC-RXq's will
+   likely be limited. Thus, there is a fair chance that atomic add+dec
+   happen on the same CPU.
+
+Note that commit 1d6119baf061 ("net: fix percpu memory leaks")
+removed init_frag_mem_limit() and instead used inet_frags_init_net().
+After this revert, inet_frags_uninit_net() becomes empty.
+
+Fixes: 6d7b857d541e ("net: use lib/percpu_counter API for fragmentation mem accounting")
+Fixes: 1d6119baf061 ("net: fix percpu memory leaks")
+Signed-off-by: Jesper Dangaard Brouer
+Acked-by: Florian Westphal
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ include/net/inet_frag.h  |   30 +++++++++---------------------
+ net/ipv4/inet_fragment.c |    4 +---
+ 2 files changed, 10 insertions(+), 24 deletions(-)
+
+--- a/include/net/inet_frag.h
++++ b/include/net/inet_frag.h
+@@ -1,14 +1,9 @@
+ #ifndef __NET_FRAG_H__
+ #define __NET_FRAG_H__
+ 
+-#include <linux/percpu_counter.h>
+-
+ struct netns_frags {
+-	/* The percpu_counter "mem" need to be cacheline aligned.
+-	 *  mem.count must not share cacheline with other writers
+-	 */
+-	struct percpu_counter   mem ____cacheline_aligned_in_smp;
+-
++	/* Keep atomic mem on separate cachelines in structs that include it */
++	atomic_t		mem ____cacheline_aligned_in_smp;
+ 	/* sysctls */
+ 	int			timeout;
+ 	int			high_thresh;
+@@ -110,11 +105,11 @@ void inet_frags_fini(struct inet_frags *
+ 
+ static inline int inet_frags_init_net(struct netns_frags *nf)
+ {
+-	return percpu_counter_init(&nf->mem, 0, GFP_KERNEL);
++	atomic_set(&nf->mem, 0);
++	return 0;
+ }
+ static inline void inet_frags_uninit_net(struct netns_frags *nf)
+ {
+-	percpu_counter_destroy(&nf->mem);
+ }
+ 
+ void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f);
+@@ -140,31 +135,24 @@ static inline bool inet_frag_evicting(st
+ 
+ /* Memory Tracking Functions. */
+ 
+-/* The default percpu_counter batch size is not big enough to scale to
+- * fragmentation mem acct sizes.
+- * The mem size of a 64K fragment is approx:
+- *  (44 fragments * 2944 truesize) + frag_queue struct(200) = 129736 bytes
+- */
+-static unsigned int frag_percpu_counter_batch = 130000;
+-
+ static inline int frag_mem_limit(struct netns_frags *nf)
+ {
+-	return percpu_counter_read(&nf->mem);
++	return atomic_read(&nf->mem);
+ }
+ 
+ static inline void sub_frag_mem_limit(struct netns_frags *nf, int i)
+ {
+-	percpu_counter_add_batch(&nf->mem, -i, frag_percpu_counter_batch);
++	atomic_sub(i, &nf->mem);
+ }
+ 
+ static inline void add_frag_mem_limit(struct netns_frags *nf, int i)
+ {
+-	percpu_counter_add_batch(&nf->mem, i, frag_percpu_counter_batch);
++	atomic_add(i, &nf->mem);
+ }
+ 
+-static inline unsigned int sum_frag_mem_limit(struct netns_frags *nf)
++static inline int sum_frag_mem_limit(struct netns_frags *nf)
+ {
+-	return percpu_counter_sum_positive(&nf->mem);
++	return atomic_read(&nf->mem);
+ }
+ 
+ /* RFC 3168 support :
+--- a/net/ipv4/inet_fragment.c
++++ b/net/ipv4/inet_fragment.c
+@@ -234,10 +234,8 @@ evict_again:
+ 		cond_resched();
+ 
+ 		if (read_seqretry(&f->rnd_seqlock, seq) ||
+-		    percpu_counter_sum(&nf->mem))
++		    sum_frag_mem_limit(nf))
+ 			goto evict_again;
+-
+-	percpu_counter_destroy(&nf->mem);
+ }
+ EXPORT_SYMBOL(inet_frags_exit_net);
+ 
diff --git a/queue-4.13/sctp-fix-missing-wake-ups-in-some-situations.patch b/queue-4.13/sctp-fix-missing-wake-ups-in-some-situations.patch
new file mode 100644
index 00000000000..edd848f131f
--- /dev/null
+++ b/queue-4.13/sctp-fix-missing-wake-ups-in-some-situations.patch
@@ -0,0 +1,52 @@
+From foo@baz Thu Sep 14 23:20:44 PDT 2017
+From: Marcelo Ricardo Leitner
+Date: Fri, 8 Sep 2017 11:35:21 -0300
+Subject: sctp: fix missing wake ups in some situations
+
+From: Marcelo Ricardo Leitner
+
+
+[ Upstream commit 7906b00f5cd1cd484fced7fcda892176e3202c8a ]
+
+Commit fb586f25300f ("sctp: delay calls to sk_data_ready() as much as
+possible") minimized the number of wake ups that are triggered in case
+the association receives a packet with multiple data chunks on it and/or
+when io_events are enabled. Then commit 0970f5b36659 ("sctp: signal
+sk_data_ready earlier on data chunks reception") moved the wake up to as
+soon as possible. It thus relies on the state machine running later to
+clean the flag that the event was already generated.
+
+The issue is that there are 2 call paths that call
+sctp_ulpq_tail_event() outside of the state machine, causing the flag to
+linger and possibly omitting a needed wake up in the sequence.
+
+One of the call paths is when enabling SCTP_SENDER_DRY_EVENTS via
+setsockopt(SCTP_EVENTS), as noticed by Harald Welte. The other is when
+partial reliability triggers removal of chunks from the send queue when
+the application calls sendmsg().
+
+This commit fixes it by not setting the flag in case the socket is not
+owned by the user, as it won't be cleaned later. This works for
+user-initiated calls and also for rx path processing.
+
+Fixes: fb586f25300f ("sctp: delay calls to sk_data_ready() as much as possible")
+Reported-by: Harald Welte
+Signed-off-by: Marcelo Ricardo Leitner
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/sctp/ulpqueue.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/sctp/ulpqueue.c
++++ b/net/sctp/ulpqueue.c
+@@ -265,7 +265,8 @@ int sctp_ulpq_tail_event(struct sctp_ulp
+ 		sctp_ulpq_clear_pd(ulpq);
+ 
+ 	if (queue == &sk->sk_receive_queue && !sp->data_ready_signalled) {
+-		sp->data_ready_signalled = 1;
++		if (!sock_owned_by_user(sk))
++			sp->data_ready_signalled = 1;
+ 		sk->sk_data_ready(sk);
+ 	}
+ 	return 1;
diff --git a/queue-4.13/tcp-fix-a-request-socket-leak.patch b/queue-4.13/tcp-fix-a-request-socket-leak.patch
new file mode 100644
index 00000000000..371fad11163
--- /dev/null
+++ b/queue-4.13/tcp-fix-a-request-socket-leak.patch
@@ -0,0 +1,54 @@
+From foo@baz Thu Sep 14 23:20:44 PDT 2017
+From: Eric Dumazet
+Date: Fri, 8 Sep 2017 12:44:47 -0700
+Subject: tcp: fix a request socket leak
+
+From: Eric Dumazet
+
+
+[ Upstream commit 1f3b359f1004bd34b7b0bad70b93e3c7af92a37b ]
+
+While the cited commit fixed a possible deadlock, it added a leak
+of the request socket, since reqsk_put() must be called if the BPF
+filter decided the ACK packet must be dropped.
+
+Fixes: d624d276d1dd ("tcp: fix possible deadlock in TCP stack vs BPF filter")
+Signed-off-by: Eric Dumazet
+Acked-by: Alexei Starovoitov
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/ipv4/tcp_ipv4.c |    6 +++---
+ net/ipv6/tcp_ipv6.c |    6 +++---
+ 2 files changed, 6 insertions(+), 6 deletions(-)
+
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -1722,9 +1722,9 @@ process:
+ 		 */
+ 		sock_hold(sk);
+ 		refcounted = true;
+-		if (tcp_filter(sk, skb))
+-			goto discard_and_relse;
+-		nsk = tcp_check_req(sk, skb, req, false);
++		nsk = NULL;
++		if (!tcp_filter(sk, skb))
++			nsk = tcp_check_req(sk, skb, req, false);
+ 		if (!nsk) {
+ 			reqsk_put(req);
+ 			goto discard_and_relse;
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -1456,9 +1456,9 @@ process:
+ 		}
+ 		sock_hold(sk);
+ 		refcounted = true;
+-		if (tcp_filter(sk, skb))
+-			goto discard_and_relse;
+-		nsk = tcp_check_req(sk, skb, req, false);
++		nsk = NULL;
++		if (!tcp_filter(sk, skb))
++			nsk = tcp_check_req(sk, skb, req, false);
+ 		if (!nsk) {
+ 			reqsk_put(req);
+ 			goto discard_and_relse;
diff --git a/queue-4.13/udp-drop-head-states-only-when-all-skb-references-are-gone.patch b/queue-4.13/udp-drop-head-states-only-when-all-skb-references-are-gone.patch
new file mode 100644
index 00000000000..69ed481a06e
--- /dev/null
+++ b/queue-4.13/udp-drop-head-states-only-when-all-skb-references-are-gone.patch
@@ -0,0 +1,82 @@
+From foo@baz Thu Sep 14 23:20:44 PDT 2017
+From: Paolo Abeni
+Date: Wed, 6 Sep 2017 14:44:36 +0200
+Subject: udp: drop head states only when all skb references are gone
+
+From: Paolo Abeni
+
+
+[ Upstream commit ca2c1418efe9f7fe37aa1f355efdf4eb293673ce ]
+
+After commit 0ddf3fb2c43d ("udp: preserve skb->dst if required
+for IP options processing") we clear the skb head state as soon
+as the skb carrying them is first processed.
+
+Since the same skb can be processed several times when MSG_PEEK
+is used, we can end up lacking the required head states, and
+eventually oopsing.
+
+Fix this by clearing the skb head state only when processing the
+last skb reference.
+
+Reported-by: Eric Dumazet
+Fixes: 0ddf3fb2c43d ("udp: preserve skb->dst if required for IP options processing")
+Signed-off-by: Paolo Abeni
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ include/linux/skbuff.h |    2 +-
+ net/core/skbuff.c      |    9 +++------
+ net/ipv4/udp.c         |    5 ++++-
+ 3 files changed, 8 insertions(+), 8 deletions(-)
+
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -885,7 +885,7 @@ void kfree_skb(struct sk_buff *skb);
+ void kfree_skb_list(struct sk_buff *segs);
+ void skb_tx_error(struct sk_buff *skb);
+ void consume_skb(struct sk_buff *skb);
+-void consume_stateless_skb(struct sk_buff *skb);
++void __consume_stateless_skb(struct sk_buff *skb);
+ void __kfree_skb(struct sk_buff *skb);
+ extern struct kmem_cache *skbuff_head_cache;
+ 
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -753,14 +753,11 @@ EXPORT_SYMBOL(consume_skb);
+  *	consume_stateless_skb - free an skbuff, assuming it is stateless
+  *	@skb: buffer to free
+  *
+- *	Works like consume_skb(), but this variant assumes that all the head
+- *	states have been already dropped.
++ *	Alike consume_skb(), but this variant assumes that this is the last
++ *	skb reference and all the head states have been already dropped
+  */
+-void consume_stateless_skb(struct sk_buff *skb)
++void __consume_stateless_skb(struct sk_buff *skb)
+ {
+-	if (!skb_unref(skb))
+-		return;
+-
+ 	trace_consume_skb(skb);
+ 	if (likely(skb->head))
+ 		skb_release_data(skb);
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -1386,12 +1386,15 @@ void skb_consume_udp(struct sock *sk, st
+ 		unlock_sock_fast(sk, slow);
+ 	}
+ 
++	if (!skb_unref(skb))
++		return;
++
+ 	/* In the more common cases we cleared the head states previously,
+ 	 * see __udp_queue_rcv_skb().
+ 	 */
+ 	if (unlikely(udp_skb_has_head_state(skb)))
+ 		skb_release_head_state(skb);
+-	consume_stateless_skb(skb);
++	__consume_stateless_skb(skb);
+ }
+ EXPORT_SYMBOL_GPL(skb_consume_udp);
+ 
diff --git a/queue-4.13/vhost_net-correctly-check-tx-avail-during-rx-busy-polling.patch b/queue-4.13/vhost_net-correctly-check-tx-avail-during-rx-busy-polling.patch
new file mode 100644
index 00000000000..0df738e689e
--- /dev/null
+++ b/queue-4.13/vhost_net-correctly-check-tx-avail-during-rx-busy-polling.patch
@@ -0,0 +1,47 @@
+From foo@baz Thu Sep 14 23:20:44 PDT 2017
+From: Jason Wang
+Date: Tue, 5 Sep 2017 09:22:05 +0800
+Subject: vhost_net: correctly check tx avail during rx busy polling
+
+From: Jason Wang
+
+
+[ Upstream commit 8b949bef9172ca69d918e93509a4ecb03d0355e0 ]
+
+In the past we checked tx avail through vhost_enable_notify(), which is
+wrong since it only checks whether or not the guest has filled more
+available buffers since the last avail idx synchronization, which was
+just done by vhost_vq_avail_empty() before. What we really want is to
+check for pending buffers in the avail ring. Fix this by calling
+vhost_vq_avail_empty() instead.
+
+This issue could be noticed by doing netperf TCP_RR benchmark as
+client from guest (but not host). With this fix, TCP_RR from guest to
+localhost restores from 1375.91 trans per sec to 55235.28 trans per
+sec on my laptop (Intel(R) Core(TM) i7-5600U CPU @ 2.60GHz).
+
+Fixes: 030881372460 ("vhost_net: basic polling support")
+Signed-off-by: Jason Wang
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/vhost/net.c |    7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/drivers/vhost/net.c
++++ b/drivers/vhost/net.c
+@@ -634,8 +634,13 @@ static int vhost_net_rx_peek_head_len(st
+ 
+ 	preempt_enable();
+ 
+-	if (vhost_enable_notify(&net->dev, vq))
++	if (!vhost_vq_avail_empty(&net->dev, vq))
+ 		vhost_poll_queue(&vq->poll);
++	else if (unlikely(vhost_enable_notify(&net->dev, vq))) {
++		vhost_disable_notify(&net->dev, vq);
++		vhost_poll_queue(&vq->poll);
++	}
++
+ 	mutex_unlock(&vq->mutex);
+ 
+ 	len = peek_head_len(rvq, sk);