--- /dev/null
+From foo@baz Thu Sep 14 23:20:44 PDT 2017
+From: Claudiu Manoil <claudiu.manoil@nxp.com>
+Date: Mon, 4 Sep 2017 10:45:28 +0300
+Subject: gianfar: Fix Tx flow control deactivation
+
+From: Claudiu Manoil <claudiu.manoil@nxp.com>
+
+
+[ Upstream commit 5d621672bc1a1e5090c1ac5432a18c79e0e13e03 ]
+
+The wrong register is checked for the Tx flow control bit:
+it should have been maccfg1, not maccfg2.
+This went unnoticed for so long probably because the impact is
+hardly visible, not to mention the tangled code in adjust_link().
+First, link flow control (i.e. handling of Rx/Tx link-level pause frames)
+is disabled by default (it needs to be enabled via 'ethtool -A').
+Second, maccfg2 always returns 0 for tx_flow_oldval (except on a few
+old boards), which results in Tx flow control always remaining on
+once activated.
+
+Fixes: 45b679c9a3ccd9e34f28e6ec677b812a860eb8eb ("gianfar: Implement PAUSE frame generation support")
+Signed-off-by: Claudiu Manoil <claudiu.manoil@nxp.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/freescale/gianfar.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/freescale/gianfar.c
++++ b/drivers/net/ethernet/freescale/gianfar.c
+@@ -3687,7 +3687,7 @@ static noinline void gfar_update_link_st
+ u32 tempval1 = gfar_read(&regs->maccfg1);
+ u32 tempval = gfar_read(&regs->maccfg2);
+ u32 ecntrl = gfar_read(&regs->ecntrl);
+- u32 tx_flow_oldval = (tempval & MACCFG1_TX_FLOW);
++ u32 tx_flow_oldval = (tempval1 & MACCFG1_TX_FLOW);
+
+ if (phydev->duplex != priv->oldduplex) {
+ if (!(phydev->duplex))
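
A minimal illustrative sketch (not the actual gfar_update_link_state() logic;
gfar_read()/gfar_write(), regs->maccfg1/maccfg2 and MACCFG1_TX_FLOW come from
the diff above, while the change-detection guard and the tx_pause_requested
flag are assumptions) of why reading the old Tx pause bit from the wrong
register leaves it stuck on:

/* Sketch only: maccfg2 never carries MACCFG1_TX_FLOW, so the "old" value
 * always reads as 0. Disabling pause (new value 0) then looks like "no
 * change" and the clearing write to maccfg1 is skipped, so the bit stays
 * set once it has ever been enabled.
 */
u32 tx_flow_oldval = gfar_read(&regs->maccfg2) & MACCFG1_TX_FLOW; /* always 0 */
u32 tx_flow_newval = tx_pause_requested ? MACCFG1_TX_FLOW : 0;

if (tx_flow_newval != tx_flow_oldval) {      /* false when disabling */
	u32 maccfg1 = gfar_read(&regs->maccfg1);

	maccfg1 &= ~MACCFG1_TX_FLOW;
	maccfg1 |= tx_flow_newval;
	gfar_write(&regs->maccfg1, maccfg1); /* never reached on disable */
}
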
--- /dev/null
+From foo@baz Thu Sep 14 23:20:44 PDT 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Tue, 5 Sep 2017 17:26:33 +0800
+Subject: ip6_gre: update mtu properly in ip6gre_err
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit 5c25f30c93fdc5bf25e62101aeaae7a4f9b421b3 ]
+
+Currently, when processing ICMPV6_PKT_TOOBIG, ip6gre_err only subtracts
+the offset of the gre header from the mtu info. The expected mtu of the
+gre device should also have the gre header subtracted. Otherwise, the
+next packets still can't be sent out.
+
+Jianlin found this issue when using the topology:
+ client(ip6gre)<---->(nic1)route(nic2)<----->(ip6gre)server
+
+and reducing nic2's mtu; both tcp and sctp performance with
+large payloads then dropped to 0.
+
+This patch fixes it by also subtracting the gre header length
+(tun->tun_hlen) from the mtu info when updating the gre device's mtu in
+ip6gre_err(). ETH_HLEN also needs to be subtracted if the gre device's
+type is ARPHRD_ETHER.
+
+Reported-by: Jianlin Shi <jishi@redhat.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_gre.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/ipv6/ip6_gre.c
++++ b/net/ipv6/ip6_gre.c
+@@ -432,7 +432,9 @@ static void ip6gre_err(struct sk_buff *s
+ }
+ break;
+ case ICMPV6_PKT_TOOBIG:
+- mtu = be32_to_cpu(info) - offset;
++ mtu = be32_to_cpu(info) - offset - t->tun_hlen;
++ if (t->dev->type == ARPHRD_ETHER)
++ mtu -= ETH_HLEN;
+ if (mtu < IPV6_MIN_MTU)
+ mtu = IPV6_MIN_MTU;
+ t->dev->mtu = mtu;
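
A quick worked example of the mtu math above (the 1400/40/8 figures are
illustrative assumptions, not taken from the report; 40 is a plain IPv6
header and 8 is an assumed GRE header length for t->tun_hlen):

unsigned int info = 1400;     /* path MTU reported via ICMPV6_PKT_TOOBIG */
unsigned int offset = 40;     /* offset of the GRE header (IPv6 header)  */
unsigned int tun_hlen = 8;    /* assumed GRE header length               */

unsigned int old_mtu = info - offset;            /* 1360: 1360 + 8 + 40 = 1408 > 1400,
                                                    the next packets are still too big */
unsigned int new_mtu = info - offset - tun_hlen; /* 1352: 1352 + 8 + 40 = 1400, fits   */
/* For an ARPHRD_ETHER (gretap-style) device, ETH_HLEN (14) is subtracted too. */
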
--- /dev/null
+From foo@baz Thu Sep 14 23:20:44 PDT 2017
+From: Haishuang Yan <yanhaishuang@cmss.chinamobile.com>
+Date: Thu, 7 Sep 2017 14:08:34 +0800
+Subject: ip_tunnel: fix setting ttl and tos value in collect_md mode
+
+From: Haishuang Yan <yanhaishuang@cmss.chinamobile.com>
+
+
+[ Upstream commit 0f693f1995cf002432b70f43ce73f79bf8d0b6c9 ]
+
+The ttl and tos variables are declared and assigned, but are not used in
+the iptunnel_xmit() call.
+
+Fixes: cfc7381b3002 ("ip_tunnel: add collect_md mode to IPIP tunnel")
+Cc: Alexei Starovoitov <ast@fb.com>
+Signed-off-by: Haishuang Yan <yanhaishuang@cmss.chinamobile.com>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ip_tunnel.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/ip_tunnel.c
++++ b/net/ipv4/ip_tunnel.c
+@@ -618,8 +618,8 @@ void ip_md_tunnel_xmit(struct sk_buff *s
+ ip_rt_put(rt);
+ goto tx_dropped;
+ }
+- iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, key->tos,
+- key->ttl, df, !net_eq(tunnel->net, dev_net(dev)));
++ iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
++ df, !net_eq(tunnel->net, dev_net(dev)));
+ return;
+ tx_error:
+ dev->stats.tx_errors++;
--- /dev/null
+From foo@baz Thu Sep 14 23:20:44 PDT 2017
+From: Sabrina Dubroca <sd@queasysnail.net>
+Date: Fri, 8 Sep 2017 10:26:19 +0200
+Subject: ipv6: fix memory leak with multiple tables during netns destruction
+
+From: Sabrina Dubroca <sd@queasysnail.net>
+
+
+[ Upstream commit ba1cc08d9488c94cb8d94f545305688b72a2a300 ]
+
+fib6_net_exit only frees the main and local tables. If another table was
+created with fib6_alloc_table, we leak it when the netns is destroyed.
+
+Fix this in the same way ip_fib_net_exit cleans up tables, by walking
+through the whole hashtable of fib6_table entries. We can get rid of the
+special cases for local and main, since they're also part of the
+hashtable.
+
+Reproducer:
+ ip netns add x
+ ip -net x -6 rule add from 6003:1::/64 table 100
+ ip netns del x
+
+Reported-by: Jianlin Shi <jishi@redhat.com>
+Fixes: 58f09b78b730 ("[NETNS][IPV6] ip6_fib - make it per network namespace")
+Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_fib.c | 25 +++++++++++++++++++------
+ 1 file changed, 19 insertions(+), 6 deletions(-)
+
+--- a/net/ipv6/ip6_fib.c
++++ b/net/ipv6/ip6_fib.c
+@@ -198,6 +198,12 @@ static void rt6_release(struct rt6_info
+ }
+ }
+
++static void fib6_free_table(struct fib6_table *table)
++{
++ inetpeer_invalidate_tree(&table->tb6_peers);
++ kfree(table);
++}
++
+ static void fib6_link_table(struct net *net, struct fib6_table *tb)
+ {
+ unsigned int h;
+@@ -1915,15 +1921,22 @@ out_timer:
+
+ static void fib6_net_exit(struct net *net)
+ {
++ unsigned int i;
++
+ rt6_ifdown(net, NULL);
+ del_timer_sync(&net->ipv6.ip6_fib_timer);
+
+-#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+- inetpeer_invalidate_tree(&net->ipv6.fib6_local_tbl->tb6_peers);
+- kfree(net->ipv6.fib6_local_tbl);
+-#endif
+- inetpeer_invalidate_tree(&net->ipv6.fib6_main_tbl->tb6_peers);
+- kfree(net->ipv6.fib6_main_tbl);
++ for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
++ struct hlist_head *head = &net->ipv6.fib_table_hash[i];
++ struct hlist_node *tmp;
++ struct fib6_table *tb;
++
++ hlist_for_each_entry_safe(tb, tmp, head, tb6_hlist) {
++ hlist_del(&tb->tb6_hlist);
++ fib6_free_table(tb);
++ }
++ }
++
+ kfree(net->ipv6.fib_table_hash);
+ kfree(net->ipv6.rt6_stats);
+ }
--- /dev/null
+From foo@baz Thu Sep 14 23:20:44 PDT 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 8 Sep 2017 15:48:47 -0700
+Subject: ipv6: fix typo in fib6_net_exit()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 32a805baf0fb70b6dbedefcd7249ac7f580f9e3b ]
+
+IPv6 FIB should use FIB6_TABLE_HASHSZ, not FIB_TABLE_HASHSZ.
+
+Fixes: ba1cc08d9488 ("ipv6: fix memory leak with multiple tables during netns destruction")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_fib.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv6/ip6_fib.c
++++ b/net/ipv6/ip6_fib.c
+@@ -1926,7 +1926,7 @@ static void fib6_net_exit(struct net *ne
+ rt6_ifdown(net, NULL);
+ del_timer_sync(&net->ipv6.ip6_fib_timer);
+
+- for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
++ for (i = 0; i < FIB6_TABLE_HASHSZ; i++) {
+ struct hlist_head *head = &net->ipv6.fib_table_hash[i];
+ struct hlist_node *tmp;
+ struct fib6_table *tb;
--- /dev/null
+From foo@baz Thu Sep 14 23:20:44 PDT 2017
+From: Jesper Dangaard Brouer <brouer@redhat.com>
+Date: Fri, 1 Sep 2017 11:26:13 +0200
+Subject: Revert "net: fix percpu memory leaks"
+
+From: Jesper Dangaard Brouer <brouer@redhat.com>
+
+
+[ Upstream commit 5a63643e583b6a9789d7a225ae076fb4e603991c ]
+
+This reverts commit 1d6119baf0610f813eb9d9580eb4fd16de5b4ceb.
+
+After reverting commit 6d7b857d541e ("net: use lib/percpu_counter API
+for fragmentation mem accounting"), there is no need for this
+fix-up patch. As percpu_counter is no longer used, it can no
+longer leak memory.
+
+Fixes: 6d7b857d541e ("net: use lib/percpu_counter API for fragmentation mem accounting")
+Fixes: 1d6119baf061 ("net: fix percpu memory leaks")
+Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/inet_frag.h | 7 +------
+ net/ieee802154/6lowpan/reassembly.c | 11 +++--------
+ net/ipv4/ip_fragment.c | 12 +++---------
+ net/ipv6/netfilter/nf_conntrack_reasm.c | 12 +++---------
+ net/ipv6/reassembly.c | 12 +++---------
+ 5 files changed, 13 insertions(+), 41 deletions(-)
+
+--- a/include/net/inet_frag.h
++++ b/include/net/inet_frag.h
+@@ -103,15 +103,10 @@ struct inet_frags {
+ int inet_frags_init(struct inet_frags *);
+ void inet_frags_fini(struct inet_frags *);
+
+-static inline int inet_frags_init_net(struct netns_frags *nf)
++static inline void inet_frags_init_net(struct netns_frags *nf)
+ {
+ atomic_set(&nf->mem, 0);
+- return 0;
+ }
+-static inline void inet_frags_uninit_net(struct netns_frags *nf)
+-{
+-}
+-
+ void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f);
+
+ void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f);
+--- a/net/ieee802154/6lowpan/reassembly.c
++++ b/net/ieee802154/6lowpan/reassembly.c
+@@ -580,19 +580,14 @@ static int __net_init lowpan_frags_init_
+ {
+ struct netns_ieee802154_lowpan *ieee802154_lowpan =
+ net_ieee802154_lowpan(net);
+- int res;
+
+ ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
+ ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH;
+ ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT;
+
+- res = inet_frags_init_net(&ieee802154_lowpan->frags);
+- if (res)
+- return res;
+- res = lowpan_frags_ns_sysctl_register(net);
+- if (res)
+- inet_frags_uninit_net(&ieee802154_lowpan->frags);
+- return res;
++ inet_frags_init_net(&ieee802154_lowpan->frags);
++
++ return lowpan_frags_ns_sysctl_register(net);
+ }
+
+ static void __net_exit lowpan_frags_exit_net(struct net *net)
+--- a/net/ipv4/ip_fragment.c
++++ b/net/ipv4/ip_fragment.c
+@@ -844,8 +844,6 @@ static void __init ip4_frags_ctl_registe
+
+ static int __net_init ipv4_frags_init_net(struct net *net)
+ {
+- int res;
+-
+ /* Fragment cache limits.
+ *
+ * The fragment memory accounting code, (tries to) account for
+@@ -871,13 +869,9 @@ static int __net_init ipv4_frags_init_ne
+
+ net->ipv4.frags.max_dist = 64;
+
+- res = inet_frags_init_net(&net->ipv4.frags);
+- if (res)
+- return res;
+- res = ip4_frags_ns_ctl_register(net);
+- if (res)
+- inet_frags_uninit_net(&net->ipv4.frags);
+- return res;
++ inet_frags_init_net(&net->ipv4.frags);
++
++ return ip4_frags_ns_ctl_register(net);
+ }
+
+ static void __net_exit ipv4_frags_exit_net(struct net *net)
+--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
++++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
+@@ -622,18 +622,12 @@ EXPORT_SYMBOL_GPL(nf_ct_frag6_gather);
+
+ static int nf_ct_net_init(struct net *net)
+ {
+- int res;
+-
+ net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
+ net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
+ net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT;
+- res = inet_frags_init_net(&net->nf_frag.frags);
+- if (res)
+- return res;
+- res = nf_ct_frag6_sysctl_register(net);
+- if (res)
+- inet_frags_uninit_net(&net->nf_frag.frags);
+- return res;
++ inet_frags_init_net(&net->nf_frag.frags);
++
++ return nf_ct_frag6_sysctl_register(net);
+ }
+
+ static void nf_ct_net_exit(struct net *net)
+--- a/net/ipv6/reassembly.c
++++ b/net/ipv6/reassembly.c
+@@ -714,19 +714,13 @@ static void ip6_frags_sysctl_unregister(
+
+ static int __net_init ipv6_frags_init_net(struct net *net)
+ {
+- int res;
+-
+ net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
+ net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
+ net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT;
+
+- res = inet_frags_init_net(&net->ipv6.frags);
+- if (res)
+- return res;
+- res = ip6_frags_ns_sysctl_register(net);
+- if (res)
+- inet_frags_uninit_net(&net->ipv6.frags);
+- return res;
++ inet_frags_init_net(&net->ipv6.frags);
++
++ return ip6_frags_ns_sysctl_register(net);
+ }
+
+ static void __net_exit ipv6_frags_exit_net(struct net *net)
--- /dev/null
+From foo@baz Thu Sep 14 23:20:44 PDT 2017
+From: Jesper Dangaard Brouer <brouer@redhat.com>
+Date: Fri, 1 Sep 2017 11:26:08 +0200
+Subject: Revert "net: use lib/percpu_counter API for fragmentation mem accounting"
+
+From: Jesper Dangaard Brouer <brouer@redhat.com>
+
+
+[ Upstream commit fb452a1aa3fd4034d7999e309c5466ff2d7005aa ]
+
+This reverts commit 6d7b857d541ecd1d9bd997c97242d4ef94b19de2.
+
+There is a bug in the fragmentation code's use of the percpu_counter API
+that can cause issues on systems with many CPUs.
+
+frag_mem_limit() just reads the global counter (fbc->count),
+without considering that other CPUs can each hold up to the batch size
+(130K) of updates that haven't been subtracted yet. Due to the 3MBytes
+lower thresh limit, this becomes dangerous at >=24 CPUs
+(3*1024*1024/130000=24).
+
+The correct API usage would be __percpu_counter_compare(), which
+does the right thing: it takes into account the number of (online)
+CPUs and the batch size, and calls __percpu_counter_sum()
+when needed.
+
+We choose to revert the use of the lib/percpu_counter API for frag
+memory accounting for several reasons:
+
+1) On systems with more than 24 CPUs, the heavier, fully locked
+   __percpu_counter_sum() is always invoked, which will be more
+   expensive than the atomic_t that is reverted to.
+
+Given that systems with more than 24 CPUs are becoming common, this
+doesn't seem like a good option. To mitigate this, the batch size could
+be decreased and the thresh increased.
+
+2) The add_frag_mem_limit+sub_frag_mem_limit pairs happen on the RX
+   CPU, before SKBs are pushed into sockets on remote CPUs. Given
+   NICs can only hash on the L2 part of the IP-header, the set of NIC
+   RX queues used will likely be limited. Thus, there is a fair chance
+   that the atomic add+dec happen on the same CPU.
+
+Note on the revert: commit 1d6119baf061 ("net: fix percpu memory leaks")
+removed init_frag_mem_limit() and instead used inet_frags_init_net().
+After this revert, inet_frags_uninit_net() becomes empty.
+
+Fixes: 6d7b857d541e ("net: use lib/percpu_counter API for fragmentation mem accounting")
+Fixes: 1d6119baf061 ("net: fix percpu memory leaks")
+Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
+Acked-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/inet_frag.h | 30 +++++++++---------------------
+ net/ipv4/inet_fragment.c | 4 +---
+ 2 files changed, 10 insertions(+), 24 deletions(-)
+
+--- a/include/net/inet_frag.h
++++ b/include/net/inet_frag.h
+@@ -1,14 +1,9 @@
+ #ifndef __NET_FRAG_H__
+ #define __NET_FRAG_H__
+
+-#include <linux/percpu_counter.h>
+-
+ struct netns_frags {
+- /* The percpu_counter "mem" need to be cacheline aligned.
+- * mem.count must not share cacheline with other writers
+- */
+- struct percpu_counter mem ____cacheline_aligned_in_smp;
+-
++ /* Keep atomic mem on separate cachelines in structs that include it */
++ atomic_t mem ____cacheline_aligned_in_smp;
+ /* sysctls */
+ int timeout;
+ int high_thresh;
+@@ -110,11 +105,11 @@ void inet_frags_fini(struct inet_frags *
+
+ static inline int inet_frags_init_net(struct netns_frags *nf)
+ {
+- return percpu_counter_init(&nf->mem, 0, GFP_KERNEL);
++ atomic_set(&nf->mem, 0);
++ return 0;
+ }
+ static inline void inet_frags_uninit_net(struct netns_frags *nf)
+ {
+- percpu_counter_destroy(&nf->mem);
+ }
+
+ void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f);
+@@ -140,31 +135,24 @@ static inline bool inet_frag_evicting(st
+
+ /* Memory Tracking Functions. */
+
+-/* The default percpu_counter batch size is not big enough to scale to
+- * fragmentation mem acct sizes.
+- * The mem size of a 64K fragment is approx:
+- * (44 fragments * 2944 truesize) + frag_queue struct(200) = 129736 bytes
+- */
+-static unsigned int frag_percpu_counter_batch = 130000;
+-
+ static inline int frag_mem_limit(struct netns_frags *nf)
+ {
+- return percpu_counter_read(&nf->mem);
++ return atomic_read(&nf->mem);
+ }
+
+ static inline void sub_frag_mem_limit(struct netns_frags *nf, int i)
+ {
+- percpu_counter_add_batch(&nf->mem, -i, frag_percpu_counter_batch);
++ atomic_sub(i, &nf->mem);
+ }
+
+ static inline void add_frag_mem_limit(struct netns_frags *nf, int i)
+ {
+- percpu_counter_add_batch(&nf->mem, i, frag_percpu_counter_batch);
++ atomic_add(i, &nf->mem);
+ }
+
+-static inline unsigned int sum_frag_mem_limit(struct netns_frags *nf)
++static inline int sum_frag_mem_limit(struct netns_frags *nf)
+ {
+- return percpu_counter_sum_positive(&nf->mem);
++ return atomic_read(&nf->mem);
+ }
+
+ /* RFC 3168 support :
+--- a/net/ipv4/inet_fragment.c
++++ b/net/ipv4/inet_fragment.c
+@@ -234,10 +234,8 @@ evict_again:
+ cond_resched();
+
+ if (read_seqretry(&f->rnd_seqlock, seq) ||
+- percpu_counter_sum(&nf->mem))
++ sum_frag_mem_limit(nf))
+ goto evict_again;
+-
+- percpu_counter_destroy(&nf->mem);
+ }
+ EXPORT_SYMBOL(inet_frags_exit_net);
+
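
For reference, a sketch of the alternative the changelog mentions but does
not take (the __percpu_counter_compare() call and frag_percpu_counter_batch
come from the pre-revert code shown above; the frag_mem_over_limit() helper
name and its use are assumptions, not part of any patch):

/* Sketch: compare the frag memory counter against a threshold the way the
 * changelog describes as correct percpu_counter usage. The per-CPU deltas
 * are only summed (under lock) when the global count is within
 * batch * num_online_cpus() of the threshold.
 */
static inline bool frag_mem_over_limit(struct netns_frags *nf, int thresh)
{
	return __percpu_counter_compare(&nf->mem, thresh,
					frag_percpu_counter_batch) > 0;
}
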
--- /dev/null
+From foo@baz Thu Sep 14 23:20:44 PDT 2017
+From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Date: Fri, 8 Sep 2017 11:35:21 -0300
+Subject: sctp: fix missing wake ups in some situations
+
+From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+
+
+[ Upstream commit 7906b00f5cd1cd484fced7fcda892176e3202c8a ]
+
+Commit fb586f25300f ("sctp: delay calls to sk_data_ready() as much as
+possible") minimized the number of wake-ups triggered when the
+association receives a packet with multiple data chunks on it and/or
+when io_events are enabled, and then commit 0970f5b36659 ("sctp: signal
+sk_data_ready earlier on data chunks reception") moved the wake-up to as
+soon as possible. It thus relies on the state machine running later to
+clear the flag indicating that the event was already generated.
+
+The issue is that there are two call paths that call
+sctp_ulpq_tail_event() outside of the state machine, causing the flag to
+linger and possibly omitting a needed wake-up in the sequence.
+
+One of the call paths is when enabling SCTP_SENDER_DRY_EVENTS via
+setsockopt(SCTP_EVENTS), as noticed by Harald Welte. The other is when
+partial reliability triggers removal of chunks from the send queue when
+the application calls sendmsg().
+
+This commit fixes it by not setting the flag when the socket is not
+owned by the user, as it won't be cleared later. This works for
+user-initiated calls and also for rx path processing.
+
+Fixes: fb586f25300f ("sctp: delay calls to sk_data_ready() as much as possible")
+Reported-by: Harald Welte <laforge@gnumonks.org>
+Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/ulpqueue.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/sctp/ulpqueue.c
++++ b/net/sctp/ulpqueue.c
+@@ -265,7 +265,8 @@ int sctp_ulpq_tail_event(struct sctp_ulp
+ sctp_ulpq_clear_pd(ulpq);
+
+ if (queue == &sk->sk_receive_queue && !sp->data_ready_signalled) {
+- sp->data_ready_signalled = 1;
++ if (!sock_owned_by_user(sk))
++ sp->data_ready_signalled = 1;
+ sk->sk_data_ready(sk);
+ }
+ return 1;
--- /dev/null
+From foo@baz Thu Sep 14 23:20:44 PDT 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 8 Sep 2017 12:44:47 -0700
+Subject: tcp: fix a request socket leak
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 1f3b359f1004bd34b7b0bad70b93e3c7af92a37b ]
+
+While the cited commit fixed a possible deadlock, it added a leak
+of the request socket, since reqsk_put() must be called if the BPF
+filter decides the ACK packet must be dropped.
+
+Fixes: d624d276d1dd ("tcp: fix possible deadlock in TCP stack vs BPF filter")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_ipv4.c | 6 +++---
+ net/ipv6/tcp_ipv6.c | 6 +++---
+ 2 files changed, 6 insertions(+), 6 deletions(-)
+
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -1722,9 +1722,9 @@ process:
+ */
+ sock_hold(sk);
+ refcounted = true;
+- if (tcp_filter(sk, skb))
+- goto discard_and_relse;
+- nsk = tcp_check_req(sk, skb, req, false);
++ nsk = NULL;
++ if (!tcp_filter(sk, skb))
++ nsk = tcp_check_req(sk, skb, req, false);
+ if (!nsk) {
+ reqsk_put(req);
+ goto discard_and_relse;
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -1456,9 +1456,9 @@ process:
+ }
+ sock_hold(sk);
+ refcounted = true;
+- if (tcp_filter(sk, skb))
+- goto discard_and_relse;
+- nsk = tcp_check_req(sk, skb, req, false);
++ nsk = NULL;
++ if (!tcp_filter(sk, skb))
++ nsk = tcp_check_req(sk, skb, req, false);
+ if (!nsk) {
+ reqsk_put(req);
+ goto discard_and_relse;
--- /dev/null
+From foo@baz Thu Sep 14 23:20:44 PDT 2017
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Wed, 6 Sep 2017 14:44:36 +0200
+Subject: udp: drop head states only when all skb references are gone
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+
+[ Upstream commit ca2c1418efe9f7fe37aa1f355efdf4eb293673ce ]
+
+After commit 0ddf3fb2c43d ("udp: preserve skb->dst if required
+for IP options processing") we clear the skb head states as soon
+as the skb carrying them is first processed.
+
+Since the same skb can be processed several times when MSG_PEEK
+is used, we can end up lacking the required head states, and
+eventually oopsing.
+
+Fix this by clearing the skb head states only when processing the
+last skb reference.
+
+Reported-by: Eric Dumazet <edumazet@google.com>
+Fixes: 0ddf3fb2c43d ("udp: preserve skb->dst if required for IP options processing")
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/skbuff.h | 2 +-
+ net/core/skbuff.c | 9 +++------
+ net/ipv4/udp.c | 5 ++++-
+ 3 files changed, 8 insertions(+), 8 deletions(-)
+
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -885,7 +885,7 @@ void kfree_skb(struct sk_buff *skb);
+ void kfree_skb_list(struct sk_buff *segs);
+ void skb_tx_error(struct sk_buff *skb);
+ void consume_skb(struct sk_buff *skb);
+-void consume_stateless_skb(struct sk_buff *skb);
++void __consume_stateless_skb(struct sk_buff *skb);
+ void __kfree_skb(struct sk_buff *skb);
+ extern struct kmem_cache *skbuff_head_cache;
+
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -753,14 +753,11 @@ EXPORT_SYMBOL(consume_skb);
+ * consume_stateless_skb - free an skbuff, assuming it is stateless
+ * @skb: buffer to free
+ *
+- * Works like consume_skb(), but this variant assumes that all the head
+- * states have been already dropped.
++ * Alike consume_skb(), but this variant assumes that this is the last
++ * skb reference and all the head states have been already dropped
+ */
+-void consume_stateless_skb(struct sk_buff *skb)
++void __consume_stateless_skb(struct sk_buff *skb)
+ {
+- if (!skb_unref(skb))
+- return;
+-
+ trace_consume_skb(skb);
+ if (likely(skb->head))
+ skb_release_data(skb);
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -1386,12 +1386,15 @@ void skb_consume_udp(struct sock *sk, st
+ unlock_sock_fast(sk, slow);
+ }
+
++ if (!skb_unref(skb))
++ return;
++
+ /* In the more common cases we cleared the head states previously,
+ * see __udp_queue_rcv_skb().
+ */
+ if (unlikely(udp_skb_has_head_state(skb)))
+ skb_release_head_state(skb);
+- consume_stateless_skb(skb);
++ __consume_stateless_skb(skb);
+ }
+ EXPORT_SYMBOL_GPL(skb_consume_udp);
+
--- /dev/null
+From foo@baz Thu Sep 14 23:20:44 PDT 2017
+From: Jason Wang <jasowang@redhat.com>
+Date: Tue, 5 Sep 2017 09:22:05 +0800
+Subject: vhost_net: correctly check tx avail during rx busy polling
+
+From: Jason Wang <jasowang@redhat.com>
+
+
+[ Upstream commit 8b949bef9172ca69d918e93509a4ecb03d0355e0 ]
+
+In the past we checked tx avail through vhost_enable_notify(), which is
+wrong since it only checks whether the guest has filled more available
+buffers since the last avail idx synchronization, which was just done
+by vhost_vq_avail_empty() before. What we really want is to check for
+pending buffers in the avail ring. Fix this by calling
+vhost_vq_avail_empty() instead.
+
+This issue could be noticed by running a netperf TCP_RR benchmark as
+client from the guest (but not the host). With this fix, TCP_RR from
+guest to localhost recovers from 1375.91 trans per sec to 55235.28
+trans per sec on my laptop (Intel(R) Core(TM) i7-5600U CPU @ 2.60GHz).
+
+Fixes: 030881372460 ("vhost_net: basic polling support")
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/vhost/net.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/drivers/vhost/net.c
++++ b/drivers/vhost/net.c
+@@ -634,8 +634,13 @@ static int vhost_net_rx_peek_head_len(st
+
+ preempt_enable();
+
+- if (vhost_enable_notify(&net->dev, vq))
++ if (!vhost_vq_avail_empty(&net->dev, vq))
+ vhost_poll_queue(&vq->poll);
++ else if (unlikely(vhost_enable_notify(&net->dev, vq))) {
++ vhost_disable_notify(&net->dev, vq);
++ vhost_poll_queue(&vq->poll);
++ }
++
+ mutex_unlock(&vq->mutex);
+
+ len = peek_head_len(rvq, sk);