--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Vishal Kulkarni <vishal@chelsio.com>
+Date: Wed, 30 Oct 2019 20:17:57 +0530
+Subject: cxgb4: fix panic when attaching to ULD fail
+
+From: Vishal Kulkarni <vishal@chelsio.com>
+
+[ Upstream commit fc89cc358fb64e2429aeae0f37906126636507ec ]
+
+Release resources when attaching to ULD fails. Otherwise, data
+mismatch is seen between LLD and ULD later on, which leads to
+kernel panic when accessing resources that should not even
+exist in the first place.
+
+Fixes: 94cdb8bb993a ("cxgb4: Add support for dynamic allocation of resources for ULD")
+Signed-off-by: Shahjada Abul Husain <shahjada@chelsio.com>
+Signed-off-by: Vishal Kulkarni <vishal@chelsio.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c | 28 ++++++++++++++-----------
+ 1 file changed, 16 insertions(+), 12 deletions(-)
+
+--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
++++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
+@@ -695,10 +695,10 @@ static void uld_init(struct adapter *ada
+ lld->write_cmpl_support = adap->params.write_cmpl_support;
+ }
+
+-static void uld_attach(struct adapter *adap, unsigned int uld)
++static int uld_attach(struct adapter *adap, unsigned int uld)
+ {
+- void *handle;
+ struct cxgb4_lld_info lli;
++ void *handle;
+
+ uld_init(adap, &lli);
+ uld_queue_init(adap, uld, &lli);
+@@ -708,7 +708,7 @@ static void uld_attach(struct adapter *a
+ dev_warn(adap->pdev_dev,
+ "could not attach to the %s driver, error %ld\n",
+ adap->uld[uld].name, PTR_ERR(handle));
+- return;
++ return PTR_ERR(handle);
+ }
+
+ adap->uld[uld].handle = handle;
+@@ -716,22 +716,22 @@ static void uld_attach(struct adapter *a
+
+ if (adap->flags & CXGB4_FULL_INIT_DONE)
+ adap->uld[uld].state_change(handle, CXGB4_STATE_UP);
++
++ return 0;
+ }
+
+-/**
+- * cxgb4_register_uld - register an upper-layer driver
+- * @type: the ULD type
+- * @p: the ULD methods
++/* cxgb4_register_uld - register an upper-layer driver
++ * @type: the ULD type
++ * @p: the ULD methods
+ *
+- * Registers an upper-layer driver with this driver and notifies the ULD
+- * about any presently available devices that support its type. Returns
+- * %-EBUSY if a ULD of the same type is already registered.
++ * Registers an upper-layer driver with this driver and notifies the ULD
++ * about any presently available devices that support its type.
+ */
+ void cxgb4_register_uld(enum cxgb4_uld type,
+ const struct cxgb4_uld_info *p)
+ {
+- int ret = 0;
+ struct adapter *adap;
++ int ret = 0;
+
+ if (type >= CXGB4_ULD_MAX)
+ return;
+@@ -763,8 +763,12 @@ void cxgb4_register_uld(enum cxgb4_uld t
+ if (ret)
+ goto free_irq;
+ adap->uld[type] = *p;
+- uld_attach(adap, type);
++ ret = uld_attach(adap, type);
++ if (ret)
++ goto free_txq;
+ continue;
++free_txq:
++ release_sge_txq_uld(adap, type);
+ free_irq:
+ if (adap->flags & CXGB4_FULL_INIT_DONE)
+ quiesce_rx_uld(adap, type);
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Raju Rangoju <rajur@chelsio.com>
+Date: Wed, 23 Oct 2019 23:03:55 +0530
+Subject: cxgb4: request the TX CIDX updates to status page
+
+From: Raju Rangoju <rajur@chelsio.com>
+
+[ Upstream commit 7c3bebc3d8688b84795c11848c314a2fbfe045e0 ]
+
+For adapters which support the SGE Doorbell Queue Timer facility,
+we configured the Ethernet TX Queues to send CIDX Updates to the
+Associated Ethernet RX Response Queue with CPL_SGE_EGR_UPDATE
+messages to allow us to respond more quickly to the CIDX Updates.
+But, this was adding load to PCIe Link RX bandwidth and,
+potentially, resulting in higher CPU Interrupt load.
+
+This patch requests the HW to deliver the CIDX updates to the TX
+queue status page rather than generating an ingress queue message
+(as an interrupt). With this patch, the load on RX bandwidth is
+reduced and a substantial improvement in BW is noticed at lower
+IO sizes.
+
+Fixes: d429005fdf2c ("cxgb4/cxgb4vf: Add support for SGE doorbell queue timer")
+Signed-off-by: Raju Rangoju <rajur@chelsio.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/chelsio/cxgb4/sge.c | 8 ++------
+ 1 file changed, 2 insertions(+), 6 deletions(-)
+
+--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
++++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
+@@ -3791,15 +3791,11 @@ int t4_sge_alloc_eth_txq(struct adapter
+ * write the CIDX Updates into the Status Page at the end of the
+ * TX Queue.
+ */
+- c.autoequiqe_to_viid = htonl((dbqt
+- ? FW_EQ_ETH_CMD_AUTOEQUIQE_F
+- : FW_EQ_ETH_CMD_AUTOEQUEQE_F) |
++ c.autoequiqe_to_viid = htonl(FW_EQ_ETH_CMD_AUTOEQUEQE_F |
+ FW_EQ_ETH_CMD_VIID_V(pi->viid));
+
+ c.fetchszm_to_iqid =
+- htonl(FW_EQ_ETH_CMD_HOSTFCMODE_V(dbqt
+- ? HOSTFCMODE_INGRESS_QUEUE_X
+- : HOSTFCMODE_STATUS_PAGE_X) |
++ htonl(FW_EQ_ETH_CMD_HOSTFCMODE_V(HOSTFCMODE_STATUS_PAGE_X) |
+ FW_EQ_ETH_CMD_PCIECHN_V(pi->tx_chan) |
+ FW_EQ_ETH_CMD_FETCHRO_F | FW_EQ_ETH_CMD_IQID_V(iqid));
+
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 4 Nov 2019 07:57:55 -0800
+Subject: dccp: do not leak jiffies on the wire
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 3d1e5039f5f87a8731202ceca08764ee7cb010d3 ]
+
+For some reason I missed the case of DCCP passive
+flows in my previous patch.
+
+Fixes: a904a0693c18 ("inet: stop leaking jiffies on the wire")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Thiemo Nagel <tnagel@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/dccp/ipv4.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/dccp/ipv4.c
++++ b/net/dccp/ipv4.c
+@@ -416,7 +416,7 @@ struct sock *dccp_v4_request_recv_sock(c
+ RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt));
+ newinet->mc_index = inet_iif(skb);
+ newinet->mc_ttl = ip_hdr(skb)->ttl;
+- newinet->inet_id = jiffies;
++ newinet->inet_id = prandom_u32();
+
+ if (dst == NULL && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL)
+ goto put_and_exit;
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Xin Long <lucien.xin@gmail.com>
+Date: Mon, 28 Oct 2019 23:19:35 +0800
+Subject: erspan: fix the tun_info options_len check for erspan
+
+From: Xin Long <lucien.xin@gmail.com>
+
+[ Upstream commit 2eb8d6d2910cfe3dc67dc056f26f3dd9c63d47cd ]
+
+The check for !md doesn't really work for ip_tunnel_info_opts(info) which
+only does info + 1. Also to avoid out-of-bounds access on info, it should
+ensure options_len is not less than erspan_metadata in both erspan_xmit()
+and ip6erspan_tunnel_xmit().
+
+Fixes: 1a66a836da ("gre: add collect_md mode to ERSPAN tunnel")
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ip_gre.c | 4 ++--
+ net/ipv6/ip6_gre.c | 4 ++--
+ 2 files changed, 4 insertions(+), 4 deletions(-)
+
+--- a/net/ipv4/ip_gre.c
++++ b/net/ipv4/ip_gre.c
+@@ -509,9 +509,9 @@ static void erspan_fb_xmit(struct sk_buf
+ key = &tun_info->key;
+ if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
+ goto err_free_skb;
+- md = ip_tunnel_info_opts(tun_info);
+- if (!md)
++ if (tun_info->options_len < sizeof(*md))
+ goto err_free_skb;
++ md = ip_tunnel_info_opts(tun_info);
+
+ /* ERSPAN has fixed 8 byte GRE header */
+ version = md->version;
+--- a/net/ipv6/ip6_gre.c
++++ b/net/ipv6/ip6_gre.c
+@@ -980,9 +980,9 @@ static netdev_tx_t ip6erspan_tunnel_xmit
+ dsfield = key->tos;
+ if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
+ goto tx_err;
+- md = ip_tunnel_info_opts(tun_info);
+- if (!md)
++ if (tun_info->options_len < sizeof(*md))
+ goto tx_err;
++ md = ip_tunnel_info_opts(tun_info);
+
+ tun_id = tunnel_id_to_key32(key->tun_id);
+ if (md->version == 1) {
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 1 Nov 2019 10:32:19 -0700
+Subject: inet: stop leaking jiffies on the wire
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit a904a0693c189691eeee64f6c6b188bd7dc244e9 ]
+
+Historically linux tried to stick to RFC 791, 1122, 2003
+for IPv4 ID field generation.
+
+RFC 6864 made clear that no matter how hard we try,
+we can not ensure unicity of IP ID within maximum
+lifetime for all datagrams with a given source
+address/destination address/protocol tuple.
+
+Linux uses a per socket inet generator (inet_id), initialized
+at connection startup with a XOR of 'jiffies' and other
+fields that appear clear on the wire.
+
+Thiemo Nagel pointed that this strategy is a privacy
+concern as this provides 16 bits of entropy to fingerprint
+devices.
+
+Let's switch to a random starting point, this is just as
+good as far as RFC 6864 is concerned and does not leak
+anything critical.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Thiemo Nagel <tnagel@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/crypto/chelsio/chtls/chtls_cm.c | 2 +-
+ net/dccp/ipv4.c | 2 +-
+ net/ipv4/datagram.c | 2 +-
+ net/ipv4/tcp_ipv4.c | 4 ++--
+ net/sctp/socket.c | 2 +-
+ 5 files changed, 6 insertions(+), 6 deletions(-)
+
+--- a/drivers/crypto/chelsio/chtls/chtls_cm.c
++++ b/drivers/crypto/chelsio/chtls/chtls_cm.c
+@@ -1297,7 +1297,7 @@ static void make_established(struct sock
+ tp->write_seq = snd_isn;
+ tp->snd_nxt = snd_isn;
+ tp->snd_una = snd_isn;
+- inet_sk(sk)->inet_id = tp->write_seq ^ jiffies;
++ inet_sk(sk)->inet_id = prandom_u32();
+ assign_rxopt(sk, opt);
+
+ if (tp->rcv_wnd > (RCV_BUFSIZ_M << 10))
+--- a/net/dccp/ipv4.c
++++ b/net/dccp/ipv4.c
+@@ -117,7 +117,7 @@ int dccp_v4_connect(struct sock *sk, str
+ inet->inet_daddr,
+ inet->inet_sport,
+ inet->inet_dport);
+- inet->inet_id = dp->dccps_iss ^ jiffies;
++ inet->inet_id = prandom_u32();
+
+ err = dccp_connect(sk);
+ rt = NULL;
+--- a/net/ipv4/datagram.c
++++ b/net/ipv4/datagram.c
+@@ -73,7 +73,7 @@ int __ip4_datagram_connect(struct sock *
+ reuseport_has_conns(sk, true);
+ sk->sk_state = TCP_ESTABLISHED;
+ sk_set_txhash(sk);
+- inet->inet_id = jiffies;
++ inet->inet_id = prandom_u32();
+
+ sk_dst_set(sk, &rt->dst);
+ err = 0;
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -300,7 +300,7 @@ int tcp_v4_connect(struct sock *sk, stru
+ inet->inet_daddr);
+ }
+
+- inet->inet_id = tp->write_seq ^ jiffies;
++ inet->inet_id = prandom_u32();
+
+ if (tcp_fastopen_defer_connect(sk, &err))
+ return err;
+@@ -1443,7 +1443,7 @@ struct sock *tcp_v4_syn_recv_sock(const
+ inet_csk(newsk)->icsk_ext_hdr_len = 0;
+ if (inet_opt)
+ inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
+- newinet->inet_id = newtp->write_seq ^ jiffies;
++ newinet->inet_id = prandom_u32();
+
+ if (!dst) {
+ dst = inet_csk_route_child_sock(sk, newsk, req);
+--- a/net/sctp/socket.c
++++ b/net/sctp/socket.c
+@@ -9159,7 +9159,7 @@ void sctp_copy_sock(struct sock *newsk,
+ newinet->inet_rcv_saddr = inet->inet_rcv_saddr;
+ newinet->inet_dport = htons(asoc->peer.port);
+ newinet->pmtudisc = inet->pmtudisc;
+- newinet->inet_id = asoc->next_tsn ^ jiffies;
++ newinet->inet_id = prandom_u32();
+
+ newinet->uc_ttl = inet->uc_ttl;
+ newinet->mc_loop = 1;
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Eric Dumazet <edumazet@google.com>
+Date: Sat, 19 Oct 2019 09:26:37 -0700
+Subject: ipv4: fix IPSKB_FRAG_PMTU handling with fragmentation
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit e7a409c3f46cb0dbc7bfd4f6f9421d53e92614a5 ]
+
+This patch removes the iph field from the state structure, which is not
+properly initialized. Instead, add a new field to make the "do we want
+to set DF" be the state bit and move the code to set the DF flag from
+ip_frag_next().
+
+Joint work with Pablo and Linus.
+
+Fixes: 19c3401a917b ("net: ipv4: place control buffer handling away from fragmentation iterators")
+Reported-by: Patrick Schönthaler <patrick@notvads.ovh>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/ip.h | 4 ++--
+ net/bridge/netfilter/nf_conntrack_bridge.c | 2 +-
+ net/ipv4/ip_output.c | 11 ++++++-----
+ 3 files changed, 9 insertions(+), 8 deletions(-)
+
+--- a/include/net/ip.h
++++ b/include/net/ip.h
+@@ -184,7 +184,7 @@ static inline struct sk_buff *ip_fraglis
+ }
+
+ struct ip_frag_state {
+- struct iphdr *iph;
++ bool DF;
+ unsigned int hlen;
+ unsigned int ll_rs;
+ unsigned int mtu;
+@@ -195,7 +195,7 @@ struct ip_frag_state {
+ };
+
+ void ip_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int ll_rs,
+- unsigned int mtu, struct ip_frag_state *state);
++ unsigned int mtu, bool DF, struct ip_frag_state *state);
+ struct sk_buff *ip_frag_next(struct sk_buff *skb,
+ struct ip_frag_state *state);
+
+--- a/net/bridge/netfilter/nf_conntrack_bridge.c
++++ b/net/bridge/netfilter/nf_conntrack_bridge.c
+@@ -94,7 +94,7 @@ slow_path:
+ * This may also be a clone skbuff, we could preserve the geometry for
+ * the copies but probably not worth the effort.
+ */
+- ip_frag_init(skb, hlen, ll_rs, frag_max_size, &state);
++ ip_frag_init(skb, hlen, ll_rs, frag_max_size, false, &state);
+
+ while (state.left > 0) {
+ struct sk_buff *skb2;
+--- a/net/ipv4/ip_output.c
++++ b/net/ipv4/ip_output.c
+@@ -645,11 +645,12 @@ void ip_fraglist_prepare(struct sk_buff
+ EXPORT_SYMBOL(ip_fraglist_prepare);
+
+ void ip_frag_init(struct sk_buff *skb, unsigned int hlen,
+- unsigned int ll_rs, unsigned int mtu,
++ unsigned int ll_rs, unsigned int mtu, bool DF,
+ struct ip_frag_state *state)
+ {
+ struct iphdr *iph = ip_hdr(skb);
+
++ state->DF = DF;
+ state->hlen = hlen;
+ state->ll_rs = ll_rs;
+ state->mtu = mtu;
+@@ -668,9 +669,6 @@ static void ip_frag_ipcb(struct sk_buff
+ /* Copy the flags to each fragment. */
+ IPCB(to)->flags = IPCB(from)->flags;
+
+- if (IPCB(from)->flags & IPSKB_FRAG_PMTU)
+- state->iph->frag_off |= htons(IP_DF);
+-
+ /* ANK: dirty, but effective trick. Upgrade options only if
+ * the segment to be fragmented was THE FIRST (otherwise,
+ * options are already fixed) and make it ONCE
+@@ -738,6 +736,8 @@ struct sk_buff *ip_frag_next(struct sk_b
+ */
+ iph = ip_hdr(skb2);
+ iph->frag_off = htons((state->offset >> 3));
++ if (state->DF)
++ iph->frag_off |= htons(IP_DF);
+
+ /*
+ * Added AC : If we are fragmenting a fragment that's not the
+@@ -881,7 +881,8 @@ slow_path:
+ * Fragment the datagram.
+ */
+
+- ip_frag_init(skb, hlen, ll_rs, mtu, &state);
++ ip_frag_init(skb, hlen, ll_rs, mtu, IPCB(skb)->flags & IPSKB_FRAG_PMTU,
++ &state);
+
+ /*
+ * Keep copying data until we run out.
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Sat, 26 Oct 2019 11:53:39 +0200
+Subject: ipv4: fix route update on metric change.
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit 0b834ba00ab5337e938c727e216e1f5249794717 ]
+
+Since commit af4d768ad28c ("net/ipv4: Add support for specifying metric
+of connected routes"), when updating an IP address with a different metric,
+the associated connected route is updated, too.
+
+Still, the mentioned commit doesn't handle properly some corner cases:
+
+$ ip addr add dev eth0 192.168.1.0/24
+$ ip addr add dev eth0 192.168.2.1/32 peer 192.168.2.2
+$ ip addr add dev eth0 192.168.3.1/24
+$ ip addr change dev eth0 192.168.1.0/24 metric 10
+$ ip addr change dev eth0 192.168.2.1/32 peer 192.168.2.2 metric 10
+$ ip addr change dev eth0 192.168.3.1/24 metric 10
+$ ip -4 route
+192.168.1.0/24 dev eth0 proto kernel scope link src 192.168.1.0
+192.168.2.2 dev eth0 proto kernel scope link src 192.168.2.1
+192.168.3.0/24 dev eth0 proto kernel scope link src 192.168.2.1 metric 10
+
+Only the last route is correctly updated.
+
+The problem is the current test in fib_modify_prefix_metric():
+
+ if (!(dev->flags & IFF_UP) ||
+ ifa->ifa_flags & (IFA_F_SECONDARY | IFA_F_NOPREFIXROUTE) ||
+ ipv4_is_zeronet(prefix) ||
+ prefix == ifa->ifa_local || ifa->ifa_prefixlen == 32)
+
+Which should be the logical 'not' of the pre-existing test in
+fib_add_ifaddr():
+
+ if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) &&
+ (prefix != addr || ifa->ifa_prefixlen < 32))
+
+To properly negate the original expression, we need to change the last
+logical 'or' to a logical 'and'.
+
+Fixes: af4d768ad28c ("net/ipv4: Add support for specifying metric of connected routes")
+Reported-and-suggested-by: Beniamino Galvani <bgalvani@redhat.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/fib_frontend.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/fib_frontend.c
++++ b/net/ipv4/fib_frontend.c
+@@ -1147,7 +1147,7 @@ void fib_modify_prefix_metric(struct in_
+ if (!(dev->flags & IFF_UP) ||
+ ifa->ifa_flags & (IFA_F_SECONDARY | IFA_F_NOPREFIXROUTE) ||
+ ipv4_is_zeronet(prefix) ||
+- prefix == ifa->ifa_local || ifa->ifa_prefixlen == 32)
++ (prefix == ifa->ifa_local && ifa->ifa_prefixlen == 32))
+ return;
+
+ /* add the new */
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Takeshi Misawa <jeliantsurux@gmail.com>
+Date: Sat, 19 Oct 2019 15:34:43 +0900
+Subject: keys: Fix memory leak in copy_net_ns
+
+From: Takeshi Misawa <jeliantsurux@gmail.com>
+
+[ Upstream commit 82ecff655e7968151b0047f1b5de03b249e5c1c4 ]
+
+If copy_net_ns() failed after net_alloc(), net->key_domain is leaked.
+Fix this by freeing key_domain in the error path.
+
+syzbot report:
+BUG: memory leak
+unreferenced object 0xffff8881175007e0 (size 32):
+ comm "syz-executor902", pid 7069, jiffies 4294944350 (age 28.400s)
+ hex dump (first 32 bytes):
+ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
+ 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
+ backtrace:
+ [<00000000a83ed741>] kmemleak_alloc_recursive include/linux/kmemleak.h:43 [inline]
+ [<00000000a83ed741>] slab_post_alloc_hook mm/slab.h:439 [inline]
+ [<00000000a83ed741>] slab_alloc mm/slab.c:3326 [inline]
+ [<00000000a83ed741>] kmem_cache_alloc_trace+0x13d/0x280 mm/slab.c:3553
+ [<0000000059fc92b9>] kmalloc include/linux/slab.h:547 [inline]
+ [<0000000059fc92b9>] kzalloc include/linux/slab.h:742 [inline]
+ [<0000000059fc92b9>] net_alloc net/core/net_namespace.c:398 [inline]
+ [<0000000059fc92b9>] copy_net_ns+0xb2/0x220 net/core/net_namespace.c:445
+ [<00000000a9d74bbc>] create_new_namespaces+0x141/0x2a0 kernel/nsproxy.c:103
+ [<000000008047d645>] unshare_nsproxy_namespaces+0x7f/0x100 kernel/nsproxy.c:202
+ [<000000005993ea6e>] ksys_unshare+0x236/0x490 kernel/fork.c:2674
+ [<0000000019417e75>] __do_sys_unshare kernel/fork.c:2742 [inline]
+ [<0000000019417e75>] __se_sys_unshare kernel/fork.c:2740 [inline]
+ [<0000000019417e75>] __x64_sys_unshare+0x16/0x20 kernel/fork.c:2740
+ [<00000000f4c5f2c8>] do_syscall_64+0x76/0x1a0 arch/x86/entry/common.c:296
+ [<0000000038550184>] entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+syzbot also reported other leak in copy_net_ns -> setup_net.
+This problem is already fixed by cf47a0b882a4e5f6b34c7949d7b293e9287f1972.
+
+Fixes: 9b242610514f ("keys: Network namespace domain tag")
+Reported-and-tested-by: syzbot+3b3296d032353c33184b@syzkaller.appspotmail.com
+Signed-off-by: Takeshi Misawa <jeliantsurux@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/net_namespace.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/core/net_namespace.c
++++ b/net/core/net_namespace.c
+@@ -478,6 +478,7 @@ struct net *copy_net_ns(unsigned long fl
+
+ if (rv < 0) {
+ put_userns:
++ key_remove_domain(net->key_domain);
+ put_user_ns(user_ns);
+ net_drop_ns(net);
+ dec_ucounts:
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Jiri Pirko <jiri@mellanox.com>
+Date: Wed, 30 Oct 2019 11:04:22 +0200
+Subject: mlxsw: core: Unpublish devlink parameters during reload
+
+From: Jiri Pirko <jiri@mellanox.com>
+
+[ Upstream commit b7265a0df82c1716bf788096217083ed65a8bb14 ]
+
+The devlink parameter "acl_region_rehash_interval" is a runtime
+parameter whose value is stored in a dynamically allocated memory. While
+reloading the driver, this memory is freed and then allocated again. A
+use-after-free might happen if during this time frame someone tries to
+retrieve its value.
+
+Since commit 070c63f20f6c ("net: devlink: allow to change namespaces
+during reload") the use-after-free can be reliably triggered when
+reloading the driver into a namespace, as after freeing the memory (via
+reload_down() callback) all the parameters are notified.
+
+Fix this by unpublishing and then re-publishing the parameters during
+reload.
+
+Fixes: 98bbf70c1c41 ("mlxsw: spectrum: add "acl_region_rehash_interval" devlink param")
+Fixes: 7c62cfb8c574 ("devlink: publish params only after driver init is done")
+Signed-off-by: Jiri Pirko <jiri@mellanox.com>
+Signed-off-by: Ido Schimmel <idosch@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlxsw/core.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
++++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
+@@ -1128,7 +1128,7 @@ __mlxsw_core_bus_device_register(const s
+ if (err)
+ goto err_thermal_init;
+
+- if (mlxsw_driver->params_register && !reload)
++ if (mlxsw_driver->params_register)
+ devlink_params_publish(devlink);
+
+ return 0;
+@@ -1201,7 +1201,7 @@ void mlxsw_core_bus_device_unregister(st
+ return;
+ }
+
+- if (mlxsw_core->driver->params_unregister && !reload)
++ if (mlxsw_core->driver->params_unregister)
+ devlink_params_unpublish(devlink);
+ mlxsw_thermal_fini(mlxsw_core->thermal);
+ mlxsw_hwmon_fini(mlxsw_core->hwmon);
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 23 Oct 2019 22:44:52 -0700
+Subject: net: add READ_ONCE() annotation in __skb_wait_for_more_packets()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 7c422d0ce97552dde4a97e6290de70ec6efb0fc6 ]
+
+__skb_wait_for_more_packets() can be called while other cpus
+can feed packets to the socket receive queue.
+
+KCSAN reported :
+
+BUG: KCSAN: data-race in __skb_wait_for_more_packets / __udp_enqueue_schedule_skb
+
+write to 0xffff888102e40b58 of 8 bytes by interrupt on cpu 0:
+ __skb_insert include/linux/skbuff.h:1852 [inline]
+ __skb_queue_before include/linux/skbuff.h:1958 [inline]
+ __skb_queue_tail include/linux/skbuff.h:1991 [inline]
+ __udp_enqueue_schedule_skb+0x2d7/0x410 net/ipv4/udp.c:1470
+ __udp_queue_rcv_skb net/ipv4/udp.c:1940 [inline]
+ udp_queue_rcv_one_skb+0x7bd/0xc70 net/ipv4/udp.c:2057
+ udp_queue_rcv_skb+0xb5/0x400 net/ipv4/udp.c:2074
+ udp_unicast_rcv_skb.isra.0+0x7e/0x1c0 net/ipv4/udp.c:2233
+ __udp4_lib_rcv+0xa44/0x17c0 net/ipv4/udp.c:2300
+ udp_rcv+0x2b/0x40 net/ipv4/udp.c:2470
+ ip_protocol_deliver_rcu+0x4d/0x420 net/ipv4/ip_input.c:204
+ ip_local_deliver_finish+0x110/0x140 net/ipv4/ip_input.c:231
+ NF_HOOK include/linux/netfilter.h:305 [inline]
+ NF_HOOK include/linux/netfilter.h:299 [inline]
+ ip_local_deliver+0x133/0x210 net/ipv4/ip_input.c:252
+ dst_input include/net/dst.h:442 [inline]
+ ip_rcv_finish+0x121/0x160 net/ipv4/ip_input.c:413
+ NF_HOOK include/linux/netfilter.h:305 [inline]
+ NF_HOOK include/linux/netfilter.h:299 [inline]
+ ip_rcv+0x18f/0x1a0 net/ipv4/ip_input.c:523
+ __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5010
+ __netif_receive_skb+0x37/0xf0 net/core/dev.c:5124
+ process_backlog+0x1d3/0x420 net/core/dev.c:5955
+
+read to 0xffff888102e40b58 of 8 bytes by task 13035 on cpu 1:
+ __skb_wait_for_more_packets+0xfa/0x320 net/core/datagram.c:100
+ __skb_recv_udp+0x374/0x500 net/ipv4/udp.c:1683
+ udp_recvmsg+0xe1/0xb10 net/ipv4/udp.c:1712
+ inet_recvmsg+0xbb/0x250 net/ipv4/af_inet.c:838
+ sock_recvmsg_nosec+0x5c/0x70 net/socket.c:871
+ ___sys_recvmsg+0x1a0/0x3e0 net/socket.c:2480
+ do_recvmmsg+0x19a/0x5c0 net/socket.c:2601
+ __sys_recvmmsg+0x1ef/0x200 net/socket.c:2680
+ __do_sys_recvmmsg net/socket.c:2703 [inline]
+ __se_sys_recvmmsg net/socket.c:2696 [inline]
+ __x64_sys_recvmmsg+0x89/0xb0 net/socket.c:2696
+ do_syscall_64+0xcc/0x370 arch/x86/entry/common.c:290
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 1 PID: 13035 Comm: syz-executor.3 Not tainted 5.4.0-rc3+ #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/datagram.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/core/datagram.c
++++ b/net/core/datagram.c
+@@ -97,7 +97,7 @@ int __skb_wait_for_more_packets(struct s
+ if (error)
+ goto out_err;
+
+- if (sk->sk_receive_queue.prev != skb)
++ if (READ_ONCE(sk->sk_receive_queue.prev) != skb)
+ goto out;
+
+ /* Socket shut down? */
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 23 Oct 2019 22:44:48 -0700
+Subject: net: add skb_queue_empty_lockless()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit d7d16a89350ab263484c0aa2b523dd3a234e4a80 ]
+
+Some paths call skb_queue_empty() without holding
+the queue lock. We must use a barrier in order
+to not let the compiler do strange things, and avoid
+KCSAN splats.
+
+Adding a barrier in skb_queue_empty() might be overkill,
+I prefer adding a new helper to clearly identify
+points where the callers might be lockless. This might
+help us finding real bugs.
+
+The corresponding WRITE_ONCE() should add zero cost
+for current compilers.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/skbuff.h | 33 ++++++++++++++++++++++++---------
+ 1 file changed, 24 insertions(+), 9 deletions(-)
+
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -1501,6 +1501,19 @@ static inline int skb_queue_empty(const
+ }
+
+ /**
++ * skb_queue_empty_lockless - check if a queue is empty
++ * @list: queue head
++ *
++ * Returns true if the queue is empty, false otherwise.
++ * This variant can be used in lockless contexts.
++ */
++static inline bool skb_queue_empty_lockless(const struct sk_buff_head *list)
++{
++ return READ_ONCE(list->next) == (const struct sk_buff *) list;
++}
++
++
++/**
+ * skb_queue_is_last - check if skb is the last entry in the queue
+ * @list: queue head
+ * @skb: buffer
+@@ -1853,9 +1866,11 @@ static inline void __skb_insert(struct s
+ struct sk_buff *prev, struct sk_buff *next,
+ struct sk_buff_head *list)
+ {
+- newsk->next = next;
+- newsk->prev = prev;
+- next->prev = prev->next = newsk;
++ /* see skb_queue_empty_lockless() for the opposite READ_ONCE() */
++ WRITE_ONCE(newsk->next, next);
++ WRITE_ONCE(newsk->prev, prev);
++ WRITE_ONCE(next->prev, newsk);
++ WRITE_ONCE(prev->next, newsk);
+ list->qlen++;
+ }
+
+@@ -1866,11 +1881,11 @@ static inline void __skb_queue_splice(co
+ struct sk_buff *first = list->next;
+ struct sk_buff *last = list->prev;
+
+- first->prev = prev;
+- prev->next = first;
++ WRITE_ONCE(first->prev, prev);
++ WRITE_ONCE(prev->next, first);
+
+- last->next = next;
+- next->prev = last;
++ WRITE_ONCE(last->next, next);
++ WRITE_ONCE(next->prev, last);
+ }
+
+ /**
+@@ -2011,8 +2026,8 @@ static inline void __skb_unlink(struct s
+ next = skb->next;
+ prev = skb->prev;
+ skb->next = skb->prev = NULL;
+- next->prev = prev;
+- prev->next = next;
++ WRITE_ONCE(next->prev, prev);
++ WRITE_ONCE(prev->next, next);
+ }
+
+ /**
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 30 Oct 2019 13:00:04 -0700
+Subject: net: annotate accesses to sk->sk_incoming_cpu
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 7170a977743b72cf3eb46ef6ef89885dc7ad3621 ]
+
+This socket field can be read and written by concurrent cpus.
+
+Use READ_ONCE() and WRITE_ONCE() annotations to document this,
+and avoid some compiler 'optimizations'.
+
+KCSAN reported :
+
+BUG: KCSAN: data-race in tcp_v4_rcv / tcp_v4_rcv
+
+write to 0xffff88812220763c of 4 bytes by interrupt on cpu 0:
+ sk_incoming_cpu_update include/net/sock.h:953 [inline]
+ tcp_v4_rcv+0x1b3c/0x1bb0 net/ipv4/tcp_ipv4.c:1934
+ ip_protocol_deliver_rcu+0x4d/0x420 net/ipv4/ip_input.c:204
+ ip_local_deliver_finish+0x110/0x140 net/ipv4/ip_input.c:231
+ NF_HOOK include/linux/netfilter.h:305 [inline]
+ NF_HOOK include/linux/netfilter.h:299 [inline]
+ ip_local_deliver+0x133/0x210 net/ipv4/ip_input.c:252
+ dst_input include/net/dst.h:442 [inline]
+ ip_rcv_finish+0x121/0x160 net/ipv4/ip_input.c:413
+ NF_HOOK include/linux/netfilter.h:305 [inline]
+ NF_HOOK include/linux/netfilter.h:299 [inline]
+ ip_rcv+0x18f/0x1a0 net/ipv4/ip_input.c:523
+ __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5010
+ __netif_receive_skb+0x37/0xf0 net/core/dev.c:5124
+ process_backlog+0x1d3/0x420 net/core/dev.c:5955
+ napi_poll net/core/dev.c:6392 [inline]
+ net_rx_action+0x3ae/0xa90 net/core/dev.c:6460
+ __do_softirq+0x115/0x33f kernel/softirq.c:292
+ do_softirq_own_stack+0x2a/0x40 arch/x86/entry/entry_64.S:1082
+ do_softirq.part.0+0x6b/0x80 kernel/softirq.c:337
+ do_softirq kernel/softirq.c:329 [inline]
+ __local_bh_enable_ip+0x76/0x80 kernel/softirq.c:189
+
+read to 0xffff88812220763c of 4 bytes by interrupt on cpu 1:
+ sk_incoming_cpu_update include/net/sock.h:952 [inline]
+ tcp_v4_rcv+0x181a/0x1bb0 net/ipv4/tcp_ipv4.c:1934
+ ip_protocol_deliver_rcu+0x4d/0x420 net/ipv4/ip_input.c:204
+ ip_local_deliver_finish+0x110/0x140 net/ipv4/ip_input.c:231
+ NF_HOOK include/linux/netfilter.h:305 [inline]
+ NF_HOOK include/linux/netfilter.h:299 [inline]
+ ip_local_deliver+0x133/0x210 net/ipv4/ip_input.c:252
+ dst_input include/net/dst.h:442 [inline]
+ ip_rcv_finish+0x121/0x160 net/ipv4/ip_input.c:413
+ NF_HOOK include/linux/netfilter.h:305 [inline]
+ NF_HOOK include/linux/netfilter.h:299 [inline]
+ ip_rcv+0x18f/0x1a0 net/ipv4/ip_input.c:523
+ __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5010
+ __netif_receive_skb+0x37/0xf0 net/core/dev.c:5124
+ process_backlog+0x1d3/0x420 net/core/dev.c:5955
+ napi_poll net/core/dev.c:6392 [inline]
+ net_rx_action+0x3ae/0xa90 net/core/dev.c:6460
+ __do_softirq+0x115/0x33f kernel/softirq.c:292
+ run_ksoftirqd+0x46/0x60 kernel/softirq.c:603
+ smpboot_thread_fn+0x37d/0x4a0 kernel/smpboot.c:165
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 1 PID: 16 Comm: ksoftirqd/1 Not tainted 5.4.0-rc3+ #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/sock.h | 4 ++--
+ net/core/sock.c | 4 ++--
+ net/ipv4/inet_hashtables.c | 2 +-
+ net/ipv4/udp.c | 2 +-
+ net/ipv6/inet6_hashtables.c | 2 +-
+ net/ipv6/udp.c | 2 +-
+ 6 files changed, 8 insertions(+), 8 deletions(-)
+
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -949,8 +949,8 @@ static inline void sk_incoming_cpu_updat
+ {
+ int cpu = raw_smp_processor_id();
+
+- if (unlikely(sk->sk_incoming_cpu != cpu))
+- sk->sk_incoming_cpu = cpu;
++ if (unlikely(READ_ONCE(sk->sk_incoming_cpu) != cpu))
++ WRITE_ONCE(sk->sk_incoming_cpu, cpu);
+ }
+
+ static inline void sock_rps_record_flow_hash(__u32 hash)
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1125,7 +1125,7 @@ set_rcvbuf:
+ break;
+ }
+ case SO_INCOMING_CPU:
+- sk->sk_incoming_cpu = val;
++ WRITE_ONCE(sk->sk_incoming_cpu, val);
+ break;
+
+ case SO_CNX_ADVICE:
+@@ -1474,7 +1474,7 @@ int sock_getsockopt(struct socket *sock,
+ break;
+
+ case SO_INCOMING_CPU:
+- v.val = sk->sk_incoming_cpu;
++ v.val = READ_ONCE(sk->sk_incoming_cpu);
+ break;
+
+ case SO_MEMINFO:
+--- a/net/ipv4/inet_hashtables.c
++++ b/net/ipv4/inet_hashtables.c
+@@ -240,7 +240,7 @@ static inline int compute_score(struct s
+ return -1;
+
+ score = sk->sk_family == PF_INET ? 2 : 1;
+- if (sk->sk_incoming_cpu == raw_smp_processor_id())
++ if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
+ score++;
+ }
+ return score;
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -388,7 +388,7 @@ static int compute_score(struct sock *sk
+ return -1;
+ score += 4;
+
+- if (sk->sk_incoming_cpu == raw_smp_processor_id())
++ if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
+ score++;
+ return score;
+ }
+--- a/net/ipv6/inet6_hashtables.c
++++ b/net/ipv6/inet6_hashtables.c
+@@ -105,7 +105,7 @@ static inline int compute_score(struct s
+ return -1;
+
+ score = 1;
+- if (sk->sk_incoming_cpu == raw_smp_processor_id())
++ if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
+ score++;
+ }
+ return score;
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -135,7 +135,7 @@ static int compute_score(struct sock *sk
+ return -1;
+ score++;
+
+- if (sk->sk_incoming_cpu == raw_smp_processor_id())
++ if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
+ score++;
+
+ return score;
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 29 Oct 2019 10:54:44 -0700
+Subject: net: annotate lockless accesses to sk->sk_napi_id
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit ee8d153d46a3b98c064ee15c0c0a3bbf1450e5a1 ]
+
+We already annotated most accesses to sk->sk_napi_id
+
+We missed sk_mark_napi_id() and sk_mark_napi_id_once()
+which might be called without socket lock held in UDP stack.
+
+KCSAN reported :
+BUG: KCSAN: data-race in udpv6_queue_rcv_one_skb / udpv6_queue_rcv_one_skb
+
+write to 0xffff888121c6d108 of 4 bytes by interrupt on cpu 0:
+ sk_mark_napi_id include/net/busy_poll.h:125 [inline]
+ __udpv6_queue_rcv_skb net/ipv6/udp.c:571 [inline]
+ udpv6_queue_rcv_one_skb+0x70c/0xb40 net/ipv6/udp.c:672
+ udpv6_queue_rcv_skb+0xb5/0x400 net/ipv6/udp.c:689
+ udp6_unicast_rcv_skb.isra.0+0xd7/0x180 net/ipv6/udp.c:832
+ __udp6_lib_rcv+0x69c/0x1770 net/ipv6/udp.c:913
+ udpv6_rcv+0x2b/0x40 net/ipv6/udp.c:1015
+ ip6_protocol_deliver_rcu+0x22a/0xbe0 net/ipv6/ip6_input.c:409
+ ip6_input_finish+0x30/0x50 net/ipv6/ip6_input.c:450
+ NF_HOOK include/linux/netfilter.h:305 [inline]
+ NF_HOOK include/linux/netfilter.h:299 [inline]
+ ip6_input+0x177/0x190 net/ipv6/ip6_input.c:459
+ dst_input include/net/dst.h:442 [inline]
+ ip6_rcv_finish+0x110/0x140 net/ipv6/ip6_input.c:76
+ NF_HOOK include/linux/netfilter.h:305 [inline]
+ NF_HOOK include/linux/netfilter.h:299 [inline]
+ ipv6_rcv+0x1a1/0x1b0 net/ipv6/ip6_input.c:284
+ __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5010
+ __netif_receive_skb+0x37/0xf0 net/core/dev.c:5124
+ process_backlog+0x1d3/0x420 net/core/dev.c:5955
+ napi_poll net/core/dev.c:6392 [inline]
+ net_rx_action+0x3ae/0xa90 net/core/dev.c:6460
+
+write to 0xffff888121c6d108 of 4 bytes by interrupt on cpu 1:
+ sk_mark_napi_id include/net/busy_poll.h:125 [inline]
+ __udpv6_queue_rcv_skb net/ipv6/udp.c:571 [inline]
+ udpv6_queue_rcv_one_skb+0x70c/0xb40 net/ipv6/udp.c:672
+ udpv6_queue_rcv_skb+0xb5/0x400 net/ipv6/udp.c:689
+ udp6_unicast_rcv_skb.isra.0+0xd7/0x180 net/ipv6/udp.c:832
+ __udp6_lib_rcv+0x69c/0x1770 net/ipv6/udp.c:913
+ udpv6_rcv+0x2b/0x40 net/ipv6/udp.c:1015
+ ip6_protocol_deliver_rcu+0x22a/0xbe0 net/ipv6/ip6_input.c:409
+ ip6_input_finish+0x30/0x50 net/ipv6/ip6_input.c:450
+ NF_HOOK include/linux/netfilter.h:305 [inline]
+ NF_HOOK include/linux/netfilter.h:299 [inline]
+ ip6_input+0x177/0x190 net/ipv6/ip6_input.c:459
+ dst_input include/net/dst.h:442 [inline]
+ ip6_rcv_finish+0x110/0x140 net/ipv6/ip6_input.c:76
+ NF_HOOK include/linux/netfilter.h:305 [inline]
+ NF_HOOK include/linux/netfilter.h:299 [inline]
+ ipv6_rcv+0x1a1/0x1b0 net/ipv6/ip6_input.c:284
+ __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5010
+ __netif_receive_skb+0x37/0xf0 net/core/dev.c:5124
+ process_backlog+0x1d3/0x420 net/core/dev.c:5955
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 1 PID: 10890 Comm: syz-executor.0 Not tainted 5.4.0-rc3+ #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+
+Fixes: e68b6e50fa35 ("udp: enable busy polling for all sockets")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/busy_poll.h | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/include/net/busy_poll.h
++++ b/include/net/busy_poll.h
+@@ -122,7 +122,7 @@ static inline void skb_mark_napi_id(stru
+ static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb)
+ {
+ #ifdef CONFIG_NET_RX_BUSY_POLL
+- sk->sk_napi_id = skb->napi_id;
++ WRITE_ONCE(sk->sk_napi_id, skb->napi_id);
+ #endif
+ sk_rx_queue_set(sk, skb);
+ }
+@@ -132,8 +132,8 @@ static inline void sk_mark_napi_id_once(
+ const struct sk_buff *skb)
+ {
+ #ifdef CONFIG_NET_RX_BUSY_POLL
+- if (!sk->sk_napi_id)
+- sk->sk_napi_id = skb->napi_id;
++ if (!READ_ONCE(sk->sk_napi_id))
++ WRITE_ONCE(sk->sk_napi_id, skb->napi_id);
+ #endif
+ }
+
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Doug Berger <opendmb@gmail.com>
+Date: Wed, 16 Oct 2019 16:06:29 -0700
+Subject: net: bcmgenet: don't set phydev->link from MAC
+
+From: Doug Berger <opendmb@gmail.com>
+
+[ Upstream commit 7de48402faa32298c3551ea32c76ccb4f9d3025d ]
+
+When commit 28b2e0d2cd13 ("net: phy: remove parameter new_link from
+phy_mac_interrupt()") removed the new_link parameter it set the
+phydev->link state from the MAC before invoking phy_mac_interrupt().
+
+However, once commit 88d6272acaaa ("net: phy: avoid unneeded MDIO
+reads in genphy_read_status") was added this initialization prevents
+the proper determination of the connection parameters by the function
+genphy_read_status().
+
+This commit removes that initialization to restore the proper
+functionality.
+
+Fixes: 88d6272acaaa ("net: phy: avoid unneeded MDIO reads in genphy_read_status")
+Signed-off-by: Doug Berger <opendmb@gmail.com>
+Acked-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/genet/bcmgenet.c | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
++++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+@@ -2617,10 +2617,8 @@ static void bcmgenet_irq_task(struct wor
+ spin_unlock_irq(&priv->lock);
+
+ /* Link UP/DOWN event */
+- if (status & UMAC_IRQ_LINK_EVENT) {
+- priv->dev->phydev->link = !!(status & UMAC_IRQ_LINK_UP);
++ if (status & UMAC_IRQ_LINK_EVENT)
+ phy_mac_interrupt(priv->dev->phydev);
+- }
+ }
+
+ /* bcmgenet_isr1: handle Rx and Tx priority queues */
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Doug Berger <opendmb@gmail.com>
+Date: Wed, 16 Oct 2019 16:06:32 -0700
+Subject: net: bcmgenet: reset 40nm EPHY on energy detect
+
+From: Doug Berger <opendmb@gmail.com>
+
+[ Upstream commit 25382b991d252aed961cd434176240f9de6bb15f ]
+
+The EPHY integrated into the 40nm Set-Top Box devices can falsely
+detect energy when connected to a disabled peer interface. When the
+peer interface is enabled the EPHY will detect and report the link
+as active, but on occasion may get into a state where it is not
+able to exchange data with the connected GENET MAC. This issue has
+not been observed when the link parameters are auto-negotiated;
+however, it has been observed with a manually configured link.
+
+It has been empirically determined that issuing a soft reset to the
+EPHY when energy is detected prevents it from getting into this bad
+state.
+
+Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file")
+Signed-off-by: Doug Berger <opendmb@gmail.com>
+Acked-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/genet/bcmgenet.c | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
++++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+@@ -2018,6 +2018,8 @@ static void bcmgenet_link_intr_enable(st
+ */
+ if (priv->internal_phy) {
+ int0_enable |= UMAC_IRQ_LINK_EVENT;
++ if (GENET_IS_V1(priv) || GENET_IS_V2(priv) || GENET_IS_V3(priv))
++ int0_enable |= UMAC_IRQ_PHY_DET_R;
+ } else if (priv->ext_phy) {
+ int0_enable |= UMAC_IRQ_LINK_EVENT;
+ } else if (priv->phy_interface == PHY_INTERFACE_MODE_MOCA) {
+@@ -2616,9 +2618,14 @@ static void bcmgenet_irq_task(struct wor
+ priv->irq0_stat = 0;
+ spin_unlock_irq(&priv->lock);
+
++ if (status & UMAC_IRQ_PHY_DET_R &&
++ priv->dev->phydev->autoneg != AUTONEG_ENABLE)
++ phy_init_hw(priv->dev->phydev);
++
+ /* Link UP/DOWN event */
+ if (status & UMAC_IRQ_LINK_EVENT)
+ phy_mac_interrupt(priv->dev->phydev);
++
+ }
+
+ /* bcmgenet_isr1: handle Rx and Tx priority queues */
+@@ -2713,7 +2720,7 @@ static irqreturn_t bcmgenet_isr0(int irq
+ }
+
+ /* all other interested interrupts handled in bottom half */
+- status &= UMAC_IRQ_LINK_EVENT;
++ status &= (UMAC_IRQ_LINK_EVENT | UMAC_IRQ_PHY_DET_R);
+ if (status) {
+ /* Save irq status for bottom-half processing. */
+ spin_lock_irqsave(&priv->lock, flags);
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Doug Berger <opendmb@gmail.com>
+Date: Wed, 16 Oct 2019 16:06:31 -0700
+Subject: net: bcmgenet: soft reset 40nm EPHYs before MAC init
+
+From: Doug Berger <opendmb@gmail.com>
+
+[ Upstream commit 1f515486275a08a17a2c806b844cca18f7de5b34 ]
+
+It turns out that the "Workaround for putting the PHY in IDDQ mode"
+used by the internal EPHYs on 40nm Set-Top Box chips when powering
+down puts the interface to the GENET MAC in a state that can cause
+subsequent MAC resets to be incomplete.
+
+Rather than restore the forced soft reset when powering up internal
+PHYs, this commit moves the invocation of phy_init_hw earlier in
+the MAC initialization sequence to just before the MAC reset in the
+open and resume functions. This allows the interface to be stable
+and allows the MAC resets to be successful.
+
+The bcmgenet_mii_probe() function is split in two to accommodate
+this. The new function bcmgenet_mii_connect() handles the first
+half of the functionality before the MAC initialization, and the
+bcmgenet_mii_config() function is extended to provide the remaining
+PHY configuration following the MAC initialization.
+
+Fixes: 484bfa1507bf ("Revert "net: bcmgenet: Software reset EPHY after power on"")
+Signed-off-by: Doug Berger <opendmb@gmail.com>
+Acked-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/genet/bcmgenet.c | 28 +++---
+ drivers/net/ethernet/broadcom/genet/bcmgenet.h | 2
+ drivers/net/ethernet/broadcom/genet/bcmmii.c | 112 +++++++++++--------------
+ 3 files changed, 69 insertions(+), 73 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
++++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+@@ -2877,6 +2877,12 @@ static int bcmgenet_open(struct net_devi
+ if (priv->internal_phy)
+ bcmgenet_power_up(priv, GENET_POWER_PASSIVE);
+
++ ret = bcmgenet_mii_connect(dev);
++ if (ret) {
++ netdev_err(dev, "failed to connect to PHY\n");
++ goto err_clk_disable;
++ }
++
+ /* take MAC out of reset */
+ bcmgenet_umac_reset(priv);
+
+@@ -2886,6 +2892,12 @@ static int bcmgenet_open(struct net_devi
+ reg = bcmgenet_umac_readl(priv, UMAC_CMD);
+ priv->crc_fwd_en = !!(reg & CMD_CRC_FWD);
+
++ ret = bcmgenet_mii_config(dev, true);
++ if (ret) {
++ netdev_err(dev, "unsupported PHY\n");
++ goto err_disconnect_phy;
++ }
++
+ bcmgenet_set_hw_addr(priv, dev->dev_addr);
+
+ if (priv->internal_phy) {
+@@ -2901,7 +2913,7 @@ static int bcmgenet_open(struct net_devi
+ ret = bcmgenet_init_dma(priv);
+ if (ret) {
+ netdev_err(dev, "failed to initialize DMA\n");
+- goto err_clk_disable;
++ goto err_disconnect_phy;
+ }
+
+ /* Always enable ring 16 - descriptor ring */
+@@ -2924,25 +2936,19 @@ static int bcmgenet_open(struct net_devi
+ goto err_irq0;
+ }
+
+- ret = bcmgenet_mii_probe(dev);
+- if (ret) {
+- netdev_err(dev, "failed to connect to PHY\n");
+- goto err_irq1;
+- }
+-
+ bcmgenet_netif_start(dev);
+
+ netif_tx_start_all_queues(dev);
+
+ return 0;
+
+-err_irq1:
+- free_irq(priv->irq1, priv);
+ err_irq0:
+ free_irq(priv->irq0, priv);
+ err_fini_dma:
+ bcmgenet_dma_teardown(priv);
+ bcmgenet_fini_dma(priv);
++err_disconnect_phy:
++ phy_disconnect(dev->phydev);
+ err_clk_disable:
+ if (priv->internal_phy)
+ bcmgenet_power_down(priv, GENET_POWER_PASSIVE);
+@@ -3625,6 +3631,8 @@ static int bcmgenet_resume(struct device
+ if (priv->internal_phy)
+ bcmgenet_power_up(priv, GENET_POWER_PASSIVE);
+
++ phy_init_hw(dev->phydev);
++
+ bcmgenet_umac_reset(priv);
+
+ init_umac(priv);
+@@ -3633,8 +3641,6 @@ static int bcmgenet_resume(struct device
+ if (priv->wolopts)
+ clk_disable_unprepare(priv->clk_wol);
+
+- phy_init_hw(dev->phydev);
+-
+ /* Speed settings must be restored */
+ bcmgenet_mii_config(priv->dev, false);
+
+--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
++++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+@@ -720,8 +720,8 @@ GENET_IO_MACRO(rbuf, GENET_RBUF_OFF);
+
+ /* MDIO routines */
+ int bcmgenet_mii_init(struct net_device *dev);
++int bcmgenet_mii_connect(struct net_device *dev);
+ int bcmgenet_mii_config(struct net_device *dev, bool init);
+-int bcmgenet_mii_probe(struct net_device *dev);
+ void bcmgenet_mii_exit(struct net_device *dev);
+ void bcmgenet_phy_power_set(struct net_device *dev, bool enable);
+ void bcmgenet_mii_setup(struct net_device *dev);
+--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c
++++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c
+@@ -173,6 +173,46 @@ static void bcmgenet_moca_phy_setup(stru
+ bcmgenet_fixed_phy_link_update);
+ }
+
++int bcmgenet_mii_connect(struct net_device *dev)
++{
++ struct bcmgenet_priv *priv = netdev_priv(dev);
++ struct device_node *dn = priv->pdev->dev.of_node;
++ struct phy_device *phydev;
++ u32 phy_flags = 0;
++ int ret;
++
++ /* Communicate the integrated PHY revision */
++ if (priv->internal_phy)
++ phy_flags = priv->gphy_rev;
++
++ /* Initialize link state variables that bcmgenet_mii_setup() uses */
++ priv->old_link = -1;
++ priv->old_speed = -1;
++ priv->old_duplex = -1;
++ priv->old_pause = -1;
++
++ if (dn) {
++ phydev = of_phy_connect(dev, priv->phy_dn, bcmgenet_mii_setup,
++ phy_flags, priv->phy_interface);
++ if (!phydev) {
++ pr_err("could not attach to PHY\n");
++ return -ENODEV;
++ }
++ } else {
++ phydev = dev->phydev;
++ phydev->dev_flags = phy_flags;
++
++ ret = phy_connect_direct(dev, phydev, bcmgenet_mii_setup,
++ priv->phy_interface);
++ if (ret) {
++ pr_err("could not attach to PHY\n");
++ return -ENODEV;
++ }
++ }
++
++ return 0;
++}
++
+ int bcmgenet_mii_config(struct net_device *dev, bool init)
+ {
+ struct bcmgenet_priv *priv = netdev_priv(dev);
+@@ -266,71 +306,21 @@ int bcmgenet_mii_config(struct net_devic
+ bcmgenet_ext_writel(priv, reg, EXT_RGMII_OOB_CTRL);
+ }
+
+- if (init)
+- dev_info(kdev, "configuring instance for %s\n", phy_name);
+-
+- return 0;
+-}
+-
+-int bcmgenet_mii_probe(struct net_device *dev)
+-{
+- struct bcmgenet_priv *priv = netdev_priv(dev);
+- struct device_node *dn = priv->pdev->dev.of_node;
+- struct phy_device *phydev;
+- u32 phy_flags = 0;
+- int ret;
+-
+- /* Communicate the integrated PHY revision */
+- if (priv->internal_phy)
+- phy_flags = priv->gphy_rev;
+-
+- /* Initialize link state variables that bcmgenet_mii_setup() uses */
+- priv->old_link = -1;
+- priv->old_speed = -1;
+- priv->old_duplex = -1;
+- priv->old_pause = -1;
+-
+- if (dn) {
+- phydev = of_phy_connect(dev, priv->phy_dn, bcmgenet_mii_setup,
+- phy_flags, priv->phy_interface);
+- if (!phydev) {
+- pr_err("could not attach to PHY\n");
+- return -ENODEV;
+- }
+- } else {
+- phydev = dev->phydev;
+- phydev->dev_flags = phy_flags;
++ if (init) {
++ linkmode_copy(phydev->advertising, phydev->supported);
+
+- ret = phy_connect_direct(dev, phydev, bcmgenet_mii_setup,
+- priv->phy_interface);
+- if (ret) {
+- pr_err("could not attach to PHY\n");
+- return -ENODEV;
+- }
+- }
++ /* The internal PHY has its link interrupts routed to the
++ * Ethernet MAC ISRs. On GENETv5 there is a hardware issue
++ * that prevents the signaling of link UP interrupts when
++ * the link operates at 10Mbps, so fallback to polling for
++ * those versions of GENET.
++ */
++ if (priv->internal_phy && !GENET_IS_V5(priv))
++ phydev->irq = PHY_IGNORE_INTERRUPT;
+
+- /* Configure port multiplexer based on what the probed PHY device since
+- * reading the 'max-speed' property determines the maximum supported
+- * PHY speed which is needed for bcmgenet_mii_config() to configure
+- * things appropriately.
+- */
+- ret = bcmgenet_mii_config(dev, true);
+- if (ret) {
+- phy_disconnect(dev->phydev);
+- return ret;
++ dev_info(kdev, "configuring instance for %s\n", phy_name);
+ }
+
+- linkmode_copy(phydev->advertising, phydev->supported);
+-
+- /* The internal PHY has its link interrupts routed to the
+- * Ethernet MAC ISRs. On GENETv5 there is a hardware issue
+- * that prevents the signaling of link UP interrupts when
+- * the link operates at 10Mbps, so fallback to polling for
+- * those versions of GENET.
+- */
+- if (priv->internal_phy && !GENET_IS_V5(priv))
+- dev->phydev->irq = PHY_IGNORE_INTERRUPT;
+-
+ return 0;
+ }
+
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Sat, 5 Oct 2019 15:05:18 -0700
+Subject: net: dsa: b53: Do not clear existing mirrored port mask
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+[ Upstream commit c763ac436b668d7417f0979430ec0312ede4093d ]
+
+Clearing the existing bitmask of mirrored ports essentially prevents us
+from capturing more than one port at any given time. This is clearly
+wrong, do not clear the bitmask prior to setting up the new port.
+
+Reported-by: Hubert Feurstein <h.feurstein@gmail.com>
+Fixes: ed3af5fd08eb ("net: dsa: b53: Add support for port mirroring")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Reviewed-by: Vivien Didelot <vivien.didelot@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/b53/b53_common.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/drivers/net/dsa/b53/b53_common.c
++++ b/drivers/net/dsa/b53/b53_common.c
+@@ -1808,7 +1808,6 @@ int b53_mirror_add(struct dsa_switch *ds
+ loc = B53_EG_MIR_CTL;
+
+ b53_read16(dev, B53_MGMT_PAGE, loc, &reg);
+- reg &= ~MIRROR_MASK;
+ reg |= BIT(port);
+ b53_write16(dev, B53_MGMT_PAGE, loc, reg);
+
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Thu, 31 Oct 2019 15:54:05 -0700
+Subject: net: dsa: bcm_sf2: Fix IMP setup for port different than 8
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+[ Upstream commit 5fc0f21246e50afdf318b5a3a941f7f4f57b8947 ]
+
+Since it became possible for the DSA core to use a CPU port different
+than 8, our bcm_sf2_imp_setup() function was broken because it assumes
+that registers are applicable to port 8. In particular, the port's MAC
+is going to stay disabled, so make sure we clear the RX_DIS and TX_DIS
+bits if we are not configured for port 8.
+
+Fixes: 9f91484f6fcc ("net: dsa: make "label" property optional for dsa2")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/bcm_sf2.c | 36 +++++++++++++++++++++---------------
+ 1 file changed, 21 insertions(+), 15 deletions(-)
+
+--- a/drivers/net/dsa/bcm_sf2.c
++++ b/drivers/net/dsa/bcm_sf2.c
+@@ -37,22 +37,11 @@ static void bcm_sf2_imp_setup(struct dsa
+ unsigned int i;
+ u32 reg, offset;
+
+- if (priv->type == BCM7445_DEVICE_ID)
+- offset = CORE_STS_OVERRIDE_IMP;
+- else
+- offset = CORE_STS_OVERRIDE_IMP2;
+-
+ /* Enable the port memories */
+ reg = core_readl(priv, CORE_MEM_PSM_VDD_CTRL);
+ reg &= ~P_TXQ_PSM_VDD(port);
+ core_writel(priv, reg, CORE_MEM_PSM_VDD_CTRL);
+
+- /* Enable Broadcast, Multicast, Unicast forwarding to IMP port */
+- reg = core_readl(priv, CORE_IMP_CTL);
+- reg |= (RX_BCST_EN | RX_MCST_EN | RX_UCST_EN);
+- reg &= ~(RX_DIS | TX_DIS);
+- core_writel(priv, reg, CORE_IMP_CTL);
+-
+ /* Enable forwarding */
+ core_writel(priv, SW_FWDG_EN, CORE_SWMODE);
+
+@@ -71,10 +60,27 @@ static void bcm_sf2_imp_setup(struct dsa
+
+ b53_brcm_hdr_setup(ds, port);
+
+- /* Force link status for IMP port */
+- reg = core_readl(priv, offset);
+- reg |= (MII_SW_OR | LINK_STS);
+- core_writel(priv, reg, offset);
++ if (port == 8) {
++ if (priv->type == BCM7445_DEVICE_ID)
++ offset = CORE_STS_OVERRIDE_IMP;
++ else
++ offset = CORE_STS_OVERRIDE_IMP2;
++
++ /* Force link status for IMP port */
++ reg = core_readl(priv, offset);
++ reg |= (MII_SW_OR | LINK_STS);
++ core_writel(priv, reg, offset);
++
++ /* Enable Broadcast, Multicast, Unicast forwarding to IMP port */
++ reg = core_readl(priv, CORE_IMP_CTL);
++ reg |= (RX_BCST_EN | RX_MCST_EN | RX_UCST_EN);
++ reg &= ~(RX_DIS | TX_DIS);
++ core_writel(priv, reg, CORE_IMP_CTL);
++ } else {
++ reg = core_readl(priv, CORE_G_PCTL_PORT(port));
++ reg &= ~(RX_DIS | TX_DIS);
++ core_writel(priv, reg, CORE_G_PCTL_PORT(port));
++ }
+ }
+
+ static void bcm_sf2_gphy_enable_set(struct dsa_switch *ds, bool enable)
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Vivien Didelot <vivien.didelot@gmail.com>
+Date: Fri, 18 Oct 2019 17:02:46 -0400
+Subject: net: dsa: fix switch tree list
+
+From: Vivien Didelot <vivien.didelot@gmail.com>
+
+[ Upstream commit 50c7d2ba9de20f60a2d527ad6928209ef67e4cdd ]
+
+If there are multiple switch trees on the device, only the last one
+will be listed, because the arguments of list_add_tail are swapped.
+
+Fixes: 83c0afaec7b7 ("net: dsa: Add new binding implementation")
+Signed-off-by: Vivien Didelot <vivien.didelot@gmail.com>
+Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/dsa/dsa2.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/dsa/dsa2.c
++++ b/net/dsa/dsa2.c
+@@ -46,7 +46,7 @@ static struct dsa_switch_tree *dsa_tree_
+ dst->index = index;
+
+ INIT_LIST_HEAD(&dst->list);
+- list_add_tail(&dsa_tree_list, &dst->list);
++ list_add_tail(&dst->list, &dsa_tree_list);
+
+ kref_init(&dst->refcount);
+
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 16 Oct 2019 18:00:56 -0700
+Subject: net: ensure correct skb->tstamp in various fragmenters
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 9669fffc1415bb0c30e5d2ec98a8e1c3a418cb9c ]
+
+Thomas found that some forwarded packets would be stuck
+in FQ packet scheduler because their skb->tstamp contained
+timestamps far in the future.
+
+We thought we addressed this point in commit 8203e2d844d3
+("net: clear skb->tstamp in forwarding paths") but there
+is still an issue when/if a packet needs to be fragmented.
+
+In order to meet EDT requirements, we have to make sure all
+fragments get the original skb->tstamp.
+
+Note that this original skb->tstamp should be zero in
+forwarding path, but might have a non zero value in
+output path if user decided so.
+
+Fixes: fb420d5d91c1 ("tcp/fq: move back to CLOCK_MONOTONIC")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Thomas Bartschies <Thomas.Bartschies@cvk.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/bridge/netfilter/nf_conntrack_bridge.c | 3 +++
+ net/ipv4/ip_output.c | 3 +++
+ net/ipv6/ip6_output.c | 3 +++
+ net/ipv6/netfilter.c | 3 +++
+ 4 files changed, 12 insertions(+)
+
+--- a/net/bridge/netfilter/nf_conntrack_bridge.c
++++ b/net/bridge/netfilter/nf_conntrack_bridge.c
+@@ -34,6 +34,7 @@ static int nf_br_ip_fragment(struct net
+ {
+ int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
+ unsigned int hlen, ll_rs, mtu;
++ ktime_t tstamp = skb->tstamp;
+ struct ip_frag_state state;
+ struct iphdr *iph;
+ int err;
+@@ -81,6 +82,7 @@ static int nf_br_ip_fragment(struct net
+ if (iter.frag)
+ ip_fraglist_prepare(skb, &iter);
+
++ skb->tstamp = tstamp;
+ err = output(net, sk, data, skb);
+ if (err || !iter.frag)
+ break;
+@@ -105,6 +107,7 @@ slow_path:
+ goto blackhole;
+ }
+
++ skb2->tstamp = tstamp;
+ err = output(net, sk, data, skb2);
+ if (err)
+ goto blackhole;
+--- a/net/ipv4/ip_output.c
++++ b/net/ipv4/ip_output.c
+@@ -771,6 +771,7 @@ int ip_do_fragment(struct net *net, stru
+ struct rtable *rt = skb_rtable(skb);
+ unsigned int mtu, hlen, ll_rs;
+ struct ip_fraglist_iter iter;
++ ktime_t tstamp = skb->tstamp;
+ struct ip_frag_state state;
+ int err = 0;
+
+@@ -846,6 +847,7 @@ int ip_do_fragment(struct net *net, stru
+ ip_fraglist_prepare(skb, &iter);
+ }
+
++ skb->tstamp = tstamp;
+ err = output(net, sk, skb);
+
+ if (!err)
+@@ -901,6 +903,7 @@ slow_path:
+ /*
+ * Put this fragment into the sending queue.
+ */
++ skb2->tstamp = tstamp;
+ err = output(net, sk, skb2);
+ if (err)
+ goto fail;
+--- a/net/ipv6/ip6_output.c
++++ b/net/ipv6/ip6_output.c
+@@ -768,6 +768,7 @@ int ip6_fragment(struct net *net, struct
+ inet6_sk(skb->sk) : NULL;
+ struct ip6_frag_state state;
+ unsigned int mtu, hlen, nexthdr_offset;
++ ktime_t tstamp = skb->tstamp;
+ int hroom, err = 0;
+ __be32 frag_id;
+ u8 *prevhdr, nexthdr = 0;
+@@ -855,6 +856,7 @@ int ip6_fragment(struct net *net, struct
+ if (iter.frag)
+ ip6_fraglist_prepare(skb, &iter);
+
++ skb->tstamp = tstamp;
+ err = output(net, sk, skb);
+ if (!err)
+ IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
+@@ -913,6 +915,7 @@ slow_path:
+ /*
+ * Put this fragment into the sending queue.
+ */
++ frag->tstamp = tstamp;
+ err = output(net, sk, frag);
+ if (err)
+ goto fail;
+--- a/net/ipv6/netfilter.c
++++ b/net/ipv6/netfilter.c
+@@ -119,6 +119,7 @@ int br_ip6_fragment(struct net *net, str
+ struct sk_buff *))
+ {
+ int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
++ ktime_t tstamp = skb->tstamp;
+ struct ip6_frag_state state;
+ u8 *prevhdr, nexthdr = 0;
+ unsigned int mtu, hlen;
+@@ -183,6 +184,7 @@ int br_ip6_fragment(struct net *net, str
+ if (iter.frag)
+ ip6_fraglist_prepare(skb, &iter);
+
++ skb->tstamp = tstamp;
+ err = output(net, sk, data, skb);
+ if (err || !iter.frag)
+ break;
+@@ -215,6 +217,7 @@ slow_path:
+ goto blackhole;
+ }
+
++ skb2->tstamp = tstamp;
+ err = output(net, sk, data, skb2);
+ if (err)
+ goto blackhole;
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Date: Fri, 25 Oct 2019 13:47:24 +1100
+Subject: net: ethernet: ftgmac100: Fix DMA coherency issue with SW checksum
+
+From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+
+[ Upstream commit 88824e3bf29a2fcacfd9ebbfe03063649f0f3254 ]
+
+We are calling the checksum helper after the dma_map_single()
+call to map the packet. This is incorrect as the checksumming
+code will touch the packet from the CPU. This means the cache
+won't be properly flushes (or the bounce buffering will leave
+us with the unmodified packet to DMA).
+
+This moves the calculation of the checksum & vlan tags to
+before the DMA mapping.
+
+This also has the side effect of fixing another bug: If the
+checksum helper fails, we goto "drop" to drop the packet, which
+will not unmap the DMA mapping.
+
+Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Fixes: 05690d633f30 ("ftgmac100: Upgrade to NETIF_F_HW_CSUM")
+Reviewed-by: Vijay Khemka <vijaykhemka@fb.com>
+Tested-by: Vijay Khemka <vijaykhemka@fb.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/faraday/ftgmac100.c | 25 ++++++++++++-------------
+ 1 file changed, 12 insertions(+), 13 deletions(-)
+
+--- a/drivers/net/ethernet/faraday/ftgmac100.c
++++ b/drivers/net/ethernet/faraday/ftgmac100.c
+@@ -726,6 +726,18 @@ static netdev_tx_t ftgmac100_hard_start_
+ */
+ nfrags = skb_shinfo(skb)->nr_frags;
+
++ /* Setup HW checksumming */
++ csum_vlan = 0;
++ if (skb->ip_summed == CHECKSUM_PARTIAL &&
++ !ftgmac100_prep_tx_csum(skb, &csum_vlan))
++ goto drop;
++
++ /* Add VLAN tag */
++ if (skb_vlan_tag_present(skb)) {
++ csum_vlan |= FTGMAC100_TXDES1_INS_VLANTAG;
++ csum_vlan |= skb_vlan_tag_get(skb) & 0xffff;
++ }
++
+ /* Get header len */
+ len = skb_headlen(skb);
+
+@@ -752,19 +764,6 @@ static netdev_tx_t ftgmac100_hard_start_
+ if (nfrags == 0)
+ f_ctl_stat |= FTGMAC100_TXDES0_LTS;
+ txdes->txdes3 = cpu_to_le32(map);
+-
+- /* Setup HW checksumming */
+- csum_vlan = 0;
+- if (skb->ip_summed == CHECKSUM_PARTIAL &&
+- !ftgmac100_prep_tx_csum(skb, &csum_vlan))
+- goto drop;
+-
+- /* Add VLAN tag */
+- if (skb_vlan_tag_present(skb)) {
+- csum_vlan |= FTGMAC100_TXDES1_INS_VLANTAG;
+- csum_vlan |= skb_vlan_tag_get(skb) & 0xffff;
+- }
+-
+ txdes->txdes1 = cpu_to_le32(csum_vlan);
+
+ /* Next descriptor */
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Tejun Heo <tj@kernel.org>
+Date: Thu, 24 Oct 2019 13:50:27 -0700
+Subject: net: fix sk_page_frag() recursion from memory reclaim
+
+From: Tejun Heo <tj@kernel.org>
+
+[ Upstream commit 20eb4f29b60286e0d6dc01d9c260b4bd383c58fb ]
+
+sk_page_frag() optimizes skb_frag allocations by using per-task
+skb_frag cache when it knows it's the only user. The condition is
+determined by seeing whether the socket allocation mask allows
+blocking - if the allocation may block, it obviously owns the task's
+context and ergo exclusively owns current->task_frag.
+
+Unfortunately, this misses recursion through memory reclaim path.
+Please take a look at the following backtrace.
+
+ [2] RIP: 0010:tcp_sendmsg_locked+0xccf/0xe10
+ ...
+ tcp_sendmsg+0x27/0x40
+ sock_sendmsg+0x30/0x40
+ sock_xmit.isra.24+0xa1/0x170 [nbd]
+ nbd_send_cmd+0x1d2/0x690 [nbd]
+ nbd_queue_rq+0x1b5/0x3b0 [nbd]
+ __blk_mq_try_issue_directly+0x108/0x1b0
+ blk_mq_request_issue_directly+0xbd/0xe0
+ blk_mq_try_issue_list_directly+0x41/0xb0
+ blk_mq_sched_insert_requests+0xa2/0xe0
+ blk_mq_flush_plug_list+0x205/0x2a0
+ blk_flush_plug_list+0xc3/0xf0
+ [1] blk_finish_plug+0x21/0x2e
+ _xfs_buf_ioapply+0x313/0x460
+ __xfs_buf_submit+0x67/0x220
+ xfs_buf_read_map+0x113/0x1a0
+ xfs_trans_read_buf_map+0xbf/0x330
+ xfs_btree_read_buf_block.constprop.42+0x95/0xd0
+ xfs_btree_lookup_get_block+0x95/0x170
+ xfs_btree_lookup+0xcc/0x470
+ xfs_bmap_del_extent_real+0x254/0x9a0
+ __xfs_bunmapi+0x45c/0xab0
+ xfs_bunmapi+0x15/0x30
+ xfs_itruncate_extents_flags+0xca/0x250
+ xfs_free_eofblocks+0x181/0x1e0
+ xfs_fs_destroy_inode+0xa8/0x1b0
+ destroy_inode+0x38/0x70
+ dispose_list+0x35/0x50
+ prune_icache_sb+0x52/0x70
+ super_cache_scan+0x120/0x1a0
+ do_shrink_slab+0x120/0x290
+ shrink_slab+0x216/0x2b0
+ shrink_node+0x1b6/0x4a0
+ do_try_to_free_pages+0xc6/0x370
+ try_to_free_mem_cgroup_pages+0xe3/0x1e0
+ try_charge+0x29e/0x790
+ mem_cgroup_charge_skmem+0x6a/0x100
+ __sk_mem_raise_allocated+0x18e/0x390
+ __sk_mem_schedule+0x2a/0x40
+ [0] tcp_sendmsg_locked+0x8eb/0xe10
+ tcp_sendmsg+0x27/0x40
+ sock_sendmsg+0x30/0x40
+ ___sys_sendmsg+0x26d/0x2b0
+ __sys_sendmsg+0x57/0xa0
+ do_syscall_64+0x42/0x100
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+In [0], tcp_send_msg_locked() was using current->page_frag when it
+called sk_wmem_schedule(). It already calculated how many bytes can
+be fit into current->page_frag. Due to memory pressure,
+sk_wmem_schedule() called into memory reclaim path which called into
+xfs and then IO issue path. Because the filesystem in question is
+backed by nbd, the control goes back into the tcp layer - back into
+tcp_sendmsg_locked().
+
+nbd sets sk_allocation to (GFP_NOIO | __GFP_MEMALLOC) which makes
+sense - it's in the process of freeing memory and wants to be able to,
+e.g., drop clean pages to make forward progress. However, this
+confused sk_page_frag() called from [2]. Because it only tests
+whether the allocation allows blocking which it does, it now thinks
+current->page_frag can be used again although it already was being
+used in [0].
+
+After [2] used current->page_frag, the offset would be increased by
+the used amount. When the control returns to [0],
+current->page_frag's offset is increased and the previously calculated
+number of bytes now may overrun the end of allocated memory leading to
+silent memory corruptions.
+
+Fix it by adding gfpflags_normal_context() which tests sleepable &&
+!reclaim and use it to determine whether to use current->task_frag.
+
+v2: Eric didn't like gfp flags being tested twice. Introduce a new
+ helper gfpflags_normal_context() and combine the two tests.
+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Cc: Josef Bacik <josef@toxicpanda.com>
+Cc: Eric Dumazet <eric.dumazet@gmail.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/gfp.h | 23 +++++++++++++++++++++++
+ include/net/sock.h | 11 ++++++++---
+ 2 files changed, 31 insertions(+), 3 deletions(-)
+
+--- a/include/linux/gfp.h
++++ b/include/linux/gfp.h
+@@ -325,6 +325,29 @@ static inline bool gfpflags_allow_blocki
+ return !!(gfp_flags & __GFP_DIRECT_RECLAIM);
+ }
+
++/**
++ * gfpflags_normal_context - is gfp_flags a normal sleepable context?
++ * @gfp_flags: gfp_flags to test
++ *
++ * Test whether @gfp_flags indicates that the allocation is from the
++ * %current context and allowed to sleep.
++ *
++ * An allocation being allowed to block doesn't mean it owns the %current
++ * context. When direct reclaim path tries to allocate memory, the
++ * allocation context is nested inside whatever %current was doing at the
++ * time of the original allocation. The nested allocation may be allowed
++ * to block but modifying anything %current owns can corrupt the outer
++ * context's expectations.
++ *
++ * %true result from this function indicates that the allocation context
++ * can sleep and use anything that's associated with %current.
++ */
++static inline bool gfpflags_normal_context(const gfp_t gfp_flags)
++{
++ return (gfp_flags & (__GFP_DIRECT_RECLAIM | __GFP_MEMALLOC)) ==
++ __GFP_DIRECT_RECLAIM;
++}
++
+ #ifdef CONFIG_HIGHMEM
+ #define OPT_ZONE_HIGHMEM ZONE_HIGHMEM
+ #else
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -2233,12 +2233,17 @@ struct sk_buff *sk_stream_alloc_skb(stru
+ * sk_page_frag - return an appropriate page_frag
+ * @sk: socket
+ *
+- * If socket allocation mode allows current thread to sleep, it means its
+- * safe to use the per task page_frag instead of the per socket one.
++ * Use the per task page_frag instead of the per socket one for
++ * optimization when we know that we're in the normal context and owns
++ * everything that's associated with %current.
++ *
++ * gfpflags_allow_blocking() isn't enough here as direct reclaim may nest
++ * inside other socket operations and end up recursing into sk_page_frag()
++ * while it's already in use.
+ */
+ static inline struct page_frag *sk_page_frag(struct sock *sk)
+ {
+- if (gfpflags_allow_blocking(sk->sk_allocation))
++ if (gfpflags_normal_context(sk->sk_allocation))
+ return ¤t->task_frag;
+
+ return &sk->sk_frag;
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Jiangfeng Xiao <xiaojiangfeng@huawei.com>
+Date: Mon, 28 Oct 2019 13:09:46 +0800
+Subject: net: hisilicon: Fix ping latency when deal with high throughput
+
+From: Jiangfeng Xiao <xiaojiangfeng@huawei.com>
+
+[ Upstream commit e56bd641ca61beb92b135298d5046905f920b734 ]
+
+This is due to an error in over-budget processing.
+When dealing with high throughput, the used buffers
+that exceed the budget are not cleaned up. In addition,
+it takes a lot of cycles to clean up the used buffer,
+and then the buffer where the valid data is located can take effect.
+
+Signed-off-by: Jiangfeng Xiao <xiaojiangfeng@huawei.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/hisilicon/hip04_eth.c | 15 +++++++++------
+ 1 file changed, 9 insertions(+), 6 deletions(-)
+
+--- a/drivers/net/ethernet/hisilicon/hip04_eth.c
++++ b/drivers/net/ethernet/hisilicon/hip04_eth.c
+@@ -237,6 +237,7 @@ struct hip04_priv {
+ dma_addr_t rx_phys[RX_DESC_NUM];
+ unsigned int rx_head;
+ unsigned int rx_buf_size;
++ unsigned int rx_cnt_remaining;
+
+ struct device_node *phy_node;
+ struct phy_device *phy;
+@@ -575,7 +576,6 @@ static int hip04_rx_poll(struct napi_str
+ struct hip04_priv *priv = container_of(napi, struct hip04_priv, napi);
+ struct net_device *ndev = priv->ndev;
+ struct net_device_stats *stats = &ndev->stats;
+- unsigned int cnt = hip04_recv_cnt(priv);
+ struct rx_desc *desc;
+ struct sk_buff *skb;
+ unsigned char *buf;
+@@ -588,8 +588,8 @@ static int hip04_rx_poll(struct napi_str
+
+ /* clean up tx descriptors */
+ tx_remaining = hip04_tx_reclaim(ndev, false);
+-
+- while (cnt && !last) {
++ priv->rx_cnt_remaining += hip04_recv_cnt(priv);
++ while (priv->rx_cnt_remaining && !last) {
+ buf = priv->rx_buf[priv->rx_head];
+ skb = build_skb(buf, priv->rx_buf_size);
+ if (unlikely(!skb)) {
+@@ -635,11 +635,13 @@ refill:
+ hip04_set_recv_desc(priv, phys);
+
+ priv->rx_head = RX_NEXT(priv->rx_head);
+- if (rx >= budget)
++ if (rx >= budget) {
++ --priv->rx_cnt_remaining;
+ goto done;
++ }
+
+- if (--cnt == 0)
+- cnt = hip04_recv_cnt(priv);
++ if (--priv->rx_cnt_remaining == 0)
++ priv->rx_cnt_remaining += hip04_recv_cnt(priv);
+ }
+
+ if (!(priv->reg_inten & RCV_INT)) {
+@@ -724,6 +726,7 @@ static int hip04_mac_open(struct net_dev
+ int i;
+
+ priv->rx_head = 0;
++ priv->rx_cnt_remaining = 0;
+ priv->tx_head = 0;
+ priv->tx_tail = 0;
+ hip04_reset_ppe(priv);
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Yonglong Liu <liuyonglong@huawei.com>
+Date: Fri, 18 Oct 2019 11:42:59 +0800
+Subject: net: hns3: fix mis-counting IRQ vector numbers issue
+
+From: Yonglong Liu <liuyonglong@huawei.com>
+
+[ Upstream commit 580a05f9d4ada3bfb689140d0efec1efdb8a48da ]
+
+Currently, the num_msi_left means the vector numbers of NIC,
+but if the PF supported RoCE, it contains the vector numbers
+of NIC and RoCE(Not expected).
+
+This may cause interrupts to be lost in some cases, because the
+NIC module used the vector resources which belong to RoCE.
+
+This patch adds a new variable num_nic_msi to store the vector
+numbers of NIC, and adjust the default TQP numbers and rss_size
+according to the value of num_nic_msi.
+
+Fixes: 46a3df9f9718 ("net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support")
+Signed-off-by: Yonglong Liu <liuyonglong@huawei.com>
+Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/hisilicon/hns3/hnae3.h | 2 +
+ drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 21 ++++++++++
+ drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 1
+ drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 11 ++++-
+ drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 28 ++++++++++++--
+ drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h | 1
+ 6 files changed, 58 insertions(+), 6 deletions(-)
+
+--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
++++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+@@ -32,6 +32,8 @@
+
+ #define HNAE3_MOD_VERSION "1.0"
+
++#define HNAE3_MIN_VECTOR_NUM 2 /* first one for misc, another for IO */
++
+ /* Device IDs */
+ #define HNAE3_DEV_ID_GE 0xA220
+ #define HNAE3_DEV_ID_25GE 0xA221
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+@@ -800,6 +800,9 @@ static int hclge_query_pf_resource(struc
+ hnae3_get_field(__le16_to_cpu(req->pf_intr_vector_number),
+ HCLGE_PF_VEC_NUM_M, HCLGE_PF_VEC_NUM_S);
+
++ /* nic's msix numbers is always equals to the roce's. */
++ hdev->num_nic_msi = hdev->num_roce_msi;
++
+ /* PF should have NIC vectors and Roce vectors,
+ * NIC vectors are queued before Roce vectors.
+ */
+@@ -809,6 +812,15 @@ static int hclge_query_pf_resource(struc
+ hdev->num_msi =
+ hnae3_get_field(__le16_to_cpu(req->pf_intr_vector_number),
+ HCLGE_PF_VEC_NUM_M, HCLGE_PF_VEC_NUM_S);
++
++ hdev->num_nic_msi = hdev->num_msi;
++ }
++
++ if (hdev->num_nic_msi < HNAE3_MIN_VECTOR_NUM) {
++ dev_err(&hdev->pdev->dev,
++ "Just %u msi resources, not enough for pf(min:2).\n",
++ hdev->num_nic_msi);
++ return -EINVAL;
+ }
+
+ return 0;
+@@ -1394,6 +1406,10 @@ static int hclge_assign_tqp(struct hclg
+ kinfo->rss_size = min_t(u16, hdev->rss_size_max,
+ vport->alloc_tqps / hdev->tm_info.num_tc);
+
++ /* ensure one to one mapping between irq and queue at default */
++ kinfo->rss_size = min_t(u16, kinfo->rss_size,
++ (hdev->num_nic_msi - 1) / hdev->tm_info.num_tc);
++
+ return 0;
+ }
+
+@@ -2172,7 +2188,8 @@ static int hclge_init_msi(struct hclge_d
+ int vectors;
+ int i;
+
+- vectors = pci_alloc_irq_vectors(pdev, 1, hdev->num_msi,
++ vectors = pci_alloc_irq_vectors(pdev, HNAE3_MIN_VECTOR_NUM,
++ hdev->num_msi,
+ PCI_IRQ_MSI | PCI_IRQ_MSIX);
+ if (vectors < 0) {
+ dev_err(&pdev->dev,
+@@ -2187,6 +2204,7 @@ static int hclge_init_msi(struct hclge_d
+
+ hdev->num_msi = vectors;
+ hdev->num_msi_left = vectors;
++
+ hdev->base_msi_vector = pdev->irq;
+ hdev->roce_base_vector = hdev->base_msi_vector +
+ hdev->roce_base_msix_offset;
+@@ -3644,6 +3662,7 @@ static int hclge_get_vector(struct hnae3
+ int alloc = 0;
+ int i, j;
+
++ vector_num = min_t(u16, hdev->num_nic_msi - 1, vector_num);
+ vector_num = min(hdev->num_msi_left, vector_num);
+
+ for (j = 0; j < vector_num; j++) {
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+@@ -795,6 +795,7 @@ struct hclge_dev {
+ u32 base_msi_vector;
+ u16 *vector_status;
+ int *vector_irq;
++ u16 num_nic_msi; /* Num of nic vectors for this PF */
+ u16 num_roce_msi; /* Num of roce vectors for this PF */
+ int roce_base_vector;
+
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+@@ -540,9 +540,16 @@ static void hclge_tm_vport_tc_info_updat
+ kinfo->rss_size = kinfo->req_rss_size;
+ } else if (kinfo->rss_size > max_rss_size ||
+ (!kinfo->req_rss_size && kinfo->rss_size < max_rss_size)) {
++ /* if user not set rss, the rss_size should compare with the
++ * valid msi numbers to ensure one to one map between tqp and
++ * irq as default.
++ */
++ if (!kinfo->req_rss_size)
++ max_rss_size = min_t(u16, max_rss_size,
++ (hdev->num_nic_msi - 1) /
++ kinfo->num_tc);
++
+ /* Set to the maximum specification value (max_rss_size). */
+- dev_info(&hdev->pdev->dev, "rss changes from %d to %d\n",
+- kinfo->rss_size, max_rss_size);
+ kinfo->rss_size = max_rss_size;
+ }
+
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+@@ -411,6 +411,13 @@ static int hclgevf_knic_setup(struct hcl
+ kinfo->tqp[i] = &hdev->htqp[i].q;
+ }
+
++ /* after init the max rss_size and tqps, adjust the default tqp numbers
++ * and rss size with the actual vector numbers
++ */
++ kinfo->num_tqps = min_t(u16, hdev->num_nic_msix - 1, kinfo->num_tqps);
++ kinfo->rss_size = min_t(u16, kinfo->num_tqps / kinfo->num_tc,
++ kinfo->rss_size);
++
+ return 0;
+ }
+
+@@ -502,6 +509,7 @@ static int hclgevf_get_vector(struct hna
+ int alloc = 0;
+ int i, j;
+
++ vector_num = min_t(u16, hdev->num_nic_msix - 1, vector_num);
+ vector_num = min(hdev->num_msi_left, vector_num);
+
+ for (j = 0; j < vector_num; j++) {
+@@ -2208,13 +2216,14 @@ static int hclgevf_init_msi(struct hclge
+ int vectors;
+ int i;
+
+- if (hnae3_get_bit(hdev->ae_dev->flag, HNAE3_DEV_SUPPORT_ROCE_B))
++ if (hnae3_dev_roce_supported(hdev))
+ vectors = pci_alloc_irq_vectors(pdev,
+ hdev->roce_base_msix_offset + 1,
+ hdev->num_msi,
+ PCI_IRQ_MSIX);
+ else
+- vectors = pci_alloc_irq_vectors(pdev, 1, hdev->num_msi,
++ vectors = pci_alloc_irq_vectors(pdev, HNAE3_MIN_VECTOR_NUM,
++ hdev->num_msi,
+ PCI_IRQ_MSI | PCI_IRQ_MSIX);
+
+ if (vectors < 0) {
+@@ -2230,6 +2239,7 @@ static int hclgevf_init_msi(struct hclge
+
+ hdev->num_msi = vectors;
+ hdev->num_msi_left = vectors;
++
+ hdev->base_msi_vector = pdev->irq;
+ hdev->roce_base_vector = pdev->irq + hdev->roce_base_msix_offset;
+
+@@ -2495,7 +2505,7 @@ static int hclgevf_query_vf_resource(str
+
+ req = (struct hclgevf_query_res_cmd *)desc.data;
+
+- if (hnae3_get_bit(hdev->ae_dev->flag, HNAE3_DEV_SUPPORT_ROCE_B)) {
++ if (hnae3_dev_roce_supported(hdev)) {
+ hdev->roce_base_msix_offset =
+ hnae3_get_field(__le16_to_cpu(req->msixcap_localid_ba_rocee),
+ HCLGEVF_MSIX_OFT_ROCEE_M,
+@@ -2504,6 +2514,9 @@ static int hclgevf_query_vf_resource(str
+ hnae3_get_field(__le16_to_cpu(req->vf_intr_vector_number),
+ HCLGEVF_VEC_NUM_M, HCLGEVF_VEC_NUM_S);
+
++ /* nic's msix numbers is always equals to the roce's. */
++ hdev->num_nic_msix = hdev->num_roce_msix;
++
+ /* VF should have NIC vectors and Roce vectors, NIC vectors
+ * are queued before Roce vectors. The offset is fixed to 64.
+ */
+@@ -2513,6 +2526,15 @@ static int hclgevf_query_vf_resource(str
+ hdev->num_msi =
+ hnae3_get_field(__le16_to_cpu(req->vf_intr_vector_number),
+ HCLGEVF_VEC_NUM_M, HCLGEVF_VEC_NUM_S);
++
++ hdev->num_nic_msix = hdev->num_msi;
++ }
++
++ if (hdev->num_nic_msix < HNAE3_MIN_VECTOR_NUM) {
++ dev_err(&hdev->pdev->dev,
++ "Just %u msi resources, not enough for vf(min:2).\n",
++ hdev->num_nic_msix);
++ return -EINVAL;
+ }
+
+ return 0;
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
+@@ -265,6 +265,7 @@ struct hclgevf_dev {
+ u16 num_msi;
+ u16 num_msi_left;
+ u16 num_msi_used;
++ u16 num_nic_msix; /* Num of nic vectors for this VF */
+ u16 num_roce_msix; /* Num of roce vectors for this VF */
+ u16 roce_base_msix_offset;
+ int roce_base_vector;
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Eran Ben Elisha <eranbe@mellanox.com>
+Date: Sun, 27 Oct 2019 16:39:15 +0200
+Subject: net/mlx4_core: Dynamically set guaranteed amount of counters per VF
+
+From: Eran Ben Elisha <eranbe@mellanox.com>
+
+[ Upstream commit e19868efea0c103f23b4b7e986fd0a703822111f ]
+
+Prior to this patch, the amount of counters guaranteed per VF in the
+resource tracker was MLX4_VF_COUNTERS_PER_PORT * MLX4_MAX_PORTS. It was
+set regardless if the VF was single or dual port.
+This caused several VFs to have no guaranteed counters although the
+system could satisfy their request.
+
+The fix is to dynamically guarantee counters, based on each VF
+specification.
+
+Fixes: 9de92c60beaa ("net/mlx4_core: Adjust counter grant policy in the resource tracker")
+Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
+Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
+Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 42 +++++++++++-------
+ 1 file changed, 26 insertions(+), 16 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
++++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+@@ -471,12 +471,31 @@ void mlx4_init_quotas(struct mlx4_dev *d
+ priv->mfunc.master.res_tracker.res_alloc[RES_MPT].quota[pf];
+ }
+
+-static int get_max_gauranteed_vfs_counter(struct mlx4_dev *dev)
++static int
++mlx4_calc_res_counter_guaranteed(struct mlx4_dev *dev,
++ struct resource_allocator *res_alloc,
++ int vf)
+ {
+- /* reduce the sink counter */
+- return (dev->caps.max_counters - 1 -
+- (MLX4_PF_COUNTERS_PER_PORT * MLX4_MAX_PORTS))
+- / MLX4_MAX_PORTS;
++ struct mlx4_active_ports actv_ports;
++ int ports, counters_guaranteed;
++
++ /* For master, only allocate according to the number of phys ports */
++ if (vf == mlx4_master_func_num(dev))
++ return MLX4_PF_COUNTERS_PER_PORT * dev->caps.num_ports;
++
++ /* calculate real number of ports for the VF */
++ actv_ports = mlx4_get_active_ports(dev, vf);
++ ports = bitmap_weight(actv_ports.ports, dev->caps.num_ports);
++ counters_guaranteed = ports * MLX4_VF_COUNTERS_PER_PORT;
++
++ /* If we do not have enough counters for this VF, do not
++ * allocate any for it. '-1' to reduce the sink counter.
++ */
++ if ((res_alloc->res_reserved + counters_guaranteed) >
++ (dev->caps.max_counters - 1))
++ return 0;
++
++ return counters_guaranteed;
+ }
+
+ int mlx4_init_resource_tracker(struct mlx4_dev *dev)
+@@ -484,7 +503,6 @@ int mlx4_init_resource_tracker(struct ml
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int i, j;
+ int t;
+- int max_vfs_guarantee_counter = get_max_gauranteed_vfs_counter(dev);
+
+ priv->mfunc.master.res_tracker.slave_list =
+ kcalloc(dev->num_slaves, sizeof(struct slave_list),
+@@ -603,16 +621,8 @@ int mlx4_init_resource_tracker(struct ml
+ break;
+ case RES_COUNTER:
+ res_alloc->quota[t] = dev->caps.max_counters;
+- if (t == mlx4_master_func_num(dev))
+- res_alloc->guaranteed[t] =
+- MLX4_PF_COUNTERS_PER_PORT *
+- MLX4_MAX_PORTS;
+- else if (t <= max_vfs_guarantee_counter)
+- res_alloc->guaranteed[t] =
+- MLX4_VF_COUNTERS_PER_PORT *
+- MLX4_MAX_PORTS;
+- else
+- res_alloc->guaranteed[t] = 0;
++ res_alloc->guaranteed[t] =
++ mlx4_calc_res_counter_guaranteed(dev, res_alloc, t);
+ break;
+ default:
+ break;
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Roi Dayan <roid@mellanox.com>
+Date: Wed, 11 Sep 2019 14:44:50 +0300
+Subject: net/mlx5: Fix flow counter list auto bits struct
+
+From: Roi Dayan <roid@mellanox.com>
+
+[ Upstream commit 6dfef396ea13873ae9066ee2e0ad6ee364031fe2 ]
+
+The union should contain the extended dest and counter list.
+Remove the reserved 0x40 bits, which are redundant.
+This change doesn't break any functionality.
+Everything works today because the code in fs_cmd.c is using
+the correct structs if extended dest or the basic dest.
+
+Fixes: 1b115498598f ("net/mlx5: Introduce extended destination fields")
+Signed-off-by: Roi Dayan <roid@mellanox.com>
+Reviewed-by: Mark Bloch <markb@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/mlx5/mlx5_ifc.h | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/include/linux/mlx5/mlx5_ifc.h
++++ b/include/linux/mlx5/mlx5_ifc.h
+@@ -1437,9 +1437,8 @@ struct mlx5_ifc_extended_dest_format_bit
+ };
+
+ union mlx5_ifc_dest_format_struct_flow_counter_list_auto_bits {
+- struct mlx5_ifc_dest_format_struct_bits dest_format_struct;
++ struct mlx5_ifc_extended_dest_format_bits extended_dest_format;
+ struct mlx5_ifc_flow_counter_list_bits flow_counter_list;
+- u8 reserved_at_0[0x40];
+ };
+
+ struct mlx5_ifc_fte_match_param_bits {
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Parav Pandit <parav@mellanox.com>
+Date: Thu, 19 Sep 2019 15:58:14 -0500
+Subject: net/mlx5: Fix rtable reference leak
+
+From: Parav Pandit <parav@mellanox.com>
+
+[ Upstream commit 2347cee83b2bd868bde2d283db0fac89f22be4e0 ]
+
+If the rt entry gateway family is not AF_INET for multipath device,
+rtable reference is leaked.
+Hence, fix it by releasing the reference.
+
+Fixes: 5fb091e8130b ("net/mlx5e: Use hint to resolve route when in HW multipath mode")
+Fixes: e32ee6c78efa ("net/mlx5e: Support tunnel encap over tagged Ethernet")
+Signed-off-by: Parav Pandit <parav@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c | 12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
+@@ -90,15 +90,19 @@ static int mlx5e_route_lookup_ipv4(struc
+ if (ret)
+ return ret;
+
+- if (mlx5_lag_is_multipath(mdev) && rt->rt_gw_family != AF_INET)
++ if (mlx5_lag_is_multipath(mdev) && rt->rt_gw_family != AF_INET) {
++ ip_rt_put(rt);
+ return -ENETUNREACH;
++ }
+ #else
+ return -EOPNOTSUPP;
+ #endif
+
+ ret = get_route_and_out_devs(priv, rt->dst.dev, route_dev, out_dev);
+- if (ret < 0)
++ if (ret < 0) {
++ ip_rt_put(rt);
+ return ret;
++ }
+
+ if (!(*out_ttl))
+ *out_ttl = ip4_dst_hoplimit(&rt->dst);
+@@ -142,8 +146,10 @@ static int mlx5e_route_lookup_ipv6(struc
+ *out_ttl = ip6_dst_hoplimit(dst);
+
+ ret = get_route_and_out_devs(priv, dst->dev, route_dev, out_dev);
+- if (ret < 0)
++ if (ret < 0) {
++ dst_release(dst);
+ return ret;
++ }
+ #else
+ return -EOPNOTSUPP;
+ #endif
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Dmytro Linkin <dmitrolin@mellanox.com>
+Date: Wed, 4 Sep 2019 12:32:49 +0000
+Subject: net/mlx5e: Determine source port properly for vlan push action
+
+From: Dmytro Linkin <dmitrolin@mellanox.com>
+
+[ Upstream commit d5dbcc4e87bc8444bd2f1ca4b8f787e1e5677ec2 ]
+
+Termination tables are used for vlan push actions on uplink ports.
+To support RoCE dual port the source port value was placed in a register.
+Fix the code to use an API method returning the source port according to
+the FW capabilities.
+
+Fixes: 10caabdaad5a ("net/mlx5e: Use termination table for VLAN push actions")
+Signed-off-by: Dmytro Linkin <dmitrolin@mellanox.com>
+Reviewed-by: Jianbo Liu <jianbol@mellanox.com>
+Reviewed-by: Oz Shlomo <ozsh@mellanox.com>
+Reviewed-by: Roi Dayan <roid@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c | 22 +++++++---
+ 1 file changed, 16 insertions(+), 6 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
+@@ -177,22 +177,32 @@ mlx5_eswitch_termtbl_actions_move(struct
+ memset(&src->vlan[1], 0, sizeof(src->vlan[1]));
+ }
+
++static bool mlx5_eswitch_offload_is_uplink_port(const struct mlx5_eswitch *esw,
++ const struct mlx5_flow_spec *spec)
++{
++ u32 port_mask, port_value;
++
++ if (MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source))
++ return spec->flow_context.flow_source == MLX5_VPORT_UPLINK;
++
++ port_mask = MLX5_GET(fte_match_param, spec->match_criteria,
++ misc_parameters.source_port);
++ port_value = MLX5_GET(fte_match_param, spec->match_value,
++ misc_parameters.source_port);
++ return (port_mask & port_value & 0xffff) == MLX5_VPORT_UPLINK;
++}
++
+ bool
+ mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_flow_spec *spec)
+ {
+- u32 port_mask = MLX5_GET(fte_match_param, spec->match_criteria,
+- misc_parameters.source_port);
+- u32 port_value = MLX5_GET(fte_match_param, spec->match_value,
+- misc_parameters.source_port);
+-
+ if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, termination_table))
+ return false;
+
+ /* push vlan on RX */
+ return (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) &&
+- ((port_mask & port_value) == MLX5_VPORT_UPLINK);
++ mlx5_eswitch_offload_is_uplink_port(esw, spec);
+ }
+
+ struct mlx5_flow_handle *
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Aya Levin <ayal@mellanox.com>
+Date: Wed, 2 Oct 2019 16:53:21 +0300
+Subject: net/mlx5e: Fix ethtool self test: link speed
+
+From: Aya Levin <ayal@mellanox.com>
+
+[ Upstream commit 534e7366f41b0c689b01af4375aefcd1462adedf ]
+
+Ethtool self test contains a test for link speed. This test reads the
+PTYS register and determines whether the current speed is valid or not.
+Change current implementation to use the function mlx5e_port_linkspeed()
+that does the same check and fails when speed is invalid. This code
+redundancy led to a bug when mlx5e_port_linkspeed() was updated with
+extended speeds and the self test was not.
+
+Fixes: 2c81bfd5ae56 ("net/mlx5e: Move port speed code from en_ethtool.c to en/port.c")
+Signed-off-by: Aya Levin <ayal@mellanox.com>
+Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c | 15 +++------------
+ 1 file changed, 3 insertions(+), 12 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
+@@ -35,6 +35,7 @@
+ #include <linux/udp.h>
+ #include <net/udp.h>
+ #include "en.h"
++#include "en/port.h"
+
+ enum {
+ MLX5E_ST_LINK_STATE,
+@@ -80,22 +81,12 @@ static int mlx5e_test_link_state(struct
+
+ static int mlx5e_test_link_speed(struct mlx5e_priv *priv)
+ {
+- u32 out[MLX5_ST_SZ_DW(ptys_reg)];
+- u32 eth_proto_oper;
+- int i;
++ u32 speed;
+
+ if (!netif_carrier_ok(priv->netdev))
+ return 1;
+
+- if (mlx5_query_port_ptys(priv->mdev, out, sizeof(out), MLX5_PTYS_EN, 1))
+- return 1;
+-
+- eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);
+- for (i = 0; i < MLX5E_LINK_MODES_NUMBER; i++) {
+- if (eth_proto_oper & MLX5E_PROT_MASK(i))
+- return 0;
+- }
+- return 1;
++ return mlx5e_port_linkspeed(priv->mdev, &speed);
+ }
+
+ struct mlx5ehdr {
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Maxim Mikityanskiy <maximmi@mellanox.com>
+Date: Mon, 16 Sep 2019 14:54:20 +0300
+Subject: net/mlx5e: Fix handling of compressed CQEs in case of low NAPI budget
+
+From: Maxim Mikityanskiy <maximmi@mellanox.com>
+
+[ Upstream commit 9df86bdb6746d7fcfc2fda715f7a7c3d0ddb2654 ]
+
+When CQE compression is enabled, compressed CQEs use the following
+structure: a title is followed by one or many blocks, each containing 8
+mini CQEs (except the last, which may contain fewer mini CQEs).
+
+Due to NAPI budget restriction, a complete structure is not always
+parsed in one NAPI run, and some blocks with mini CQEs may be deferred
+to the next NAPI poll call - we have the mlx5e_decompress_cqes_cont call
+in the beginning of mlx5e_poll_rx_cq. However, if the budget is
+extremely low, some blocks may be left even after that, but the code
+that follows the mlx5e_decompress_cqes_cont call doesn't check it and
+assumes that a new CQE begins, which may not be the case. In such cases,
+random memory corruptions occur.
+
+An extremely low NAPI budget of 8 is used when busy_poll or busy_read is
+active.
+
+This commit adds a check to make sure that the previous compressed CQE
+has been completely parsed after mlx5e_decompress_cqes_cont, otherwise
+it prevents a new CQE from being fetched in the middle of a compressed
+CQE.
+
+This commit fixes random crashes in __build_skb, __page_pool_put_page
+and other not-related-directly places, that used to happen when both CQE
+compression and busy_poll/busy_read were enabled.
+
+Fixes: 7219ab34f184 ("net/mlx5e: CQE compression")
+Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+@@ -1367,8 +1367,11 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq
+ if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
+ return 0;
+
+- if (rq->cqd.left)
++ if (rq->cqd.left) {
+ work_done += mlx5e_decompress_cqes_cont(rq, cqwq, 0, budget);
++ if (rq->cqd.left || work_done >= budget)
++ goto out;
++ }
+
+ cqe = mlx5_cqwq_get_cqe(cqwq);
+ if (!cqe) {
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Aya Levin <ayal@mellanox.com>
+Date: Wed, 23 Oct 2019 12:57:54 +0300
+Subject: net/mlx5e: Initialize on stack link modes bitmap
+
+From: Aya Levin <ayal@mellanox.com>
+
+[ Upstream commit 926b37f76fb0a22fe93c8873c819fd167180e85c ]
+
+Initialize link modes bitmap on stack before using it, otherwise the
+outcome of ethtool set link ksettings might have unexpected values.
+
+Fixes: 4b95840a6ced ("net/mlx5e: Fix matching of speed to PRM link modes")
+Signed-off-by: Aya Levin <ayal@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+@@ -1021,7 +1021,7 @@ static bool ext_link_mode_requested(cons
+ {
+ #define MLX5E_MIN_PTYS_EXT_LINK_MODE_BIT ETHTOOL_LINK_MODE_50000baseKR_Full_BIT
+ int size = __ETHTOOL_LINK_MODE_MASK_NBITS - MLX5E_MIN_PTYS_EXT_LINK_MODE_BIT;
+- __ETHTOOL_DECLARE_LINK_MODE_MASK(modes);
++ __ETHTOOL_DECLARE_LINK_MODE_MASK(modes) = {0,};
+
+ bitmap_set(modes, MLX5E_MIN_PTYS_EXT_LINK_MODE_BIT, size);
+ return bitmap_intersects(modes, adver, __ETHTOOL_LINK_MODE_MASK_NBITS);
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Dmytro Linkin <dmitrolin@mellanox.com>
+Date: Thu, 29 Aug 2019 15:24:27 +0000
+Subject: net/mlx5e: Remove incorrect match criteria assignment line
+
+From: Dmytro Linkin <dmitrolin@mellanox.com>
+
+[ Upstream commit 752d3dc06d6936d5a357a18b6b51d91c7e134e88 ]
+
+Driver have function, which enable match criteria for misc parameters
+in dependence of eswitch capabilities.
+
+Fixes: 4f5d1beadc10 ("Merge branch 'mlx5-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mellanox/linux")
+Signed-off-by: Dmytro Linkin <dmitrolin@mellanox.com>
+Reviewed-by: Jianbo Liu <jianbol@mellanox.com>
+Reviewed-by: Roi Dayan <roid@mellanox.com>
+Reviewed-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+@@ -285,7 +285,6 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_es
+
+ mlx5_eswitch_set_rule_source_port(esw, spec, attr);
+
+- spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
+ if (attr->outer_match_level != MLX5_MATCH_NONE)
+ spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
+
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Jakub Kicinski <jakub.kicinski@netronome.com>
+Date: Fri, 18 Oct 2019 09:16:58 -0700
+Subject: net: netem: correct the parent's backlog when corrupted packet was dropped
+
+From: Jakub Kicinski <jakub.kicinski@netronome.com>
+
+[ Upstream commit e0ad032e144731a5928f2d75e91c2064ba1a764c ]
+
+If packet corruption failed we jump to finish_segs and return
+NET_XMIT_SUCCESS. Seeing success will make the parent qdisc
+increment its backlog, that's incorrect - we need to return
+NET_XMIT_DROP.
+
+Fixes: 6071bd1aa13e ("netem: Segment GSO packets on enqueue")
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Reviewed-by: Simon Horman <simon.horman@netronome.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_netem.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/sched/sch_netem.c
++++ b/net/sched/sch_netem.c
+@@ -616,6 +616,8 @@ finish_segs:
+ }
+ /* Parent qdiscs accounted for 1 skb of size @prev_len */
+ qdisc_tree_reduce_backlog(sch, -(nb - 1), -(len - prev_len));
++ } else if (!skb) {
++ return NET_XMIT_DROP;
+ }
+ return NET_XMIT_SUCCESS;
+ }
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Jakub Kicinski <jakub.kicinski@netronome.com>
+Date: Fri, 18 Oct 2019 09:16:57 -0700
+Subject: net: netem: fix error path for corrupted GSO frames
+
+From: Jakub Kicinski <jakub.kicinski@netronome.com>
+
+[ Upstream commit a7fa12d15855904aff1716e1fc723c03ba38c5cc ]
+
+To corrupt a GSO frame we first perform segmentation. We then
+proceed using the first segment instead of the full GSO skb and
+requeue the rest of the segments as separate packets.
+
+If there are any issues with processing the first segment we
+still want to process the rest, therefore we jump to the
+finish_segs label.
+
+Commit 177b8007463c ("net: netem: fix backlog accounting for
+corrupted GSO frames") started using the pointer to the first
+segment in the "rest of segments processing", but as mentioned
+above the first segment may have already been freed at this point.
+
+Backlog corrections for parent qdiscs have to be adjusted.
+
+Fixes: 177b8007463c ("net: netem: fix backlog accounting for corrupted GSO frames")
+Reported-by: kbuild test robot <lkp@intel.com>
+Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
+Reported-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Reviewed-by: Simon Horman <simon.horman@netronome.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_netem.c | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+--- a/net/sched/sch_netem.c
++++ b/net/sched/sch_netem.c
+@@ -509,6 +509,7 @@ static int netem_enqueue(struct sk_buff
+ if (skb->ip_summed == CHECKSUM_PARTIAL &&
+ skb_checksum_help(skb)) {
+ qdisc_drop(skb, sch, to_free);
++ skb = NULL;
+ goto finish_segs;
+ }
+
+@@ -593,9 +594,10 @@ static int netem_enqueue(struct sk_buff
+ finish_segs:
+ if (segs) {
+ unsigned int len, last_len;
+- int nb = 0;
++ int nb;
+
+- len = skb->len;
++ len = skb ? skb->len : 0;
++ nb = skb ? 1 : 0;
+
+ while (segs) {
+ skb2 = segs->next;
+@@ -612,7 +614,8 @@ finish_segs:
+ }
+ segs = skb2;
+ }
+- qdisc_tree_reduce_backlog(sch, -nb, prev_len - len);
++ /* Parent qdiscs accounted for 1 skb of size @prev_len */
++ qdisc_tree_reduce_backlog(sch, -(nb - 1), -(len - prev_len));
+ }
+ return NET_XMIT_SUCCESS;
+ }
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Doug Berger <opendmb@gmail.com>
+Date: Wed, 16 Oct 2019 16:06:30 -0700
+Subject: net: phy: bcm7xxx: define soft_reset for 40nm EPHY
+
+From: Doug Berger <opendmb@gmail.com>
+
+[ Upstream commit fe586b823372a9f43f90e2c6aa0573992ce7ccb7 ]
+
+The internal 40nm EPHYs use a "Workaround for putting the PHY in
+IDDQ mode." These PHYs require a soft reset to restore functionality
+after they are powered back up.
+
+This commit defines the soft_reset function to use genphy_soft_reset
+during phy_init_hw to accommodate this.
+
+Fixes: 6e2d85ec0559 ("net: phy: Stop with excessive soft reset")
+Signed-off-by: Doug Berger <opendmb@gmail.com>
+Acked-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/phy/bcm7xxx.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/phy/bcm7xxx.c
++++ b/drivers/net/phy/bcm7xxx.c
+@@ -572,6 +572,7 @@ static int bcm7xxx_28nm_probe(struct phy
+ .name = _name, \
+ /* PHY_BASIC_FEATURES */ \
+ .flags = PHY_IS_INTERNAL, \
++ .soft_reset = genphy_soft_reset, \
+ .config_init = bcm7xxx_config_init, \
+ .suspend = bcm7xxx_suspend, \
+ .resume = bcm7xxx_config_init, \
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Thu, 31 Oct 2019 15:42:26 -0700
+Subject: net: phylink: Fix phylink_dbg() macro
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+[ Upstream commit 9d68db5092c5fac99fccfdeab3f04df0b27d1762 ]
+
+The phylink_dbg() macro does not follow dynamic debug or defined(DEBUG)
+and as a result, it spams the kernel log since a PR_DEBUG level is
+currently used. Fix it to be defined appropriately whether
+CONFIG_DYNAMIC_DEBUG or defined(DEBUG) are set.
+
+Fixes: 17091180b152 ("net: phylink: Add phylink_{printk, err, warn, info, dbg} macros")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/phy/phylink.c | 16 ++++++++++++++++
+ 1 file changed, 16 insertions(+)
+
+--- a/drivers/net/phy/phylink.c
++++ b/drivers/net/phy/phylink.c
+@@ -87,8 +87,24 @@ struct phylink {
+ phylink_printk(KERN_WARNING, pl, fmt, ##__VA_ARGS__)
+ #define phylink_info(pl, fmt, ...) \
+ phylink_printk(KERN_INFO, pl, fmt, ##__VA_ARGS__)
++#if defined(CONFIG_DYNAMIC_DEBUG)
+ #define phylink_dbg(pl, fmt, ...) \
++do { \
++ if ((pl)->config->type == PHYLINK_NETDEV) \
++ netdev_dbg((pl)->netdev, fmt, ##__VA_ARGS__); \
++ else if ((pl)->config->type == PHYLINK_DEV) \
++ dev_dbg((pl)->dev, fmt, ##__VA_ARGS__); \
++} while (0)
++#elif defined(DEBUG)
++#define phylink_dbg(pl, fmt, ...) \
+ phylink_printk(KERN_DEBUG, pl, fmt, ##__VA_ARGS__)
++#else
++#define phylink_dbg(pl, fmt, ...) \
++({ \
++ if (0) \
++ phylink_printk(KERN_DEBUG, pl, fmt, ##__VA_ARGS__); \
++})
++#endif
+
+ /**
+ * phylink_set_port_modes() - set the port type modes in the ethtool mask
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 18 Oct 2019 15:20:05 -0700
+Subject: net: reorder 'struct net' fields to avoid false sharing
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 2a06b8982f8f2f40d03a3daf634676386bd84dbc ]
+
+Intel test robot reported a ~7% regression on TCP_CRR tests
+that they bisected to the cited commit.
+
+Indeed, every time a new TCP socket is created or deleted,
+the atomic counter net->count is touched (via get_net(net)
+and put_net(net) calls)
+
+So cpus might have to reload a contended cache line in
+net_hash_mix(net) calls.
+
+We need to reorder 'struct net' fields to move @hash_mix
+in a read mostly cache line.
+
+We move in the first cache line fields that can be
+dirtied often.
+
+We probably will have to address in a followup patch
+the __randomize_layout that was added in linux-4.13,
+since this might break our placement choices.
+
+Fixes: 355b98553789 ("netns: provide pure entropy for net_hash_mix()")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: kernel test robot <oliver.sang@intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/net_namespace.h | 25 +++++++++++++++++--------
+ 1 file changed, 17 insertions(+), 8 deletions(-)
+
+--- a/include/net/net_namespace.h
++++ b/include/net/net_namespace.h
+@@ -52,6 +52,9 @@ struct bpf_prog;
+ #define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
+
+ struct net {
++ /* First cache line can be often dirtied.
++ * Do not place here read-mostly fields.
++ */
+ refcount_t passive; /* To decide when the network
+ * namespace should be freed.
+ */
+@@ -60,7 +63,13 @@ struct net {
+ */
+ spinlock_t rules_mod_lock;
+
+- u32 hash_mix;
++ unsigned int dev_unreg_count;
++
++ unsigned int dev_base_seq; /* protected by rtnl_mutex */
++ int ifindex;
++
++ spinlock_t nsid_lock;
++ atomic_t fnhe_genid;
+
+ struct list_head list; /* list of network namespaces */
+ struct list_head exit_list; /* To linked to call pernet exit
+@@ -76,11 +85,11 @@ struct net {
+ #endif
+ struct user_namespace *user_ns; /* Owning user namespace */
+ struct ucounts *ucounts;
+- spinlock_t nsid_lock;
+ struct idr netns_ids;
+
+ struct ns_common ns;
+
++ struct list_head dev_base_head;
+ struct proc_dir_entry *proc_net;
+ struct proc_dir_entry *proc_net_stat;
+
+@@ -93,12 +102,14 @@ struct net {
+
+ struct uevent_sock *uevent_sock; /* uevent socket */
+
+- struct list_head dev_base_head;
+ struct hlist_head *dev_name_head;
+ struct hlist_head *dev_index_head;
+- unsigned int dev_base_seq; /* protected by rtnl_mutex */
+- int ifindex;
+- unsigned int dev_unreg_count;
++ /* Note that @hash_mix can be read millions times per second,
++ * it is critical that it is on a read_mostly cache line.
++ */
++ u32 hash_mix;
++
++ struct net_device *loopback_dev; /* The loopback */
+
+ /* core fib_rules */
+ struct list_head rules_ops;
+@@ -106,7 +117,6 @@ struct net {
+ struct list_head fib_notifier_ops; /* Populated by
+ * register_pernet_subsys()
+ */
+- struct net_device *loopback_dev; /* The loopback */
+ struct netns_core core;
+ struct netns_mib mib;
+ struct netns_packet packet;
+@@ -171,7 +181,6 @@ struct net {
+ struct netns_xdp xdp;
+ #endif
+ struct sock *diag_nlsk;
+- atomic_t fnhe_genid;
+ } __randomize_layout;
+
+ #include <linux/seq_file_net.h>
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Date: Tue, 29 Oct 2019 13:59:32 +0200
+Subject: net: rtnetlink: fix a typo fbd -> fdb
+
+From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+
+[ Upstream commit 8b73018fe44521c1cf59d7bac53624c87d3f10e2 ]
+
+A simple typo fix in the nl error message (fbd -> fdb).
+
+CC: David Ahern <dsahern@gmail.com>
+Fixes: 8c6e137fbc7f ("rtnetlink: Update rtnl_fdb_dump for strict data checking")
+Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Reviewed-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/rtnetlink.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -3916,7 +3916,7 @@ static int valid_fdb_dump_strict(const s
+ ndm = nlmsg_data(nlh);
+ if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state ||
+ ndm->ndm_flags || ndm->ndm_type) {
+- NL_SET_ERR_MSG(extack, "Invalid values in header for fbd dump request");
++ NL_SET_ERR_MSG(extack, "Invalid values in header for fdb dump request");
+ return -EINVAL;
+ }
+
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Ursula Braun <ubraun@linux.ibm.com>
+Date: Wed, 23 Oct 2019 15:44:05 +0200
+Subject: net/smc: fix closing of fallback SMC sockets
+
+From: Ursula Braun <ubraun@linux.ibm.com>
+
+[ Upstream commit f536dffc0b79738c3104af999318279dccbaa261 ]
+
+For SMC sockets forced to fallback to TCP, the file is propagated
+from the outer SMC to the internal TCP socket. When closing the SMC
+socket, the internal TCP socket file pointer must be restored to the
+original NULL value, otherwise memory leaks may show up (found with
+CONFIG_DEBUG_KMEMLEAK).
+
+The internal TCP socket is released in smc_clcsock_release(), which
+calls __sock_release() function in net/socket.c. This calls the
+needed iput(SOCK_INODE(sock)) only, if the file pointer has been reset
+to the original NULL-value.
+
+Fixes: 07603b230895 ("net/smc: propagate file from SMC to TCP socket")
+Signed-off-by: Ursula Braun <ubraun@linux.ibm.com>
+Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/smc/af_smc.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/net/smc/af_smc.c
++++ b/net/smc/af_smc.c
+@@ -123,6 +123,12 @@ struct proto smc_proto6 = {
+ };
+ EXPORT_SYMBOL_GPL(smc_proto6);
+
++static void smc_restore_fallback_changes(struct smc_sock *smc)
++{
++ smc->clcsock->file->private_data = smc->sk.sk_socket;
++ smc->clcsock->file = NULL;
++}
++
+ static int __smc_release(struct smc_sock *smc)
+ {
+ struct sock *sk = &smc->sk;
+@@ -141,6 +147,7 @@ static int __smc_release(struct smc_sock
+ }
+ sk->sk_state = SMC_CLOSED;
+ sk->sk_state_change(sk);
++ smc_restore_fallback_changes(smc);
+ }
+
+ sk->sk_prot->unhash(sk);
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Ursula Braun <ubraun@linux.ibm.com>
+Date: Tue, 29 Oct 2019 12:41:26 +0100
+Subject: net/smc: fix refcounting for non-blocking connect()
+
+From: Ursula Braun <ubraun@linux.ibm.com>
+
+[ Upstream commit 301428ea3708188dc4a243e6e6b46c03b46a0fbc ]
+
+If a nonblocking socket is immediately closed after connect(),
+the connect worker may not have started. This results in a refcount
+problem, since sock_hold() is called from the connect worker.
+This patch moves the sock_hold in front of the connect worker
+scheduling.
+
+Reported-by: syzbot+4c063e6dea39e4b79f29@syzkaller.appspotmail.com
+Fixes: 50717a37db03 ("net/smc: nonblocking connect rework")
+Reviewed-by: Karsten Graul <kgraul@linux.ibm.com>
+Signed-off-by: Ursula Braun <ubraun@linux.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/smc/af_smc.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/smc/af_smc.c
++++ b/net/smc/af_smc.c
+@@ -707,8 +707,6 @@ static int __smc_connect(struct smc_sock
+ int smc_type;
+ int rc = 0;
+
+- sock_hold(&smc->sk); /* sock put in passive closing */
+-
+ if (smc->use_fallback)
+ return smc_connect_fallback(smc, smc->fallback_rsn);
+
+@@ -853,6 +851,8 @@ static int smc_connect(struct socket *so
+ rc = kernel_connect(smc->clcsock, addr, alen, flags);
+ if (rc && rc != -EINPROGRESS)
+ goto out;
++
++ sock_hold(&smc->sk); /* sock put in passive closing */
+ if (flags & O_NONBLOCK) {
+ if (schedule_work(&smc->connect_work))
+ smc->connect_nonblock = 1;
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Ursula Braun <ubraun@linux.ibm.com>
+Date: Wed, 23 Oct 2019 15:44:06 +0200
+Subject: net/smc: keep vlan_id for SMC-R in smc_listen_work()
+
+From: Ursula Braun <ubraun@linux.ibm.com>
+
+[ Upstream commit ca5f8d2dd5229ccacdd5cfde1ce4d32b0810e454 ]
+
+Creating of an SMC-R connection with vlan-id fails, because
+smc_listen_work() determines the vlan_id of the connection,
+saves it in struct smc_init_info ini, but clears the ini area
+again if SMC-D is not applicable.
+This patch just resets the ISM device before investigating
+SMC-R availability.
+
+Fixes: bc36d2fc93eb ("net/smc: consolidate function parameters")
+Signed-off-by: Ursula Braun <ubraun@linux.ibm.com>
+Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/smc/af_smc.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/smc/af_smc.c
++++ b/net/smc/af_smc.c
+@@ -1298,8 +1298,8 @@ static void smc_listen_work(struct work_
+ /* check if RDMA is available */
+ if (!ism_supported) { /* SMC_TYPE_R or SMC_TYPE_B */
+ /* prepare RDMA check */
+- memset(&ini, 0, sizeof(ini));
+ ini.is_smcd = false;
++ ini.ism_dev = NULL;
+ ini.ib_lcl = &pclc->lcl;
+ rc = smc_find_rdma_device(new_smc, &ini);
+ if (rc) {
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Andrew Lunn <andrew@lunn.ch>
+Date: Thu, 17 Oct 2019 21:29:26 +0200
+Subject: net: usb: lan78xx: Connect PHY before registering MAC
+
+From: Andrew Lunn <andrew@lunn.ch>
+
+[ Upstream commit 38b4fe320119859c11b1dc06f6b4987a16344fa1 ]
+
+As soon as the netdev is registers, the kernel can start using the
+interface. If the driver connects the MAC to the PHY after the netdev
+is registered, there is a race condition where the interface can be
+opened without having the PHY connected.
+
+Change the order to close this race condition.
+
+Fixes: 92571a1aae40 ("lan78xx: Connect phy early")
+Reported-by: Daniel Wagner <dwagner@suse.de>
+Signed-off-by: Andrew Lunn <andrew@lunn.ch>
+Tested-by: Daniel Wagner <dwagner@suse.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/usb/lan78xx.c | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+--- a/drivers/net/usb/lan78xx.c
++++ b/drivers/net/usb/lan78xx.c
+@@ -3792,10 +3792,14 @@ static int lan78xx_probe(struct usb_inte
+ /* driver requires remote-wakeup capability during autosuspend. */
+ intf->needs_remote_wakeup = 1;
+
++ ret = lan78xx_phy_init(dev);
++ if (ret < 0)
++ goto out4;
++
+ ret = register_netdev(netdev);
+ if (ret != 0) {
+ netif_err(dev, probe, netdev, "couldn't register the device\n");
+- goto out4;
++ goto out5;
+ }
+
+ usb_set_intfdata(intf, dev);
+@@ -3808,14 +3812,10 @@ static int lan78xx_probe(struct usb_inte
+ pm_runtime_set_autosuspend_delay(&udev->dev,
+ DEFAULT_AUTOSUSPEND_DELAY);
+
+- ret = lan78xx_phy_init(dev);
+- if (ret < 0)
+- goto out5;
+-
+ return 0;
+
+ out5:
+- unregister_netdev(netdev);
++ phy_disconnect(netdev->phydev);
+ out4:
+ usb_free_urb(dev->urb_intr);
+ out3:
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Daniel Wagner <dwagner@suse.de>
+Date: Fri, 25 Oct 2019 10:04:13 +0200
+Subject: net: usb: lan78xx: Disable interrupts before calling generic_handle_irq()
+
+From: Daniel Wagner <dwagner@suse.de>
+
+[ Upstream commit 0a29ac5bd3a988dc151c8d26910dec2557421f64 ]
+
+lan78xx_status() will run with interrupts enabled due to the change in
+ed194d136769 ("usb: core: remove local_irq_save() around ->complete()
+handler"). generic_handle_irq() expects to be run with IRQs disabled.
+
+[ 4.886203] 000: irq 79 handler irq_default_primary_handler+0x0/0x8 enabled interrupts
+[ 4.886243] 000: WARNING: CPU: 0 PID: 0 at kernel/irq/handle.c:152 __handle_irq_event_percpu+0x154/0x168
+[ 4.896294] 000: Modules linked in:
+[ 4.896301] 000: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.3.6 #39
+[ 4.896310] 000: Hardware name: Raspberry Pi 3 Model B+ (DT)
+[ 4.896315] 000: pstate: 60000005 (nZCv daif -PAN -UAO)
+[ 4.896321] 000: pc : __handle_irq_event_percpu+0x154/0x168
+[ 4.896331] 000: lr : __handle_irq_event_percpu+0x154/0x168
+[ 4.896339] 000: sp : ffff000010003cc0
+[ 4.896346] 000: x29: ffff000010003cc0 x28: 0000000000000060
+[ 4.896355] 000: x27: ffff000011021980 x26: ffff00001189c72b
+[ 4.896364] 000: x25: ffff000011702bc0 x24: ffff800036d6e400
+[ 4.896373] 000: x23: 000000000000004f x22: ffff000010003d64
+[ 4.896381] 000: x21: 0000000000000000 x20: 0000000000000002
+[ 4.896390] 000: x19: ffff8000371c8480 x18: 0000000000000060
+[ 4.896398] 000: x17: 0000000000000000 x16: 00000000000000eb
+[ 4.896406] 000: x15: ffff000011712d18 x14: 7265746e69206465
+[ 4.896414] 000: x13: ffff000010003ba0 x12: ffff000011712df0
+[ 4.896422] 000: x11: 0000000000000001 x10: ffff000011712e08
+[ 4.896430] 000: x9 : 0000000000000001 x8 : 000000000003c920
+[ 4.896437] 000: x7 : ffff0000118cc410 x6 : ffff0000118c7f00
+[ 4.896445] 000: x5 : 000000000003c920 x4 : 0000000000004510
+[ 4.896453] 000: x3 : ffff000011712dc8 x2 : 0000000000000000
+[ 4.896461] 000: x1 : 73a3f67df94c1500 x0 : 0000000000000000
+[ 4.896466] 000: Call trace:
+[ 4.896471] 000: __handle_irq_event_percpu+0x154/0x168
+[ 4.896481] 000: handle_irq_event_percpu+0x50/0xb0
+[ 4.896489] 000: handle_irq_event+0x40/0x98
+[ 4.896497] 000: handle_simple_irq+0xa4/0xf0
+[ 4.896505] 000: generic_handle_irq+0x24/0x38
+[ 4.896513] 000: intr_complete+0xb0/0xe0
+[ 4.896525] 000: __usb_hcd_giveback_urb+0x58/0xd8
+[ 4.896533] 000: usb_giveback_urb_bh+0xd0/0x170
+[ 4.896539] 000: tasklet_action_common.isra.0+0x9c/0x128
+[ 4.896549] 000: tasklet_hi_action+0x24/0x30
+[ 4.896556] 000: __do_softirq+0x120/0x23c
+[ 4.896564] 000: irq_exit+0xb8/0xd8
+[ 4.896571] 000: __handle_domain_irq+0x64/0xb8
+[ 4.896579] 000: bcm2836_arm_irqchip_handle_irq+0x60/0xc0
+[ 4.896586] 000: el1_irq+0xb8/0x140
+[ 4.896592] 000: arch_cpu_idle+0x10/0x18
+[ 4.896601] 000: do_idle+0x200/0x280
+[ 4.896608] 000: cpu_startup_entry+0x20/0x28
+[ 4.896615] 000: rest_init+0xb4/0xc0
+[ 4.896623] 000: arch_call_rest_init+0xc/0x14
+[ 4.896632] 000: start_kernel+0x454/0x480
+
+Fixes: ed194d136769 ("usb: core: remove local_irq_save() around ->complete() handler")
+Cc: Woojung Huh <woojung.huh@microchip.com>
+Cc: Marc Zyngier <maz@kernel.org>
+Cc: Andrew Lunn <andrew@lunn.ch>
+Cc: Stefan Wahren <wahrenst@gmx.net>
+Cc: Jisheng Zhang <Jisheng.Zhang@synaptics.com>
+Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: David Miller <davem@davemloft.net>
+Signed-off-by: Daniel Wagner <dwagner@suse.de>
+Tested-by: Stefan Wahren <wahrenst@gmx.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/usb/lan78xx.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/usb/lan78xx.c
++++ b/drivers/net/usb/lan78xx.c
+@@ -1265,8 +1265,11 @@ static void lan78xx_status(struct lan78x
+ netif_dbg(dev, link, dev->net, "PHY INTR: 0x%08x\n", intdata);
+ lan78xx_defer_kevent(dev, EVENT_LINK_RESET);
+
+- if (dev->domain_data.phyirq > 0)
++ if (dev->domain_data.phyirq > 0) {
++ local_irq_disable();
+ generic_handle_irq(dev->domain_data.phyirq);
++ local_irq_enable();
++ }
+ } else
+ netdev_warn(dev->net,
+ "unexpected interrupt: 0x%08x\n", intdata);
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 23 Oct 2019 22:44:51 -0700
+Subject: net: use skb_queue_empty_lockless() in busy poll contexts
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 3f926af3f4d688e2e11e7f8ed04e277a14d4d4a4 ]
+
+Busy polling usually runs without locks.
+Let's use skb_queue_empty_lockless() instead of skb_queue_empty()
+
+Also uses READ_ONCE() in __skb_try_recv_datagram() to address
+a similar potential problem.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/crypto/chelsio/chtls/chtls_io.c | 2 +-
+ net/core/datagram.c | 2 +-
+ net/core/sock.c | 2 +-
+ net/ipv4/tcp.c | 2 +-
+ net/sctp/socket.c | 2 +-
+ 5 files changed, 5 insertions(+), 5 deletions(-)
+
+--- a/drivers/crypto/chelsio/chtls/chtls_io.c
++++ b/drivers/crypto/chelsio/chtls/chtls_io.c
+@@ -1701,7 +1701,7 @@ int chtls_recvmsg(struct sock *sk, struc
+ return peekmsg(sk, msg, len, nonblock, flags);
+
+ if (sk_can_busy_loop(sk) &&
+- skb_queue_empty(&sk->sk_receive_queue) &&
++ skb_queue_empty_lockless(&sk->sk_receive_queue) &&
+ sk->sk_state == TCP_ESTABLISHED)
+ sk_busy_loop(sk, nonblock);
+
+--- a/net/core/datagram.c
++++ b/net/core/datagram.c
+@@ -278,7 +278,7 @@ struct sk_buff *__skb_try_recv_datagram(
+ break;
+
+ sk_busy_loop(sk, flags & MSG_DONTWAIT);
+- } while (sk->sk_receive_queue.prev != *last);
++ } while (READ_ONCE(sk->sk_receive_queue.prev) != *last);
+
+ error = -EAGAIN;
+
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -3593,7 +3593,7 @@ bool sk_busy_loop_end(void *p, unsigned
+ {
+ struct sock *sk = p;
+
+- return !skb_queue_empty(&sk->sk_receive_queue) ||
++ return !skb_queue_empty_lockless(&sk->sk_receive_queue) ||
+ sk_busy_loop_timeout(sk, start_time);
+ }
+ EXPORT_SYMBOL(sk_busy_loop_end);
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -1961,7 +1961,7 @@ int tcp_recvmsg(struct sock *sk, struct
+ if (unlikely(flags & MSG_ERRQUEUE))
+ return inet_recv_error(sk, msg, len, addr_len);
+
+- if (sk_can_busy_loop(sk) && skb_queue_empty(&sk->sk_receive_queue) &&
++ if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue) &&
+ (sk->sk_state == TCP_ESTABLISHED))
+ sk_busy_loop(sk, nonblock);
+
+--- a/net/sctp/socket.c
++++ b/net/sctp/socket.c
+@@ -8724,7 +8724,7 @@ struct sk_buff *sctp_skb_recv_datagram(s
+ if (sk_can_busy_loop(sk)) {
+ sk_busy_loop(sk, noblock);
+
+- if (!skb_queue_empty(&sk->sk_receive_queue))
++ if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
+ continue;
+ }
+
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 23 Oct 2019 22:44:50 -0700
+Subject: net: use skb_queue_empty_lockless() in poll() handlers
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 3ef7cf57c72f32f61e97f8fa401bc39ea1f1a5d4 ]
+
+Many poll() handlers are lockless. Using skb_queue_empty_lockless()
+instead of skb_queue_empty() is more appropriate.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/isdn/capi/capi.c | 2 +-
+ net/atm/common.c | 2 +-
+ net/bluetooth/af_bluetooth.c | 4 ++--
+ net/caif/caif_socket.c | 2 +-
+ net/core/datagram.c | 4 ++--
+ net/decnet/af_decnet.c | 2 +-
+ net/ipv4/tcp.c | 2 +-
+ net/ipv4/udp.c | 2 +-
+ net/nfc/llcp_sock.c | 4 ++--
+ net/phonet/socket.c | 4 ++--
+ net/sctp/socket.c | 4 ++--
+ net/tipc/socket.c | 4 ++--
+ net/unix/af_unix.c | 6 +++---
+ net/vmw_vsock/af_vsock.c | 2 +-
+ 14 files changed, 22 insertions(+), 22 deletions(-)
+
+--- a/drivers/isdn/capi/capi.c
++++ b/drivers/isdn/capi/capi.c
+@@ -744,7 +744,7 @@ capi_poll(struct file *file, poll_table
+
+ poll_wait(file, &(cdev->recvwait), wait);
+ mask = EPOLLOUT | EPOLLWRNORM;
+- if (!skb_queue_empty(&cdev->recvqueue))
++ if (!skb_queue_empty_lockless(&cdev->recvqueue))
+ mask |= EPOLLIN | EPOLLRDNORM;
+ return mask;
+ }
+--- a/net/atm/common.c
++++ b/net/atm/common.c
+@@ -668,7 +668,7 @@ __poll_t vcc_poll(struct file *file, str
+ mask |= EPOLLHUP;
+
+ /* readable? */
+- if (!skb_queue_empty(&sk->sk_receive_queue))
++ if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
+ mask |= EPOLLIN | EPOLLRDNORM;
+
+ /* writable? */
+--- a/net/bluetooth/af_bluetooth.c
++++ b/net/bluetooth/af_bluetooth.c
+@@ -460,7 +460,7 @@ __poll_t bt_sock_poll(struct file *file,
+ if (sk->sk_state == BT_LISTEN)
+ return bt_accept_poll(sk);
+
+- if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
++ if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
+ mask |= EPOLLERR |
+ (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
+
+@@ -470,7 +470,7 @@ __poll_t bt_sock_poll(struct file *file,
+ if (sk->sk_shutdown == SHUTDOWN_MASK)
+ mask |= EPOLLHUP;
+
+- if (!skb_queue_empty(&sk->sk_receive_queue))
++ if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
+ mask |= EPOLLIN | EPOLLRDNORM;
+
+ if (sk->sk_state == BT_CLOSED)
+--- a/net/caif/caif_socket.c
++++ b/net/caif/caif_socket.c
+@@ -953,7 +953,7 @@ static __poll_t caif_poll(struct file *f
+ mask |= EPOLLRDHUP;
+
+ /* readable? */
+- if (!skb_queue_empty(&sk->sk_receive_queue) ||
++ if (!skb_queue_empty_lockless(&sk->sk_receive_queue) ||
+ (sk->sk_shutdown & RCV_SHUTDOWN))
+ mask |= EPOLLIN | EPOLLRDNORM;
+
+--- a/net/core/datagram.c
++++ b/net/core/datagram.c
+@@ -767,7 +767,7 @@ __poll_t datagram_poll(struct file *file
+ mask = 0;
+
+ /* exceptional events? */
+- if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
++ if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
+ mask |= EPOLLERR |
+ (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
+
+@@ -777,7 +777,7 @@ __poll_t datagram_poll(struct file *file
+ mask |= EPOLLHUP;
+
+ /* readable? */
+- if (!skb_queue_empty(&sk->sk_receive_queue))
++ if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
+ mask |= EPOLLIN | EPOLLRDNORM;
+
+ /* Connection-based need to check for termination and startup */
+--- a/net/decnet/af_decnet.c
++++ b/net/decnet/af_decnet.c
+@@ -1205,7 +1205,7 @@ static __poll_t dn_poll(struct file *fil
+ struct dn_scp *scp = DN_SK(sk);
+ __poll_t mask = datagram_poll(file, sock, wait);
+
+- if (!skb_queue_empty(&scp->other_receive_queue))
++ if (!skb_queue_empty_lockless(&scp->other_receive_queue))
+ mask |= EPOLLRDBAND;
+
+ return mask;
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -584,7 +584,7 @@ __poll_t tcp_poll(struct file *file, str
+ }
+ /* This barrier is coupled with smp_wmb() in tcp_reset() */
+ smp_rmb();
+- if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
++ if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
+ mask |= EPOLLERR;
+
+ return mask;
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -2723,7 +2723,7 @@ __poll_t udp_poll(struct file *file, str
+ __poll_t mask = datagram_poll(file, sock, wait);
+ struct sock *sk = sock->sk;
+
+- if (!skb_queue_empty(&udp_sk(sk)->reader_queue))
++ if (!skb_queue_empty_lockless(&udp_sk(sk)->reader_queue))
+ mask |= EPOLLIN | EPOLLRDNORM;
+
+ /* Check for false positives due to checksum errors */
+--- a/net/nfc/llcp_sock.c
++++ b/net/nfc/llcp_sock.c
+@@ -554,11 +554,11 @@ static __poll_t llcp_sock_poll(struct fi
+ if (sk->sk_state == LLCP_LISTEN)
+ return llcp_accept_poll(sk);
+
+- if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
++ if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
+ mask |= EPOLLERR |
+ (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
+
+- if (!skb_queue_empty(&sk->sk_receive_queue))
++ if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
+ mask |= EPOLLIN | EPOLLRDNORM;
+
+ if (sk->sk_state == LLCP_CLOSED)
+--- a/net/phonet/socket.c
++++ b/net/phonet/socket.c
+@@ -338,9 +338,9 @@ static __poll_t pn_socket_poll(struct fi
+
+ if (sk->sk_state == TCP_CLOSE)
+ return EPOLLERR;
+- if (!skb_queue_empty(&sk->sk_receive_queue))
++ if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
+ mask |= EPOLLIN | EPOLLRDNORM;
+- if (!skb_queue_empty(&pn->ctrlreq_queue))
++ if (!skb_queue_empty_lockless(&pn->ctrlreq_queue))
+ mask |= EPOLLPRI;
+ if (!mask && sk->sk_state == TCP_CLOSE_WAIT)
+ return EPOLLHUP;
+--- a/net/sctp/socket.c
++++ b/net/sctp/socket.c
+@@ -8329,7 +8329,7 @@ __poll_t sctp_poll(struct file *file, st
+ mask = 0;
+
+ /* Is there any exceptional events? */
+- if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
++ if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
+ mask |= EPOLLERR |
+ (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
+ if (sk->sk_shutdown & RCV_SHUTDOWN)
+@@ -8338,7 +8338,7 @@ __poll_t sctp_poll(struct file *file, st
+ mask |= EPOLLHUP;
+
+ /* Is it readable? Reconsider this code with TCP-style support. */
+- if (!skb_queue_empty(&sk->sk_receive_queue))
++ if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
+ mask |= EPOLLIN | EPOLLRDNORM;
+
+ /* The association is either gone or not ready. */
+--- a/net/tipc/socket.c
++++ b/net/tipc/socket.c
+@@ -740,7 +740,7 @@ static __poll_t tipc_poll(struct file *f
+ /* fall through */
+ case TIPC_LISTEN:
+ case TIPC_CONNECTING:
+- if (!skb_queue_empty(&sk->sk_receive_queue))
++ if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
+ revents |= EPOLLIN | EPOLLRDNORM;
+ break;
+ case TIPC_OPEN:
+@@ -748,7 +748,7 @@ static __poll_t tipc_poll(struct file *f
+ revents |= EPOLLOUT;
+ if (!tipc_sk_type_connectionless(sk))
+ break;
+- if (skb_queue_empty(&sk->sk_receive_queue))
++ if (skb_queue_empty_lockless(&sk->sk_receive_queue))
+ break;
+ revents |= EPOLLIN | EPOLLRDNORM;
+ break;
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -2599,7 +2599,7 @@ static __poll_t unix_poll(struct file *f
+ mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
+
+ /* readable? */
+- if (!skb_queue_empty(&sk->sk_receive_queue))
++ if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
+ mask |= EPOLLIN | EPOLLRDNORM;
+
+ /* Connection-based need to check for termination and startup */
+@@ -2628,7 +2628,7 @@ static __poll_t unix_dgram_poll(struct f
+ mask = 0;
+
+ /* exceptional events? */
+- if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
++ if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
+ mask |= EPOLLERR |
+ (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
+
+@@ -2638,7 +2638,7 @@ static __poll_t unix_dgram_poll(struct f
+ mask |= EPOLLHUP;
+
+ /* readable? */
+- if (!skb_queue_empty(&sk->sk_receive_queue))
++ if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
+ mask |= EPOLLIN | EPOLLRDNORM;
+
+ /* Connection-based need to check for termination and startup */
+--- a/net/vmw_vsock/af_vsock.c
++++ b/net/vmw_vsock/af_vsock.c
+@@ -870,7 +870,7 @@ static __poll_t vsock_poll(struct file *
+ * the queue and write as long as the socket isn't shutdown for
+ * sending.
+ */
+- if (!skb_queue_empty(&sk->sk_receive_queue) ||
++ if (!skb_queue_empty_lockless(&sk->sk_receive_queue) ||
+ (sk->sk_shutdown & RCV_SHUTDOWN)) {
+ mask |= EPOLLIN | EPOLLRDNORM;
+ }
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: zhanglin <zhang.lin16@zte.com.cn>
+Date: Sat, 26 Oct 2019 15:54:16 +0800
+Subject: net: Zeroing the structure ethtool_wolinfo in ethtool_get_wol()
+
+From: zhanglin <zhang.lin16@zte.com.cn>
+
+[ Upstream commit 5ff223e86f5addbfae26419cbb5d61d98f6fbf7d ]
+
+memset() the structure ethtool_wolinfo that has padded bytes
+but the padded bytes have not been zeroed out.
+
+Signed-off-by: zhanglin <zhang.lin16@zte.com.cn>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/ethtool.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/core/ethtool.c
++++ b/net/core/ethtool.c
+@@ -1395,11 +1395,13 @@ static int ethtool_reset(struct net_devi
+
+ static int ethtool_get_wol(struct net_device *dev, char __user *useraddr)
+ {
+- struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL };
++ struct ethtool_wolinfo wol;
+
+ if (!dev->ethtool_ops->get_wol)
+ return -EOPNOTSUPP;
+
++ memset(&wol, 0, sizeof(struct ethtool_wolinfo));
++ wol.cmd = ETHTOOL_GWOL;
+ dev->ethtool_ops->get_wol(dev, &wol);
+
+ if (copy_to_user(useraddr, &wol, sizeof(wol)))
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Guillaume Nault <gnault@redhat.com>
+Date: Wed, 23 Oct 2019 18:39:04 +0200
+Subject: netns: fix GFP flags in rtnl_net_notifyid()
+
+From: Guillaume Nault <gnault@redhat.com>
+
+[ Upstream commit d4e4fdf9e4a27c87edb79b1478955075be141f67 ]
+
+In rtnl_net_notifyid(), we certainly can't pass a null GFP flag to
+rtnl_notify(). A GFP_KERNEL flag would be fine in most circumstances,
+but there are a few paths calling rtnl_net_notifyid() from atomic
+context or from RCU critical sections. The later also precludes the use
+of gfp_any() as it wouldn't detect the RCU case. Also, the nlmsg_new()
+call is wrong too, as it uses GFP_KERNEL unconditionally.
+
+Therefore, we need to pass the GFP flags as parameter and propagate it
+through function calls until the proper flags can be determined.
+
+In most cases, GFP_KERNEL is fine. The exceptions are:
+ * openvswitch: ovs_vport_cmd_get() and ovs_vport_cmd_dump()
+ indirectly call rtnl_net_notifyid() from RCU critical section,
+
+ * rtnetlink: rtmsg_ifinfo_build_skb() already receives GFP flags as
+ parameter.
+
+Also, in ovs_vport_cmd_build_info(), let's change the GFP flags used
+by nlmsg_new(). The function is allowed to sleep, so better make the
+flags consistent with the ones used in the following
+ovs_vport_cmd_fill_info() call.
+
+Found by code inspection.
+
+Fixes: 9a9634545c70 ("netns: notify netns id events")
+Signed-off-by: Guillaume Nault <gnault@redhat.com>
+Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Acked-by: Pravin B Shelar <pshelar@ovn.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/net_namespace.h | 2 +-
+ net/core/dev.c | 2 +-
+ net/core/net_namespace.c | 17 +++++++++--------
+ net/core/rtnetlink.c | 14 +++++++-------
+ net/openvswitch/datapath.c | 20 +++++++++++---------
+ 5 files changed, 29 insertions(+), 26 deletions(-)
+
+--- a/include/net/net_namespace.h
++++ b/include/net/net_namespace.h
+@@ -333,7 +333,7 @@ static inline struct net *read_pnet(cons
+ #define __net_initconst __initconst
+ #endif
+
+-int peernet2id_alloc(struct net *net, struct net *peer);
++int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp);
+ int peernet2id(struct net *net, struct net *peer);
+ bool peernet_has_id(struct net *net, struct net *peer);
+ struct net *get_net_ns_by_id(struct net *net, int id);
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -9411,7 +9411,7 @@ int dev_change_net_namespace(struct net_
+ call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
+ rcu_barrier();
+
+- new_nsid = peernet2id_alloc(dev_net(dev), net);
++ new_nsid = peernet2id_alloc(dev_net(dev), net, GFP_KERNEL);
+ /* If there is an ifindex conflict assign a new one */
+ if (__dev_get_by_index(net, dev->ifindex))
+ new_ifindex = dev_new_index(net);
+--- a/net/core/net_namespace.c
++++ b/net/core/net_namespace.c
+@@ -245,11 +245,11 @@ static int __peernet2id(struct net *net,
+ return __peernet2id_alloc(net, peer, &no);
+ }
+
+-static void rtnl_net_notifyid(struct net *net, int cmd, int id);
++static void rtnl_net_notifyid(struct net *net, int cmd, int id, gfp_t gfp);
+ /* This function returns the id of a peer netns. If no id is assigned, one will
+ * be allocated and returned.
+ */
+-int peernet2id_alloc(struct net *net, struct net *peer)
++int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp)
+ {
+ bool alloc = false, alive = false;
+ int id;
+@@ -268,7 +268,7 @@ int peernet2id_alloc(struct net *net, st
+ id = __peernet2id_alloc(net, peer, &alloc);
+ spin_unlock_bh(&net->nsid_lock);
+ if (alloc && id >= 0)
+- rtnl_net_notifyid(net, RTM_NEWNSID, id);
++ rtnl_net_notifyid(net, RTM_NEWNSID, id, gfp);
+ if (alive)
+ put_net(peer);
+ return id;
+@@ -532,7 +532,8 @@ static void unhash_nsid(struct net *net,
+ idr_remove(&tmp->netns_ids, id);
+ spin_unlock_bh(&tmp->nsid_lock);
+ if (id >= 0)
+- rtnl_net_notifyid(tmp, RTM_DELNSID, id);
++ rtnl_net_notifyid(tmp, RTM_DELNSID, id,
++ GFP_KERNEL);
+ if (tmp == last)
+ break;
+ }
+@@ -764,7 +765,7 @@ static int rtnl_net_newid(struct sk_buff
+ err = alloc_netid(net, peer, nsid);
+ spin_unlock_bh(&net->nsid_lock);
+ if (err >= 0) {
+- rtnl_net_notifyid(net, RTM_NEWNSID, err);
++ rtnl_net_notifyid(net, RTM_NEWNSID, err, GFP_KERNEL);
+ err = 0;
+ } else if (err == -ENOSPC && nsid >= 0) {
+ err = -EEXIST;
+@@ -1051,7 +1052,7 @@ end:
+ return err < 0 ? err : skb->len;
+ }
+
+-static void rtnl_net_notifyid(struct net *net, int cmd, int id)
++static void rtnl_net_notifyid(struct net *net, int cmd, int id, gfp_t gfp)
+ {
+ struct net_fill_args fillargs = {
+ .cmd = cmd,
+@@ -1060,7 +1061,7 @@ static void rtnl_net_notifyid(struct net
+ struct sk_buff *msg;
+ int err = -ENOMEM;
+
+- msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL);
++ msg = nlmsg_new(rtnl_net_get_size(), gfp);
+ if (!msg)
+ goto out;
+
+@@ -1068,7 +1069,7 @@ static void rtnl_net_notifyid(struct net
+ if (err < 0)
+ goto err_out;
+
+- rtnl_notify(msg, net, 0, RTNLGRP_NSID, NULL, 0);
++ rtnl_notify(msg, net, 0, RTNLGRP_NSID, NULL, gfp);
+ return;
+
+ err_out:
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -1523,7 +1523,7 @@ static noinline_for_stack int nla_put_if
+
+ static int rtnl_fill_link_netnsid(struct sk_buff *skb,
+ const struct net_device *dev,
+- struct net *src_net)
++ struct net *src_net, gfp_t gfp)
+ {
+ bool put_iflink = false;
+
+@@ -1531,7 +1531,7 @@ static int rtnl_fill_link_netnsid(struct
+ struct net *link_net = dev->rtnl_link_ops->get_link_net(dev);
+
+ if (!net_eq(dev_net(dev), link_net)) {
+- int id = peernet2id_alloc(src_net, link_net);
++ int id = peernet2id_alloc(src_net, link_net, gfp);
+
+ if (nla_put_s32(skb, IFLA_LINK_NETNSID, id))
+ return -EMSGSIZE;
+@@ -1589,7 +1589,7 @@ static int rtnl_fill_ifinfo(struct sk_bu
+ int type, u32 pid, u32 seq, u32 change,
+ unsigned int flags, u32 ext_filter_mask,
+ u32 event, int *new_nsid, int new_ifindex,
+- int tgt_netnsid)
++ int tgt_netnsid, gfp_t gfp)
+ {
+ struct ifinfomsg *ifm;
+ struct nlmsghdr *nlh;
+@@ -1681,7 +1681,7 @@ static int rtnl_fill_ifinfo(struct sk_bu
+ goto nla_put_failure;
+ }
+
+- if (rtnl_fill_link_netnsid(skb, dev, src_net))
++ if (rtnl_fill_link_netnsid(skb, dev, src_net, gfp))
+ goto nla_put_failure;
+
+ if (new_nsid &&
+@@ -2001,7 +2001,7 @@ walk_entries:
+ NETLINK_CB(cb->skb).portid,
+ nlh->nlmsg_seq, 0, flags,
+ ext_filter_mask, 0, NULL, 0,
+- netnsid);
++ netnsid, GFP_KERNEL);
+
+ if (err < 0) {
+ if (likely(skb->len))
+@@ -3359,7 +3359,7 @@ static int rtnl_getlink(struct sk_buff *
+ err = rtnl_fill_ifinfo(nskb, dev, net,
+ RTM_NEWLINK, NETLINK_CB(skb).portid,
+ nlh->nlmsg_seq, 0, 0, ext_filter_mask,
+- 0, NULL, 0, netnsid);
++ 0, NULL, 0, netnsid, GFP_KERNEL);
+ if (err < 0) {
+ /* -EMSGSIZE implies BUG in if_nlmsg_size */
+ WARN_ON(err == -EMSGSIZE);
+@@ -3471,7 +3471,7 @@ struct sk_buff *rtmsg_ifinfo_build_skb(i
+
+ err = rtnl_fill_ifinfo(skb, dev, dev_net(dev),
+ type, 0, 0, change, 0, 0, event,
+- new_nsid, new_ifindex, -1);
++ new_nsid, new_ifindex, -1, flags);
+ if (err < 0) {
+ /* -EMSGSIZE implies BUG in if_nlmsg_size() */
+ WARN_ON(err == -EMSGSIZE);
+--- a/net/openvswitch/datapath.c
++++ b/net/openvswitch/datapath.c
+@@ -1850,7 +1850,7 @@ static struct genl_family dp_datapath_ge
+ /* Called with ovs_mutex or RCU read lock. */
+ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
+ struct net *net, u32 portid, u32 seq,
+- u32 flags, u8 cmd)
++ u32 flags, u8 cmd, gfp_t gfp)
+ {
+ struct ovs_header *ovs_header;
+ struct ovs_vport_stats vport_stats;
+@@ -1871,7 +1871,7 @@ static int ovs_vport_cmd_fill_info(struc
+ goto nla_put_failure;
+
+ if (!net_eq(net, dev_net(vport->dev))) {
+- int id = peernet2id_alloc(net, dev_net(vport->dev));
++ int id = peernet2id_alloc(net, dev_net(vport->dev), gfp);
+
+ if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id))
+ goto nla_put_failure;
+@@ -1912,11 +1912,12 @@ struct sk_buff *ovs_vport_cmd_build_info
+ struct sk_buff *skb;
+ int retval;
+
+- skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
++ skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!skb)
+ return ERR_PTR(-ENOMEM);
+
+- retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd);
++ retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd,
++ GFP_KERNEL);
+ BUG_ON(retval < 0);
+
+ return skb;
+@@ -2058,7 +2059,7 @@ restart:
+
+ err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
+ info->snd_portid, info->snd_seq, 0,
+- OVS_VPORT_CMD_NEW);
++ OVS_VPORT_CMD_NEW, GFP_KERNEL);
+
+ new_headroom = netdev_get_fwd_headroom(vport->dev);
+
+@@ -2119,7 +2120,7 @@ static int ovs_vport_cmd_set(struct sk_b
+
+ err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
+ info->snd_portid, info->snd_seq, 0,
+- OVS_VPORT_CMD_SET);
++ OVS_VPORT_CMD_SET, GFP_KERNEL);
+ BUG_ON(err < 0);
+
+ ovs_unlock();
+@@ -2159,7 +2160,7 @@ static int ovs_vport_cmd_del(struct sk_b
+
+ err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
+ info->snd_portid, info->snd_seq, 0,
+- OVS_VPORT_CMD_DEL);
++ OVS_VPORT_CMD_DEL, GFP_KERNEL);
+ BUG_ON(err < 0);
+
+ /* the vport deletion may trigger dp headroom update */
+@@ -2206,7 +2207,7 @@ static int ovs_vport_cmd_get(struct sk_b
+ goto exit_unlock_free;
+ err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
+ info->snd_portid, info->snd_seq, 0,
+- OVS_VPORT_CMD_GET);
++ OVS_VPORT_CMD_GET, GFP_ATOMIC);
+ BUG_ON(err < 0);
+ rcu_read_unlock();
+
+@@ -2242,7 +2243,8 @@ static int ovs_vport_cmd_dump(struct sk_
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI,
+- OVS_VPORT_CMD_GET) < 0)
++ OVS_VPORT_CMD_GET,
++ GFP_ATOMIC) < 0)
+ goto out;
+
+ j++;
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Kazutoshi Noguchi <noguchi.kazutosi@gmail.com>
+Date: Mon, 21 Oct 2019 00:03:07 +0900
+Subject: r8152: add device id for Lenovo ThinkPad USB-C Dock Gen 2
+
+From: Kazutoshi Noguchi <noguchi.kazutosi@gmail.com>
+
+[ Upstream commit b3060531979422d5bb18d80226f978910284dc70 ]
+
+This device is sold as 'ThinkPad USB-C Dock Gen 2 (40AS)'.
+Chipset is RTL8153 and works with r8152.
+Without this, the generic cdc_ether grabs the device, and the device jam
+connected networks up when the machine suspends.
+
+Signed-off-by: Kazutoshi Noguchi <noguchi.kazutosi@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/usb/cdc_ether.c | 7 +++++++
+ drivers/net/usb/r8152.c | 1 +
+ 2 files changed, 8 insertions(+)
+
+--- a/drivers/net/usb/cdc_ether.c
++++ b/drivers/net/usb/cdc_ether.c
+@@ -787,6 +787,13 @@ static const struct usb_device_id produc
+ .driver_info = 0,
+ },
+
++/* ThinkPad USB-C Dock Gen 2 (based on Realtek RTL8153) */
++{
++ USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0xa387, USB_CLASS_COMM,
++ USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE),
++ .driver_info = 0,
++},
++
+ /* NVIDIA Tegra USB 3.0 Ethernet Adapters (based on Realtek RTL8153) */
+ {
+ USB_DEVICE_AND_INTERFACE_INFO(NVIDIA_VENDOR_ID, 0x09ff, USB_CLASS_COMM,
+--- a/drivers/net/usb/r8152.c
++++ b/drivers/net/usb/r8152.c
+@@ -5402,6 +5402,7 @@ static const struct usb_device_id rtl815
+ {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x7205)},
+ {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x720c)},
+ {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x7214)},
++ {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0xa387)},
+ {REALTEK_USB_DEVICE(VENDOR_ID_LINKSYS, 0x0041)},
+ {REALTEK_USB_DEVICE(VENDOR_ID_NVIDIA, 0x09ff)},
+ {REALTEK_USB_DEVICE(VENDOR_ID_TPLINK, 0x0601)},
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Heiner Kallweit <hkallweit1@gmail.com>
+Date: Fri, 1 Nov 2019 00:10:21 +0100
+Subject: r8169: fix wrong PHY ID issue with RTL8168dp
+
+From: Heiner Kallweit <hkallweit1@gmail.com>
+
+[ Upstream commit 62bdc8fd1c21d4263ebd18bec57f82532d09249f ]
+
+As reported in [0] at least one RTL8168dp version has problems
+establishing a link. This chip version has an integrated RTL8211b PHY,
+however the chip seems to report a wrong PHY ID, resulting in a wrong
+PHY driver (for Generic Realtek PHY) being loaded.
+Work around this issue by adding a hook to r8168dp_2_mdio_read()
+for returning the correct PHY ID.
+
+[0] https://bbs.archlinux.org/viewtopic.php?id=246508
+
+Fixes: 242cd9b5866a ("r8169: use phy_resume/phy_suspend")
+Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/realtek/r8169_main.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/drivers/net/ethernet/realtek/r8169_main.c
++++ b/drivers/net/ethernet/realtek/r8169_main.c
+@@ -976,6 +976,10 @@ static int r8168dp_2_mdio_read(struct rt
+ {
+ int value;
+
++ /* Work around issue with chip reporting wrong PHY ID */
++ if (reg == MII_PHYSID2)
++ return 0xc912;
++
+ r8168dp_2_mdio_start(tp);
+
+ value = r8169_mdio_read(tp, reg);
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: David Howells <dhowells@redhat.com>
+Date: Thu, 31 Oct 2019 12:13:46 +0000
+Subject: rxrpc: Fix handling of last subpacket of jumbo packet
+
+From: David Howells <dhowells@redhat.com>
+
+[ Upstream commit f9c32435ab7221d1d6cb35738fa85a2da012b23e ]
+
+When rxrpc_recvmsg_data() sets the return value to 1 because it's drained
+all the data for the last packet, it checks the last-packet flag on the
+whole packet - but this is wrong, since the last-packet flag is only set on
+the final subpacket of the last jumbo packet. This means that a call that
+receives its last packet in a jumbo packet won't complete properly.
+
+Fix this by having rxrpc_locate_data() determine the last-packet state of
+the subpacket it's looking at and passing that back to the caller rather
+than having the caller look in the packet header. The caller then needs to
+cache this in the rxrpc_call struct as rxrpc_locate_data() isn't then
+called again for this packet.
+
+Fixes: 248f219cb8bc ("rxrpc: Rewrite the data and ack handling code")
+Fixes: e2de6c404898 ("rxrpc: Use info in skbuff instead of reparsing a jumbo packet")
+Signed-off-by: David Howells <dhowells@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/rxrpc/ar-internal.h | 1 +
+ net/rxrpc/recvmsg.c | 18 +++++++++++++-----
+ 2 files changed, 14 insertions(+), 5 deletions(-)
+
+--- a/net/rxrpc/ar-internal.h
++++ b/net/rxrpc/ar-internal.h
+@@ -596,6 +596,7 @@ struct rxrpc_call {
+ int debug_id; /* debug ID for printks */
+ unsigned short rx_pkt_offset; /* Current recvmsg packet offset */
+ unsigned short rx_pkt_len; /* Current recvmsg packet len */
++ bool rx_pkt_last; /* Current recvmsg packet is last */
+
+ /* Rx/Tx circular buffer, depending on phase.
+ *
+--- a/net/rxrpc/recvmsg.c
++++ b/net/rxrpc/recvmsg.c
+@@ -267,11 +267,13 @@ static int rxrpc_verify_packet(struct rx
+ */
+ static int rxrpc_locate_data(struct rxrpc_call *call, struct sk_buff *skb,
+ u8 *_annotation,
+- unsigned int *_offset, unsigned int *_len)
++ unsigned int *_offset, unsigned int *_len,
++ bool *_last)
+ {
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ unsigned int offset = sizeof(struct rxrpc_wire_header);
+ unsigned int len;
++ bool last = false;
+ int ret;
+ u8 annotation = *_annotation;
+ u8 subpacket = annotation & RXRPC_RX_ANNO_SUBPACKET;
+@@ -281,6 +283,8 @@ static int rxrpc_locate_data(struct rxrp
+ len = skb->len - offset;
+ if (subpacket < sp->nr_subpackets - 1)
+ len = RXRPC_JUMBO_DATALEN;
++ else if (sp->rx_flags & RXRPC_SKB_INCL_LAST)
++ last = true;
+
+ if (!(annotation & RXRPC_RX_ANNO_VERIFIED)) {
+ ret = rxrpc_verify_packet(call, skb, annotation, offset, len);
+@@ -291,6 +295,7 @@ static int rxrpc_locate_data(struct rxrp
+
+ *_offset = offset;
+ *_len = len;
++ *_last = last;
+ call->conn->security->locate_data(call, skb, _offset, _len);
+ return 0;
+ }
+@@ -309,7 +314,7 @@ static int rxrpc_recvmsg_data(struct soc
+ rxrpc_serial_t serial;
+ rxrpc_seq_t hard_ack, top, seq;
+ size_t remain;
+- bool last;
++ bool rx_pkt_last;
+ unsigned int rx_pkt_offset, rx_pkt_len;
+ int ix, copy, ret = -EAGAIN, ret2;
+
+@@ -319,6 +324,7 @@ static int rxrpc_recvmsg_data(struct soc
+
+ rx_pkt_offset = call->rx_pkt_offset;
+ rx_pkt_len = call->rx_pkt_len;
++ rx_pkt_last = call->rx_pkt_last;
+
+ if (call->state >= RXRPC_CALL_SERVER_ACK_REQUEST) {
+ seq = call->rx_hard_ack;
+@@ -329,6 +335,7 @@ static int rxrpc_recvmsg_data(struct soc
+ /* Barriers against rxrpc_input_data(). */
+ hard_ack = call->rx_hard_ack;
+ seq = hard_ack + 1;
++
+ while (top = smp_load_acquire(&call->rx_top),
+ before_eq(seq, top)
+ ) {
+@@ -356,7 +363,8 @@ static int rxrpc_recvmsg_data(struct soc
+ if (rx_pkt_offset == 0) {
+ ret2 = rxrpc_locate_data(call, skb,
+ &call->rxtx_annotations[ix],
+- &rx_pkt_offset, &rx_pkt_len);
++ &rx_pkt_offset, &rx_pkt_len,
++ &rx_pkt_last);
+ trace_rxrpc_recvmsg(call, rxrpc_recvmsg_next, seq,
+ rx_pkt_offset, rx_pkt_len, ret2);
+ if (ret2 < 0) {
+@@ -396,13 +404,12 @@ static int rxrpc_recvmsg_data(struct soc
+ }
+
+ /* The whole packet has been transferred. */
+- last = sp->hdr.flags & RXRPC_LAST_PACKET;
+ if (!(flags & MSG_PEEK))
+ rxrpc_rotate_rx_window(call);
+ rx_pkt_offset = 0;
+ rx_pkt_len = 0;
+
+- if (last) {
++ if (rx_pkt_last) {
+ ASSERTCMP(seq, ==, READ_ONCE(call->rx_top));
+ ret = 1;
+ goto out;
+@@ -415,6 +422,7 @@ out:
+ if (!(flags & MSG_PEEK)) {
+ call->rx_pkt_offset = rx_pkt_offset;
+ call->rx_pkt_len = rx_pkt_len;
++ call->rx_pkt_last = rx_pkt_last;
+ }
+ done:
+ trace_rxrpc_recvmsg(call, rxrpc_recvmsg_data_return, seq,
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Sat, 26 Oct 2019 11:53:40 +0200
+Subject: selftests: fib_tests: add more tests for metric update
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit 37de3b354150450ba12275397155e68113e99901 ]
+
+This patch adds two more tests to ipv4_addr_metric_test() to
+explicitly cover the scenarios fixed by the previous patch.
+
+Suggested-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/net/fib_tests.sh | 21 +++++++++++++++++++++
+ 1 file changed, 21 insertions(+)
+
+--- a/tools/testing/selftests/net/fib_tests.sh
++++ b/tools/testing/selftests/net/fib_tests.sh
+@@ -1438,6 +1438,27 @@ ipv4_addr_metric_test()
+ fi
+ log_test $rc 0 "Prefix route with metric on link up"
+
++ # explicitly check for metric changes on edge scenarios
++ run_cmd "$IP addr flush dev dummy2"
++ run_cmd "$IP addr add dev dummy2 172.16.104.0/24 metric 259"
++ run_cmd "$IP addr change dev dummy2 172.16.104.0/24 metric 260"
++ rc=$?
++ if [ $rc -eq 0 ]; then
++ check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.0 metric 260"
++ rc=$?
++ fi
++ log_test $rc 0 "Modify metric of .0/24 address"
++
++ run_cmd "$IP addr flush dev dummy2"
++ run_cmd "$IP addr add dev dummy2 172.16.104.1/32 peer 172.16.104.2 metric 260"
++ run_cmd "$IP addr change dev dummy2 172.16.104.1/32 peer 172.16.104.2 metric 261"
++ rc=$?
++ if [ $rc -eq 0 ]; then
++ check_route "172.16.104.2 dev dummy2 proto kernel scope link src 172.16.104.1 metric 261"
++ rc=$?
++ fi
++ log_test $rc 0 "Modify metric of address with peer route"
++
+ $IP li del dummy1
+ $IP li del dummy2
+ cleanup
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Wei Wang <weiwan@google.com>
+Date: Thu, 31 Oct 2019 16:24:36 -0700
+Subject: selftests: net: reuseport_dualstack: fix uninitalized parameter
+
+From: Wei Wang <weiwan@google.com>
+
+[ Upstream commit d64479a3e3f9924074ca7b50bd72fa5211dca9c1 ]
+
+This test reports EINVAL for getsockopt(SOL_SOCKET, SO_DOMAIN)
+occasionally due to the uninitialized length parameter.
+Initialize it to fix this, and also use int for "test_family" to comply
+with the API standard.
+
+Fixes: d6a61f80b871 ("soreuseport: test mixed v4/v6 sockets")
+Reported-by: Maciej Żenczykowski <maze@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Wei Wang <weiwan@google.com>
+Cc: Craig Gallek <cgallek@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/net/reuseport_dualstack.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/tools/testing/selftests/net/reuseport_dualstack.c
++++ b/tools/testing/selftests/net/reuseport_dualstack.c
+@@ -129,7 +129,7 @@ static void test(int *rcv_fds, int count
+ {
+ struct epoll_event ev;
+ int epfd, i, test_fd;
+- uint16_t test_family;
++ int test_family;
+ socklen_t len;
+
+ epfd = epoll_create(1);
+@@ -146,6 +146,7 @@ static void test(int *rcv_fds, int count
+ send_from_v4(proto);
+
+ test_fd = receive_once(epfd, proto);
++ len = sizeof(test_family);
+ if (getsockopt(test_fd, SOL_SOCKET, SO_DOMAIN, &test_family, &len))
+ error(1, errno, "failed to read socket domain");
+ if (test_family != AF_INET)
nbd-protect-cmd-status-with-cmd-lock.patch
nbd-handle-racing-with-error-ed-out-commands.patch
ata-libahci_platform-fix-regulator_get_optional-misu.patch
+cxgb4-fix-panic-when-attaching-to-uld-fail.patch
+cxgb4-request-the-tx-cidx-updates-to-status-page.patch
+dccp-do-not-leak-jiffies-on-the-wire.patch
+erspan-fix-the-tun_info-options_len-check-for-erspan.patch
+inet-stop-leaking-jiffies-on-the-wire.patch
+net-annotate-accesses-to-sk-sk_incoming_cpu.patch
+net-annotate-lockless-accesses-to-sk-sk_napi_id.patch
+net-dsa-bcm_sf2-fix-imp-setup-for-port-different-than-8.patch
+net-ethernet-ftgmac100-fix-dma-coherency-issue-with-sw-checksum.patch
+net-fix-sk_page_frag-recursion-from-memory-reclaim.patch
+net-hisilicon-fix-ping-latency-when-deal-with-high-throughput.patch
+net-mlx4_core-dynamically-set-guaranteed-amount-of-counters-per-vf.patch
+netns-fix-gfp-flags-in-rtnl_net_notifyid.patch
+net-rtnetlink-fix-a-typo-fbd-fdb.patch
+net-usb-lan78xx-disable-interrupts-before-calling-generic_handle_irq.patch
+net-zeroing-the-structure-ethtool_wolinfo-in-ethtool_get_wol.patch
+selftests-net-reuseport_dualstack-fix-uninitalized-parameter.patch
+udp-fix-data-race-in-udp_set_dev_scratch.patch
+vxlan-check-tun_info-options_len-properly.patch
+net-add-skb_queue_empty_lockless.patch
+udp-use-skb_queue_empty_lockless.patch
+net-use-skb_queue_empty_lockless-in-poll-handlers.patch
+net-use-skb_queue_empty_lockless-in-busy-poll-contexts.patch
+net-add-read_once-annotation-in-__skb_wait_for_more_packets.patch
+ipv4-fix-route-update-on-metric-change.patch
+selftests-fib_tests-add-more-tests-for-metric-update.patch
+net-smc-fix-closing-of-fallback-smc-sockets.patch
+net-smc-keep-vlan_id-for-smc-r-in-smc_listen_work.patch
+keys-fix-memory-leak-in-copy_net_ns.patch
+net-phylink-fix-phylink_dbg-macro.patch
+rxrpc-fix-handling-of-last-subpacket-of-jumbo-packet.patch
+net-mlx5e-determine-source-port-properly-for-vlan-push-action.patch
+net-mlx5e-remove-incorrect-match-criteria-assignment-line.patch
+net-mlx5e-initialize-on-stack-link-modes-bitmap.patch
+net-mlx5-fix-flow-counter-list-auto-bits-struct.patch
+net-smc-fix-refcounting-for-non-blocking-connect.patch
+net-mlx5-fix-rtable-reference-leak.patch
+mlxsw-core-unpublish-devlink-parameters-during-reload.patch
+r8169-fix-wrong-phy-id-issue-with-rtl8168dp.patch
+net-mlx5e-fix-ethtool-self-test-link-speed.patch
+net-mlx5e-fix-handling-of-compressed-cqes-in-case-of-low-napi-budget.patch
+ipv4-fix-ipskb_frag_pmtu-handling-with-fragmentation.patch
+net-bcmgenet-don-t-set-phydev-link-from-mac.patch
+net-dsa-b53-do-not-clear-existing-mirrored-port-mask.patch
+net-dsa-fix-switch-tree-list.patch
+net-ensure-correct-skb-tstamp-in-various-fragmenters.patch
+net-hns3-fix-mis-counting-irq-vector-numbers-issue.patch
+net-netem-fix-error-path-for-corrupted-gso-frames.patch
+net-reorder-struct-net-fields-to-avoid-false-sharing.patch
+net-usb-lan78xx-connect-phy-before-registering-mac.patch
+r8152-add-device-id-for-lenovo-thinkpad-usb-c-dock-gen-2.patch
+net-netem-correct-the-parent-s-backlog-when-corrupted-packet-was-dropped.patch
+net-phy-bcm7xxx-define-soft_reset-for-40nm-ephy.patch
+net-bcmgenet-soft-reset-40nm-ephys-before-mac-init.patch
+net-bcmgenet-reset-40nm-ephy-on-energy-detect.patch
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Eric Dumazet <edumazet@google.com>
+Date: Thu, 24 Oct 2019 11:43:31 -0700
+Subject: udp: fix data-race in udp_set_dev_scratch()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit a793183caa9afae907a0d7ddd2ffd57329369bf5 ]
+
+KCSAN reported a data-race in udp_set_dev_scratch() [1]
+
+The issue here is that we must not write over skb fields
+if skb is shared. A similar issue has been fixed in commit
+89c22d8c3b27 ("net: Fix skb csum races when peeking")
+
+While we are at it, use a helper only dealing with
+udp_skb_scratch(skb)->csum_unnecessary, as this allows
+udp_set_dev_scratch() to be called once and thus inlined.
+
+[1]
+BUG: KCSAN: data-race in udp_set_dev_scratch / udpv6_recvmsg
+
+write to 0xffff888120278317 of 1 bytes by task 10411 on cpu 1:
+ udp_set_dev_scratch+0xea/0x200 net/ipv4/udp.c:1308
+ __first_packet_length+0x147/0x420 net/ipv4/udp.c:1556
+ first_packet_length+0x68/0x2a0 net/ipv4/udp.c:1579
+ udp_poll+0xea/0x110 net/ipv4/udp.c:2720
+ sock_poll+0xed/0x250 net/socket.c:1256
+ vfs_poll include/linux/poll.h:90 [inline]
+ do_select+0x7d0/0x1020 fs/select.c:534
+ core_sys_select+0x381/0x550 fs/select.c:677
+ do_pselect.constprop.0+0x11d/0x160 fs/select.c:759
+ __do_sys_pselect6 fs/select.c:784 [inline]
+ __se_sys_pselect6 fs/select.c:769 [inline]
+ __x64_sys_pselect6+0x12e/0x170 fs/select.c:769
+ do_syscall_64+0xcc/0x370 arch/x86/entry/common.c:290
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+read to 0xffff888120278317 of 1 bytes by task 10413 on cpu 0:
+ udp_skb_csum_unnecessary include/net/udp.h:358 [inline]
+ udpv6_recvmsg+0x43e/0xe90 net/ipv6/udp.c:310
+ inet6_recvmsg+0xbb/0x240 net/ipv6/af_inet6.c:592
+ sock_recvmsg_nosec+0x5c/0x70 net/socket.c:871
+ ___sys_recvmsg+0x1a0/0x3e0 net/socket.c:2480
+ do_recvmmsg+0x19a/0x5c0 net/socket.c:2601
+ __sys_recvmmsg+0x1ef/0x200 net/socket.c:2680
+ __do_sys_recvmmsg net/socket.c:2703 [inline]
+ __se_sys_recvmmsg net/socket.c:2696 [inline]
+ __x64_sys_recvmmsg+0x89/0xb0 net/socket.c:2696
+ do_syscall_64+0xcc/0x370 arch/x86/entry/common.c:290
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 0 PID: 10413 Comm: syz-executor.0 Not tainted 5.4.0-rc3+ #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+
+Fixes: 2276f58ac589 ("udp: use a separate rx queue for packet reception")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Cc: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/udp.c | 19 +++++++++++++++----
+ 1 file changed, 15 insertions(+), 4 deletions(-)
+
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -1316,6 +1316,20 @@ static void udp_set_dev_scratch(struct s
+ scratch->_tsize_state |= UDP_SKB_IS_STATELESS;
+ }
+
++static void udp_skb_csum_unnecessary_set(struct sk_buff *skb)
++{
++ /* We come here after udp_lib_checksum_complete() returned 0.
++ * This means that __skb_checksum_complete() might have
++ * set skb->csum_valid to 1.
++ * On 64bit platforms, we can set csum_unnecessary
++ * to true, but only if the skb is not shared.
++ */
++#if BITS_PER_LONG == 64
++ if (!skb_shared(skb))
++ udp_skb_scratch(skb)->csum_unnecessary = true;
++#endif
++}
++
+ static int udp_skb_truesize(struct sk_buff *skb)
+ {
+ return udp_skb_scratch(skb)->_tsize_state & ~UDP_SKB_IS_STATELESS;
+@@ -1550,10 +1564,7 @@ static struct sk_buff *__first_packet_le
+ *total += skb->truesize;
+ kfree_skb(skb);
+ } else {
+- /* the csum related bits could be changed, refresh
+- * the scratch area
+- */
+- udp_set_dev_scratch(skb);
++ udp_skb_csum_unnecessary_set(skb);
+ break;
+ }
+ }
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 23 Oct 2019 22:44:49 -0700
+Subject: udp: use skb_queue_empty_lockless()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 137a0dbe3426fd7bcfe3f8117b36a87b3590e4eb ]
+
+syzbot reported a data-race [1].
+
+We should use skb_queue_empty_lockless() to document that we are
+not ensuring a mutual exclusion and silence KCSAN.
+
+[1]
+BUG: KCSAN: data-race in __skb_recv_udp / __udp_enqueue_schedule_skb
+
+write to 0xffff888122474b50 of 8 bytes by interrupt on cpu 0:
+ __skb_insert include/linux/skbuff.h:1852 [inline]
+ __skb_queue_before include/linux/skbuff.h:1958 [inline]
+ __skb_queue_tail include/linux/skbuff.h:1991 [inline]
+ __udp_enqueue_schedule_skb+0x2c1/0x410 net/ipv4/udp.c:1470
+ __udp_queue_rcv_skb net/ipv4/udp.c:1940 [inline]
+ udp_queue_rcv_one_skb+0x7bd/0xc70 net/ipv4/udp.c:2057
+ udp_queue_rcv_skb+0xb5/0x400 net/ipv4/udp.c:2074
+ udp_unicast_rcv_skb.isra.0+0x7e/0x1c0 net/ipv4/udp.c:2233
+ __udp4_lib_rcv+0xa44/0x17c0 net/ipv4/udp.c:2300
+ udp_rcv+0x2b/0x40 net/ipv4/udp.c:2470
+ ip_protocol_deliver_rcu+0x4d/0x420 net/ipv4/ip_input.c:204
+ ip_local_deliver_finish+0x110/0x140 net/ipv4/ip_input.c:231
+ NF_HOOK include/linux/netfilter.h:305 [inline]
+ NF_HOOK include/linux/netfilter.h:299 [inline]
+ ip_local_deliver+0x133/0x210 net/ipv4/ip_input.c:252
+ dst_input include/net/dst.h:442 [inline]
+ ip_rcv_finish+0x121/0x160 net/ipv4/ip_input.c:413
+ NF_HOOK include/linux/netfilter.h:305 [inline]
+ NF_HOOK include/linux/netfilter.h:299 [inline]
+ ip_rcv+0x18f/0x1a0 net/ipv4/ip_input.c:523
+ __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5010
+ __netif_receive_skb+0x37/0xf0 net/core/dev.c:5124
+ process_backlog+0x1d3/0x420 net/core/dev.c:5955
+
+read to 0xffff888122474b50 of 8 bytes by task 8921 on cpu 1:
+ skb_queue_empty include/linux/skbuff.h:1494 [inline]
+ __skb_recv_udp+0x18d/0x500 net/ipv4/udp.c:1653
+ udp_recvmsg+0xe1/0xb10 net/ipv4/udp.c:1712
+ inet_recvmsg+0xbb/0x250 net/ipv4/af_inet.c:838
+ sock_recvmsg_nosec+0x5c/0x70 net/socket.c:871
+ ___sys_recvmsg+0x1a0/0x3e0 net/socket.c:2480
+ do_recvmmsg+0x19a/0x5c0 net/socket.c:2601
+ __sys_recvmmsg+0x1ef/0x200 net/socket.c:2680
+ __do_sys_recvmmsg net/socket.c:2703 [inline]
+ __se_sys_recvmmsg net/socket.c:2696 [inline]
+ __x64_sys_recvmmsg+0x89/0xb0 net/socket.c:2696
+ do_syscall_64+0xcc/0x370 arch/x86/entry/common.c:290
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 1 PID: 8921 Comm: syz-executor.4 Not tainted 5.4.0-rc3+ #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/udp.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -1588,7 +1588,7 @@ static int first_packet_length(struct so
+
+ spin_lock_bh(&rcvq->lock);
+ skb = __first_packet_length(sk, rcvq, &total);
+- if (!skb && !skb_queue_empty(sk_queue)) {
++ if (!skb && !skb_queue_empty_lockless(sk_queue)) {
+ spin_lock(&sk_queue->lock);
+ skb_queue_splice_tail_init(sk_queue, rcvq);
+ spin_unlock(&sk_queue->lock);
+@@ -1661,7 +1661,7 @@ struct sk_buff *__skb_recv_udp(struct so
+ return skb;
+ }
+
+- if (skb_queue_empty(sk_queue)) {
++ if (skb_queue_empty_lockless(sk_queue)) {
+ spin_unlock_bh(&queue->lock);
+ goto busy_check;
+ }
+@@ -1687,7 +1687,7 @@ busy_check:
+ break;
+
+ sk_busy_loop(sk, flags & MSG_DONTWAIT);
+- } while (!skb_queue_empty(sk_queue));
++ } while (!skb_queue_empty_lockless(sk_queue));
+
+ /* sk_queue is empty, reader_queue may contain peeked packets */
+ } while (timeo &&
--- /dev/null
+From foo@baz Wed 06 Nov 2019 03:23:18 PM CET
+From: Xin Long <lucien.xin@gmail.com>
+Date: Tue, 29 Oct 2019 01:24:32 +0800
+Subject: vxlan: check tun_info options_len properly
+
+From: Xin Long <lucien.xin@gmail.com>
+
+[ Upstream commit eadf52cf1852196a1363044dcda22fa5d7f296f7 ]
+
+This patch is to improve the tun_info options_len by dropping
+the skb when TUNNEL_VXLAN_OPT is set but options_len is less
+than vxlan_metadata. This can avoid a potential out-of-bounds
+access on ip_tun_info.
+
+Fixes: ee122c79d422 ("vxlan: Flow based tunneling")
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/vxlan.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/vxlan.c
++++ b/drivers/net/vxlan.c
+@@ -2487,9 +2487,11 @@ static void vxlan_xmit_one(struct sk_buf
+ vni = tunnel_id_to_key32(info->key.tun_id);
+ ifindex = 0;
+ dst_cache = &info->dst_cache;
+- if (info->options_len &&
+- info->key.tun_flags & TUNNEL_VXLAN_OPT)
++ if (info->key.tun_flags & TUNNEL_VXLAN_OPT) {
++ if (info->options_len < sizeof(*md))
++ goto drop;
+ md = ip_tunnel_info_opts(info);
++ }
+ ttl = info->key.ttl;
+ tos = info->key.tos;
+ label = info->key.label;