]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.19-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 6 Nov 2019 14:33:34 +0000 (15:33 +0100)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 6 Nov 2019 14:33:34 +0000 (15:33 +0100)
added patches:
cxgb4-fix-panic-when-attaching-to-uld-fail.patch
dccp-do-not-leak-jiffies-on-the-wire.patch
erspan-fix-the-tun_info-options_len-check-for-erspan.patch
inet-stop-leaking-jiffies-on-the-wire.patch
ipv4-fix-route-update-on-metric-change.patch
net-add-read_once-annotation-in-__skb_wait_for_more_packets.patch
net-add-skb_queue_empty_lockless.patch
net-annotate-accesses-to-sk-sk_incoming_cpu.patch
net-annotate-lockless-accesses-to-sk-sk_napi_id.patch
net-bcmgenet-don-t-set-phydev-link-from-mac.patch
net-bcmgenet-reset-40nm-ephy-on-energy-detect.patch
net-bcmgenet-soft-reset-40nm-ephys-before-mac-init.patch
net-dsa-b53-do-not-clear-existing-mirrored-port-mask.patch
net-dsa-bcm_sf2-fix-imp-setup-for-port-different-than-8.patch
net-dsa-fix-switch-tree-list.patch
net-ethernet-ftgmac100-fix-dma-coherency-issue-with-sw-checksum.patch
net-fix-sk_page_frag-recursion-from-memory-reclaim.patch
net-flow_dissector-switch-to-siphash.patch
net-hisilicon-fix-ping-latency-when-deal-with-high-throughput.patch
net-mlx4_core-dynamically-set-guaranteed-amount-of-counters-per-vf.patch
net-mlx5e-fix-ethtool-self-test-link-speed.patch
net-mlx5e-fix-handling-of-compressed-cqes-in-case-of-low-napi-budget.patch
net-phy-bcm7xxx-define-soft_reset-for-40nm-ephy.patch
net-usb-lan78xx-connect-phy-before-registering-mac.patch
net-usb-lan78xx-disable-interrupts-before-calling-generic_handle_irq.patch
net-use-skb_queue_empty_lockless-in-busy-poll-contexts.patch
net-use-skb_queue_empty_lockless-in-poll-handlers.patch
net-zeroing-the-structure-ethtool_wolinfo-in-ethtool_get_wol.patch
netns-fix-gfp-flags-in-rtnl_net_notifyid.patch
r8152-add-device-id-for-lenovo-thinkpad-usb-c-dock-gen-2.patch
r8169-fix-wrong-phy-id-issue-with-rtl8168dp.patch
selftests-fib_tests-add-more-tests-for-metric-update.patch
selftests-net-reuseport_dualstack-fix-uninitalized-parameter.patch
udp-fix-data-race-in-udp_set_dev_scratch.patch
udp-use-skb_queue_empty_lockless.patch
vxlan-check-tun_info-options_len-properly.patch

37 files changed:
queue-4.19/cxgb4-fix-panic-when-attaching-to-uld-fail.patch [new file with mode: 0644]
queue-4.19/dccp-do-not-leak-jiffies-on-the-wire.patch [new file with mode: 0644]
queue-4.19/erspan-fix-the-tun_info-options_len-check-for-erspan.patch [new file with mode: 0644]
queue-4.19/inet-stop-leaking-jiffies-on-the-wire.patch [new file with mode: 0644]
queue-4.19/ipv4-fix-route-update-on-metric-change.patch [new file with mode: 0644]
queue-4.19/net-add-read_once-annotation-in-__skb_wait_for_more_packets.patch [new file with mode: 0644]
queue-4.19/net-add-skb_queue_empty_lockless.patch [new file with mode: 0644]
queue-4.19/net-annotate-accesses-to-sk-sk_incoming_cpu.patch [new file with mode: 0644]
queue-4.19/net-annotate-lockless-accesses-to-sk-sk_napi_id.patch [new file with mode: 0644]
queue-4.19/net-bcmgenet-don-t-set-phydev-link-from-mac.patch [new file with mode: 0644]
queue-4.19/net-bcmgenet-reset-40nm-ephy-on-energy-detect.patch [new file with mode: 0644]
queue-4.19/net-bcmgenet-soft-reset-40nm-ephys-before-mac-init.patch [new file with mode: 0644]
queue-4.19/net-dsa-b53-do-not-clear-existing-mirrored-port-mask.patch [new file with mode: 0644]
queue-4.19/net-dsa-bcm_sf2-fix-imp-setup-for-port-different-than-8.patch [new file with mode: 0644]
queue-4.19/net-dsa-fix-switch-tree-list.patch [new file with mode: 0644]
queue-4.19/net-ethernet-ftgmac100-fix-dma-coherency-issue-with-sw-checksum.patch [new file with mode: 0644]
queue-4.19/net-fix-sk_page_frag-recursion-from-memory-reclaim.patch [new file with mode: 0644]
queue-4.19/net-flow_dissector-switch-to-siphash.patch [new file with mode: 0644]
queue-4.19/net-hisilicon-fix-ping-latency-when-deal-with-high-throughput.patch [new file with mode: 0644]
queue-4.19/net-mlx4_core-dynamically-set-guaranteed-amount-of-counters-per-vf.patch [new file with mode: 0644]
queue-4.19/net-mlx5e-fix-ethtool-self-test-link-speed.patch [new file with mode: 0644]
queue-4.19/net-mlx5e-fix-handling-of-compressed-cqes-in-case-of-low-napi-budget.patch [new file with mode: 0644]
queue-4.19/net-phy-bcm7xxx-define-soft_reset-for-40nm-ephy.patch [new file with mode: 0644]
queue-4.19/net-usb-lan78xx-connect-phy-before-registering-mac.patch [new file with mode: 0644]
queue-4.19/net-usb-lan78xx-disable-interrupts-before-calling-generic_handle_irq.patch [new file with mode: 0644]
queue-4.19/net-use-skb_queue_empty_lockless-in-busy-poll-contexts.patch [new file with mode: 0644]
queue-4.19/net-use-skb_queue_empty_lockless-in-poll-handlers.patch [new file with mode: 0644]
queue-4.19/net-zeroing-the-structure-ethtool_wolinfo-in-ethtool_get_wol.patch [new file with mode: 0644]
queue-4.19/netns-fix-gfp-flags-in-rtnl_net_notifyid.patch [new file with mode: 0644]
queue-4.19/r8152-add-device-id-for-lenovo-thinkpad-usb-c-dock-gen-2.patch [new file with mode: 0644]
queue-4.19/r8169-fix-wrong-phy-id-issue-with-rtl8168dp.patch [new file with mode: 0644]
queue-4.19/selftests-fib_tests-add-more-tests-for-metric-update.patch [new file with mode: 0644]
queue-4.19/selftests-net-reuseport_dualstack-fix-uninitalized-parameter.patch [new file with mode: 0644]
queue-4.19/series
queue-4.19/udp-fix-data-race-in-udp_set_dev_scratch.patch [new file with mode: 0644]
queue-4.19/udp-use-skb_queue_empty_lockless.patch [new file with mode: 0644]
queue-4.19/vxlan-check-tun_info-options_len-properly.patch [new file with mode: 0644]

diff --git a/queue-4.19/cxgb4-fix-panic-when-attaching-to-uld-fail.patch b/queue-4.19/cxgb4-fix-panic-when-attaching-to-uld-fail.patch
new file mode 100644 (file)
index 0000000..bfe6206
--- /dev/null
@@ -0,0 +1,98 @@
+From foo@baz Wed 06 Nov 2019 03:31:22 PM CET
+From: Vishal Kulkarni <vishal@chelsio.com>
+Date: Wed, 30 Oct 2019 20:17:57 +0530
+Subject: cxgb4: fix panic when attaching to ULD fail
+
+From: Vishal Kulkarni <vishal@chelsio.com>
+
+[ Upstream commit fc89cc358fb64e2429aeae0f37906126636507ec ]
+
+Release resources when attaching to ULD fail. Otherwise, data
+mismatch is seen between LLD and ULD later on, which lead to
+kernel panic when accessing resources that should not even
+exist in the first place.
+
+Fixes: 94cdb8bb993a ("cxgb4: Add support for dynamic allocation of resources for ULD")
+Signed-off-by: Shahjada Abul Husain <shahjada@chelsio.com>
+Signed-off-by: Vishal Kulkarni <vishal@chelsio.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c |   29 ++++++++++++++-----------
+ 1 file changed, 17 insertions(+), 12 deletions(-)
+
+--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
++++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
+@@ -673,10 +673,10 @@ static void uld_init(struct adapter *ada
+       lld->write_cmpl_support = adap->params.write_cmpl_support;
+ }
+-static void uld_attach(struct adapter *adap, unsigned int uld)
++static int uld_attach(struct adapter *adap, unsigned int uld)
+ {
+-      void *handle;
+       struct cxgb4_lld_info lli;
++      void *handle;
+       uld_init(adap, &lli);
+       uld_queue_init(adap, uld, &lli);
+@@ -686,7 +686,7 @@ static void uld_attach(struct adapter *a
+               dev_warn(adap->pdev_dev,
+                        "could not attach to the %s driver, error %ld\n",
+                        adap->uld[uld].name, PTR_ERR(handle));
+-              return;
++              return PTR_ERR(handle);
+       }
+       adap->uld[uld].handle = handle;
+@@ -694,23 +694,24 @@ static void uld_attach(struct adapter *a
+       if (adap->flags & FULL_INIT_DONE)
+               adap->uld[uld].state_change(handle, CXGB4_STATE_UP);
++
++      return 0;
+ }
+-/**
+- *    cxgb4_register_uld - register an upper-layer driver
+- *    @type: the ULD type
+- *    @p: the ULD methods
++/* cxgb4_register_uld - register an upper-layer driver
++ * @type: the ULD type
++ * @p: the ULD methods
+  *
+- *    Registers an upper-layer driver with this driver and notifies the ULD
+- *    about any presently available devices that support its type.  Returns
+- *    %-EBUSY if a ULD of the same type is already registered.
++ * Registers an upper-layer driver with this driver and notifies the ULD
++ * about any presently available devices that support its type.  Returns
++ * %-EBUSY if a ULD of the same type is already registered.
+  */
+ int cxgb4_register_uld(enum cxgb4_uld type,
+                      const struct cxgb4_uld_info *p)
+ {
+-      int ret = 0;
+       unsigned int adap_idx = 0;
+       struct adapter *adap;
++      int ret = 0;
+       if (type >= CXGB4_ULD_MAX)
+               return -EINVAL;
+@@ -744,12 +745,16 @@ int cxgb4_register_uld(enum cxgb4_uld ty
+               if (ret)
+                       goto free_irq;
+               adap->uld[type] = *p;
+-              uld_attach(adap, type);
++              ret = uld_attach(adap, type);
++              if (ret)
++                      goto free_txq;
+               adap_idx++;
+       }
+       mutex_unlock(&uld_mutex);
+       return 0;
++free_txq:
++      release_sge_txq_uld(adap, type);
+ free_irq:
+       if (adap->flags & FULL_INIT_DONE)
+               quiesce_rx_uld(adap, type);
diff --git a/queue-4.19/dccp-do-not-leak-jiffies-on-the-wire.patch b/queue-4.19/dccp-do-not-leak-jiffies-on-the-wire.patch
new file mode 100644 (file)
index 0000000..8cf0d72
--- /dev/null
@@ -0,0 +1,32 @@
+From foo@baz Wed 06 Nov 2019 03:31:22 PM CET
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 4 Nov 2019 07:57:55 -0800
+Subject: dccp: do not leak jiffies on the wire
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 3d1e5039f5f87a8731202ceca08764ee7cb010d3 ]
+
+For some reason I missed the case of DCCP passive
+flows in my previous patch.
+
+Fixes: a904a0693c18 ("inet: stop leaking jiffies on the wire")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Thiemo Nagel <tnagel@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/dccp/ipv4.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/dccp/ipv4.c
++++ b/net/dccp/ipv4.c
+@@ -417,7 +417,7 @@ struct sock *dccp_v4_request_recv_sock(c
+       RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt));
+       newinet->mc_index  = inet_iif(skb);
+       newinet->mc_ttl    = ip_hdr(skb)->ttl;
+-      newinet->inet_id   = jiffies;
++      newinet->inet_id   = prandom_u32();
+       if (dst == NULL && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL)
+               goto put_and_exit;
diff --git a/queue-4.19/erspan-fix-the-tun_info-options_len-check-for-erspan.patch b/queue-4.19/erspan-fix-the-tun_info-options_len-check-for-erspan.patch
new file mode 100644 (file)
index 0000000..ed06fe1
--- /dev/null
@@ -0,0 +1,51 @@
+From foo@baz Wed 06 Nov 2019 03:31:22 PM CET
+From: Xin Long <lucien.xin@gmail.com>
+Date: Mon, 28 Oct 2019 23:19:35 +0800
+Subject: erspan: fix the tun_info options_len check for erspan
+
+From: Xin Long <lucien.xin@gmail.com>
+
+[ Upstream commit 2eb8d6d2910cfe3dc67dc056f26f3dd9c63d47cd ]
+
+The check for !md doens't really work for ip_tunnel_info_opts(info) which
+only does info + 1. Also to avoid out-of-bounds access on info, it should
+ensure options_len is not less than erspan_metadata in both erspan_xmit()
+and ip6erspan_tunnel_xmit().
+
+Fixes: 1a66a836da ("gre: add collect_md mode to ERSPAN tunnel")
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ip_gre.c  |    4 ++--
+ net/ipv6/ip6_gre.c |    4 ++--
+ 2 files changed, 4 insertions(+), 4 deletions(-)
+
+--- a/net/ipv4/ip_gre.c
++++ b/net/ipv4/ip_gre.c
+@@ -589,9 +589,9 @@ static void erspan_fb_xmit(struct sk_buf
+       key = &tun_info->key;
+       if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
+               goto err_free_rt;
++      if (tun_info->options_len < sizeof(*md))
++              goto err_free_rt;
+       md = ip_tunnel_info_opts(tun_info);
+-      if (!md)
+-              goto err_free_rt;
+       /* ERSPAN has fixed 8 byte GRE header */
+       version = md->version;
+--- a/net/ipv6/ip6_gre.c
++++ b/net/ipv6/ip6_gre.c
+@@ -1000,9 +1000,9 @@ static netdev_tx_t ip6erspan_tunnel_xmit
+               dsfield = key->tos;
+               if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
+                       goto tx_err;
+-              md = ip_tunnel_info_opts(tun_info);
+-              if (!md)
++              if (tun_info->options_len < sizeof(*md))
+                       goto tx_err;
++              md = ip_tunnel_info_opts(tun_info);
+               tun_id = tunnel_id_to_key32(key->tun_id);
+               if (md->version == 1) {
diff --git a/queue-4.19/inet-stop-leaking-jiffies-on-the-wire.patch b/queue-4.19/inet-stop-leaking-jiffies-on-the-wire.patch
new file mode 100644 (file)
index 0000000..193baaa
--- /dev/null
@@ -0,0 +1,106 @@
+From foo@baz Wed 06 Nov 2019 03:31:22 PM CET
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 1 Nov 2019 10:32:19 -0700
+Subject: inet: stop leaking jiffies on the wire
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit a904a0693c189691eeee64f6c6b188bd7dc244e9 ]
+
+Historically linux tried to stick to RFC 791, 1122, 2003
+for IPv4 ID field generation.
+
+RFC 6864 made clear that no matter how hard we try,
+we can not ensure unicity of IP ID within maximum
+lifetime for all datagrams with a given source
+address/destination address/protocol tuple.
+
+Linux uses a per socket inet generator (inet_id), initialized
+at connection startup with a XOR of 'jiffies' and other
+fields that appear clear on the wire.
+
+Thiemo Nagel pointed that this strategy is a privacy
+concern as this provides 16 bits of entropy to fingerprint
+devices.
+
+Let's switch to a random starting point, this is just as
+good as far as RFC 6864 is concerned and does not leak
+anything critical.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Thiemo Nagel <tnagel@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/crypto/chelsio/chtls/chtls_cm.c |    2 +-
+ net/dccp/ipv4.c                         |    2 +-
+ net/ipv4/datagram.c                     |    2 +-
+ net/ipv4/tcp_ipv4.c                     |    4 ++--
+ net/sctp/socket.c                       |    2 +-
+ 5 files changed, 6 insertions(+), 6 deletions(-)
+
+--- a/drivers/crypto/chelsio/chtls/chtls_cm.c
++++ b/drivers/crypto/chelsio/chtls/chtls_cm.c
+@@ -1276,7 +1276,7 @@ static void make_established(struct sock
+       tp->write_seq = snd_isn;
+       tp->snd_nxt = snd_isn;
+       tp->snd_una = snd_isn;
+-      inet_sk(sk)->inet_id = tp->write_seq ^ jiffies;
++      inet_sk(sk)->inet_id = prandom_u32();
+       assign_rxopt(sk, opt);
+       if (tp->rcv_wnd > (RCV_BUFSIZ_M << 10))
+--- a/net/dccp/ipv4.c
++++ b/net/dccp/ipv4.c
+@@ -121,7 +121,7 @@ int dccp_v4_connect(struct sock *sk, str
+                                                   inet->inet_daddr,
+                                                   inet->inet_sport,
+                                                   inet->inet_dport);
+-      inet->inet_id = dp->dccps_iss ^ jiffies;
++      inet->inet_id = prandom_u32();
+       err = dccp_connect(sk);
+       rt = NULL;
+--- a/net/ipv4/datagram.c
++++ b/net/ipv4/datagram.c
+@@ -77,7 +77,7 @@ int __ip4_datagram_connect(struct sock *
+       reuseport_has_conns(sk, true);
+       sk->sk_state = TCP_ESTABLISHED;
+       sk_set_txhash(sk);
+-      inet->inet_id = jiffies;
++      inet->inet_id = prandom_u32();
+       sk_dst_set(sk, &rt->dst);
+       err = 0;
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -305,7 +305,7 @@ int tcp_v4_connect(struct sock *sk, stru
+                                                inet->inet_daddr);
+       }
+-      inet->inet_id = tp->write_seq ^ jiffies;
++      inet->inet_id = prandom_u32();
+       if (tcp_fastopen_defer_connect(sk, &err))
+               return err;
+@@ -1436,7 +1436,7 @@ struct sock *tcp_v4_syn_recv_sock(const
+       inet_csk(newsk)->icsk_ext_hdr_len = 0;
+       if (inet_opt)
+               inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
+-      newinet->inet_id = newtp->write_seq ^ jiffies;
++      newinet->inet_id = prandom_u32();
+       if (!dst) {
+               dst = inet_csk_route_child_sock(sk, newsk, req);
+--- a/net/sctp/socket.c
++++ b/net/sctp/socket.c
+@@ -8777,7 +8777,7 @@ void sctp_copy_sock(struct sock *newsk,
+       newinet->inet_rcv_saddr = inet->inet_rcv_saddr;
+       newinet->inet_dport = htons(asoc->peer.port);
+       newinet->pmtudisc = inet->pmtudisc;
+-      newinet->inet_id = asoc->next_tsn ^ jiffies;
++      newinet->inet_id = prandom_u32();
+       newinet->uc_ttl = inet->uc_ttl;
+       newinet->mc_loop = 1;
diff --git a/queue-4.19/ipv4-fix-route-update-on-metric-change.patch b/queue-4.19/ipv4-fix-route-update-on-metric-change.patch
new file mode 100644 (file)
index 0000000..c06772f
--- /dev/null
@@ -0,0 +1,65 @@
+From foo@baz Wed 06 Nov 2019 03:31:22 PM CET
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Sat, 26 Oct 2019 11:53:39 +0200
+Subject: ipv4: fix route update on metric change.
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit 0b834ba00ab5337e938c727e216e1f5249794717 ]
+
+Since commit af4d768ad28c ("net/ipv4: Add support for specifying metric
+of connected routes"), when updating an IP address with a different metric,
+the associated connected route is updated, too.
+
+Still, the mentioned commit doesn't handle properly some corner cases:
+
+$ ip addr add dev eth0 192.168.1.0/24
+$ ip addr add dev eth0 192.168.2.1/32 peer 192.168.2.2
+$ ip addr add dev eth0 192.168.3.1/24
+$ ip addr change dev eth0 192.168.1.0/24 metric 10
+$ ip addr change dev eth0 192.168.2.1/32 peer 192.168.2.2 metric 10
+$ ip addr change dev eth0 192.168.3.1/24 metric 10
+$ ip -4 route
+192.168.1.0/24 dev eth0 proto kernel scope link src 192.168.1.0
+192.168.2.2 dev eth0 proto kernel scope link src 192.168.2.1
+192.168.3.0/24 dev eth0 proto kernel scope link src 192.168.2.1 metric 10
+
+Only the last route is correctly updated.
+
+The problem is the current test in fib_modify_prefix_metric():
+
+       if (!(dev->flags & IFF_UP) ||
+           ifa->ifa_flags & (IFA_F_SECONDARY | IFA_F_NOPREFIXROUTE) ||
+           ipv4_is_zeronet(prefix) ||
+           prefix == ifa->ifa_local || ifa->ifa_prefixlen == 32)
+
+Which should be the logical 'not' of the pre-existing test in
+fib_add_ifaddr():
+
+       if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) &&
+           (prefix != addr || ifa->ifa_prefixlen < 32))
+
+To properly negate the original expression, we need to change the last
+logical 'or' to a logical 'and'.
+
+Fixes: af4d768ad28c ("net/ipv4: Add support for specifying metric of connected routes")
+Reported-and-suggested-by: Beniamino Galvani <bgalvani@redhat.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/fib_frontend.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/fib_frontend.c
++++ b/net/ipv4/fib_frontend.c
+@@ -946,7 +946,7 @@ void fib_modify_prefix_metric(struct in_
+       if (!(dev->flags & IFF_UP) ||
+           ifa->ifa_flags & (IFA_F_SECONDARY | IFA_F_NOPREFIXROUTE) ||
+           ipv4_is_zeronet(prefix) ||
+-          prefix == ifa->ifa_local || ifa->ifa_prefixlen == 32)
++          (prefix == ifa->ifa_local && ifa->ifa_prefixlen == 32))
+               return;
+       /* add the new */
diff --git a/queue-4.19/net-add-read_once-annotation-in-__skb_wait_for_more_packets.patch b/queue-4.19/net-add-read_once-annotation-in-__skb_wait_for_more_packets.patch
new file mode 100644 (file)
index 0000000..d3a6a8f
--- /dev/null
@@ -0,0 +1,79 @@
+From foo@baz Wed 06 Nov 2019 03:31:22 PM CET
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 23 Oct 2019 22:44:52 -0700
+Subject: net: add READ_ONCE() annotation in __skb_wait_for_more_packets()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 7c422d0ce97552dde4a97e6290de70ec6efb0fc6 ]
+
+__skb_wait_for_more_packets() can be called while other cpus
+can feed packets to the socket receive queue.
+
+KCSAN reported :
+
+BUG: KCSAN: data-race in __skb_wait_for_more_packets / __udp_enqueue_schedule_skb
+
+write to 0xffff888102e40b58 of 8 bytes by interrupt on cpu 0:
+ __skb_insert include/linux/skbuff.h:1852 [inline]
+ __skb_queue_before include/linux/skbuff.h:1958 [inline]
+ __skb_queue_tail include/linux/skbuff.h:1991 [inline]
+ __udp_enqueue_schedule_skb+0x2d7/0x410 net/ipv4/udp.c:1470
+ __udp_queue_rcv_skb net/ipv4/udp.c:1940 [inline]
+ udp_queue_rcv_one_skb+0x7bd/0xc70 net/ipv4/udp.c:2057
+ udp_queue_rcv_skb+0xb5/0x400 net/ipv4/udp.c:2074
+ udp_unicast_rcv_skb.isra.0+0x7e/0x1c0 net/ipv4/udp.c:2233
+ __udp4_lib_rcv+0xa44/0x17c0 net/ipv4/udp.c:2300
+ udp_rcv+0x2b/0x40 net/ipv4/udp.c:2470
+ ip_protocol_deliver_rcu+0x4d/0x420 net/ipv4/ip_input.c:204
+ ip_local_deliver_finish+0x110/0x140 net/ipv4/ip_input.c:231
+ NF_HOOK include/linux/netfilter.h:305 [inline]
+ NF_HOOK include/linux/netfilter.h:299 [inline]
+ ip_local_deliver+0x133/0x210 net/ipv4/ip_input.c:252
+ dst_input include/net/dst.h:442 [inline]
+ ip_rcv_finish+0x121/0x160 net/ipv4/ip_input.c:413
+ NF_HOOK include/linux/netfilter.h:305 [inline]
+ NF_HOOK include/linux/netfilter.h:299 [inline]
+ ip_rcv+0x18f/0x1a0 net/ipv4/ip_input.c:523
+ __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5010
+ __netif_receive_skb+0x37/0xf0 net/core/dev.c:5124
+ process_backlog+0x1d3/0x420 net/core/dev.c:5955
+
+read to 0xffff888102e40b58 of 8 bytes by task 13035 on cpu 1:
+ __skb_wait_for_more_packets+0xfa/0x320 net/core/datagram.c:100
+ __skb_recv_udp+0x374/0x500 net/ipv4/udp.c:1683
+ udp_recvmsg+0xe1/0xb10 net/ipv4/udp.c:1712
+ inet_recvmsg+0xbb/0x250 net/ipv4/af_inet.c:838
+ sock_recvmsg_nosec+0x5c/0x70 net/socket.c:871
+ ___sys_recvmsg+0x1a0/0x3e0 net/socket.c:2480
+ do_recvmmsg+0x19a/0x5c0 net/socket.c:2601
+ __sys_recvmmsg+0x1ef/0x200 net/socket.c:2680
+ __do_sys_recvmmsg net/socket.c:2703 [inline]
+ __se_sys_recvmmsg net/socket.c:2696 [inline]
+ __x64_sys_recvmmsg+0x89/0xb0 net/socket.c:2696
+ do_syscall_64+0xcc/0x370 arch/x86/entry/common.c:290
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 1 PID: 13035 Comm: syz-executor.3 Not tainted 5.4.0-rc3+ #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/datagram.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/core/datagram.c
++++ b/net/core/datagram.c
+@@ -95,7 +95,7 @@ int __skb_wait_for_more_packets(struct s
+       if (error)
+               goto out_err;
+-      if (sk->sk_receive_queue.prev != skb)
++      if (READ_ONCE(sk->sk_receive_queue.prev) != skb)
+               goto out;
+       /* Socket shut down? */
diff --git a/queue-4.19/net-add-skb_queue_empty_lockless.patch b/queue-4.19/net-add-skb_queue_empty_lockless.patch
new file mode 100644 (file)
index 0000000..d5053ce
--- /dev/null
@@ -0,0 +1,93 @@
+From foo@baz Wed 06 Nov 2019 03:31:22 PM CET
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 23 Oct 2019 22:44:48 -0700
+Subject: net: add skb_queue_empty_lockless()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit d7d16a89350ab263484c0aa2b523dd3a234e4a80 ]
+
+Some paths call skb_queue_empty() without holding
+the queue lock. We must use a barrier in order
+to not let the compiler do strange things, and avoid
+KCSAN splats.
+
+Adding a barrier in skb_queue_empty() might be overkill,
+I prefer adding a new helper to clearly identify
+points where the callers might be lockless. This might
+help us finding real bugs.
+
+The corresponding WRITE_ONCE() should add zero cost
+for current compilers.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/skbuff.h |   33 ++++++++++++++++++++++++---------
+ 1 file changed, 24 insertions(+), 9 deletions(-)
+
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -1380,6 +1380,19 @@ static inline int skb_queue_empty(const
+ }
+ /**
++ *    skb_queue_empty_lockless - check if a queue is empty
++ *    @list: queue head
++ *
++ *    Returns true if the queue is empty, false otherwise.
++ *    This variant can be used in lockless contexts.
++ */
++static inline bool skb_queue_empty_lockless(const struct sk_buff_head *list)
++{
++      return READ_ONCE(list->next) == (const struct sk_buff *) list;
++}
++
++
++/**
+  *    skb_queue_is_last - check if skb is the last entry in the queue
+  *    @list: queue head
+  *    @skb: buffer
+@@ -1723,9 +1736,11 @@ static inline void __skb_insert(struct s
+                               struct sk_buff *prev, struct sk_buff *next,
+                               struct sk_buff_head *list)
+ {
+-      newsk->next = next;
+-      newsk->prev = prev;
+-      next->prev  = prev->next = newsk;
++      /* see skb_queue_empty_lockless() for the opposite READ_ONCE() */
++      WRITE_ONCE(newsk->next, next);
++      WRITE_ONCE(newsk->prev, prev);
++      WRITE_ONCE(next->prev, newsk);
++      WRITE_ONCE(prev->next, newsk);
+       list->qlen++;
+ }
+@@ -1736,11 +1751,11 @@ static inline void __skb_queue_splice(co
+       struct sk_buff *first = list->next;
+       struct sk_buff *last = list->prev;
+-      first->prev = prev;
+-      prev->next = first;
++      WRITE_ONCE(first->prev, prev);
++      WRITE_ONCE(prev->next, first);
+-      last->next = next;
+-      next->prev = last;
++      WRITE_ONCE(last->next, next);
++      WRITE_ONCE(next->prev, last);
+ }
+ /**
+@@ -1881,8 +1896,8 @@ static inline void __skb_unlink(struct s
+       next       = skb->next;
+       prev       = skb->prev;
+       skb->next  = skb->prev = NULL;
+-      next->prev = prev;
+-      prev->next = next;
++      WRITE_ONCE(next->prev, prev);
++      WRITE_ONCE(prev->next, next);
+ }
+ /**
diff --git a/queue-4.19/net-annotate-accesses-to-sk-sk_incoming_cpu.patch b/queue-4.19/net-annotate-accesses-to-sk-sk_incoming_cpu.patch
new file mode 100644 (file)
index 0000000..48c8969
--- /dev/null
@@ -0,0 +1,158 @@
+From foo@baz Wed 06 Nov 2019 03:31:22 PM CET
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 30 Oct 2019 13:00:04 -0700
+Subject: net: annotate accesses to sk->sk_incoming_cpu
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 7170a977743b72cf3eb46ef6ef89885dc7ad3621 ]
+
+This socket field can be read and written by concurrent cpus.
+
+Use READ_ONCE() and WRITE_ONCE() annotations to document this,
+and avoid some compiler 'optimizations'.
+
+KCSAN reported :
+
+BUG: KCSAN: data-race in tcp_v4_rcv / tcp_v4_rcv
+
+write to 0xffff88812220763c of 4 bytes by interrupt on cpu 0:
+ sk_incoming_cpu_update include/net/sock.h:953 [inline]
+ tcp_v4_rcv+0x1b3c/0x1bb0 net/ipv4/tcp_ipv4.c:1934
+ ip_protocol_deliver_rcu+0x4d/0x420 net/ipv4/ip_input.c:204
+ ip_local_deliver_finish+0x110/0x140 net/ipv4/ip_input.c:231
+ NF_HOOK include/linux/netfilter.h:305 [inline]
+ NF_HOOK include/linux/netfilter.h:299 [inline]
+ ip_local_deliver+0x133/0x210 net/ipv4/ip_input.c:252
+ dst_input include/net/dst.h:442 [inline]
+ ip_rcv_finish+0x121/0x160 net/ipv4/ip_input.c:413
+ NF_HOOK include/linux/netfilter.h:305 [inline]
+ NF_HOOK include/linux/netfilter.h:299 [inline]
+ ip_rcv+0x18f/0x1a0 net/ipv4/ip_input.c:523
+ __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5010
+ __netif_receive_skb+0x37/0xf0 net/core/dev.c:5124
+ process_backlog+0x1d3/0x420 net/core/dev.c:5955
+ napi_poll net/core/dev.c:6392 [inline]
+ net_rx_action+0x3ae/0xa90 net/core/dev.c:6460
+ __do_softirq+0x115/0x33f kernel/softirq.c:292
+ do_softirq_own_stack+0x2a/0x40 arch/x86/entry/entry_64.S:1082
+ do_softirq.part.0+0x6b/0x80 kernel/softirq.c:337
+ do_softirq kernel/softirq.c:329 [inline]
+ __local_bh_enable_ip+0x76/0x80 kernel/softirq.c:189
+
+read to 0xffff88812220763c of 4 bytes by interrupt on cpu 1:
+ sk_incoming_cpu_update include/net/sock.h:952 [inline]
+ tcp_v4_rcv+0x181a/0x1bb0 net/ipv4/tcp_ipv4.c:1934
+ ip_protocol_deliver_rcu+0x4d/0x420 net/ipv4/ip_input.c:204
+ ip_local_deliver_finish+0x110/0x140 net/ipv4/ip_input.c:231
+ NF_HOOK include/linux/netfilter.h:305 [inline]
+ NF_HOOK include/linux/netfilter.h:299 [inline]
+ ip_local_deliver+0x133/0x210 net/ipv4/ip_input.c:252
+ dst_input include/net/dst.h:442 [inline]
+ ip_rcv_finish+0x121/0x160 net/ipv4/ip_input.c:413
+ NF_HOOK include/linux/netfilter.h:305 [inline]
+ NF_HOOK include/linux/netfilter.h:299 [inline]
+ ip_rcv+0x18f/0x1a0 net/ipv4/ip_input.c:523
+ __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5010
+ __netif_receive_skb+0x37/0xf0 net/core/dev.c:5124
+ process_backlog+0x1d3/0x420 net/core/dev.c:5955
+ napi_poll net/core/dev.c:6392 [inline]
+ net_rx_action+0x3ae/0xa90 net/core/dev.c:6460
+ __do_softirq+0x115/0x33f kernel/softirq.c:292
+ run_ksoftirqd+0x46/0x60 kernel/softirq.c:603
+ smpboot_thread_fn+0x37d/0x4a0 kernel/smpboot.c:165
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 1 PID: 16 Comm: ksoftirqd/1 Not tainted 5.4.0-rc3+ #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/sock.h          |    4 ++--
+ net/core/sock.c             |    4 ++--
+ net/ipv4/inet_hashtables.c  |    2 +-
+ net/ipv4/udp.c              |    2 +-
+ net/ipv6/inet6_hashtables.c |    2 +-
+ net/ipv6/udp.c              |    2 +-
+ 6 files changed, 8 insertions(+), 8 deletions(-)
+
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -945,8 +945,8 @@ static inline void sk_incoming_cpu_updat
+ {
+       int cpu = raw_smp_processor_id();
+-      if (unlikely(sk->sk_incoming_cpu != cpu))
+-              sk->sk_incoming_cpu = cpu;
++      if (unlikely(READ_ONCE(sk->sk_incoming_cpu) != cpu))
++              WRITE_ONCE(sk->sk_incoming_cpu, cpu);
+ }
+ static inline void sock_rps_record_flow_hash(__u32 hash)
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1005,7 +1005,7 @@ set_rcvbuf:
+               break;
+       case SO_INCOMING_CPU:
+-              sk->sk_incoming_cpu = val;
++              WRITE_ONCE(sk->sk_incoming_cpu, val);
+               break;
+       case SO_CNX_ADVICE:
+@@ -1341,7 +1341,7 @@ int sock_getsockopt(struct socket *sock,
+               break;
+       case SO_INCOMING_CPU:
+-              v.val = sk->sk_incoming_cpu;
++              v.val = READ_ONCE(sk->sk_incoming_cpu);
+               break;
+       case SO_MEMINFO:
+--- a/net/ipv4/inet_hashtables.c
++++ b/net/ipv4/inet_hashtables.c
+@@ -248,7 +248,7 @@ static inline int compute_score(struct s
+                       if (sk->sk_bound_dev_if)
+                               score += 4;
+               }
+-              if (sk->sk_incoming_cpu == raw_smp_processor_id())
++              if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
+                       score++;
+       }
+       return score;
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -408,7 +408,7 @@ static int compute_score(struct sock *sk
+                       score += 4;
+       }
+-      if (sk->sk_incoming_cpu == raw_smp_processor_id())
++      if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
+               score++;
+       return score;
+ }
+--- a/net/ipv6/inet6_hashtables.c
++++ b/net/ipv6/inet6_hashtables.c
+@@ -118,7 +118,7 @@ static inline int compute_score(struct s
+                       if (sk->sk_bound_dev_if)
+                               score++;
+               }
+-              if (sk->sk_incoming_cpu == raw_smp_processor_id())
++              if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
+                       score++;
+       }
+       return score;
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -154,7 +154,7 @@ static int compute_score(struct sock *sk
+                       score++;
+       }
+-      if (sk->sk_incoming_cpu == raw_smp_processor_id())
++      if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
+               score++;
+       return score;
diff --git a/queue-4.19/net-annotate-lockless-accesses-to-sk-sk_napi_id.patch b/queue-4.19/net-annotate-lockless-accesses-to-sk-sk_napi_id.patch
new file mode 100644 (file)
index 0000000..f2e73ad
--- /dev/null
@@ -0,0 +1,98 @@
+From foo@baz Wed 06 Nov 2019 03:31:22 PM CET
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 29 Oct 2019 10:54:44 -0700
+Subject: net: annotate lockless accesses to sk->sk_napi_id
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit ee8d153d46a3b98c064ee15c0c0a3bbf1450e5a1 ]
+
+We already annotated most accesses to sk->sk_napi_id
+
+We missed sk_mark_napi_id() and sk_mark_napi_id_once()
+which might be called without socket lock held in UDP stack.
+
+KCSAN reported :
+BUG: KCSAN: data-race in udpv6_queue_rcv_one_skb / udpv6_queue_rcv_one_skb
+
+write to 0xffff888121c6d108 of 4 bytes by interrupt on cpu 0:
+ sk_mark_napi_id include/net/busy_poll.h:125 [inline]
+ __udpv6_queue_rcv_skb net/ipv6/udp.c:571 [inline]
+ udpv6_queue_rcv_one_skb+0x70c/0xb40 net/ipv6/udp.c:672
+ udpv6_queue_rcv_skb+0xb5/0x400 net/ipv6/udp.c:689
+ udp6_unicast_rcv_skb.isra.0+0xd7/0x180 net/ipv6/udp.c:832
+ __udp6_lib_rcv+0x69c/0x1770 net/ipv6/udp.c:913
+ udpv6_rcv+0x2b/0x40 net/ipv6/udp.c:1015
+ ip6_protocol_deliver_rcu+0x22a/0xbe0 net/ipv6/ip6_input.c:409
+ ip6_input_finish+0x30/0x50 net/ipv6/ip6_input.c:450
+ NF_HOOK include/linux/netfilter.h:305 [inline]
+ NF_HOOK include/linux/netfilter.h:299 [inline]
+ ip6_input+0x177/0x190 net/ipv6/ip6_input.c:459
+ dst_input include/net/dst.h:442 [inline]
+ ip6_rcv_finish+0x110/0x140 net/ipv6/ip6_input.c:76
+ NF_HOOK include/linux/netfilter.h:305 [inline]
+ NF_HOOK include/linux/netfilter.h:299 [inline]
+ ipv6_rcv+0x1a1/0x1b0 net/ipv6/ip6_input.c:284
+ __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5010
+ __netif_receive_skb+0x37/0xf0 net/core/dev.c:5124
+ process_backlog+0x1d3/0x420 net/core/dev.c:5955
+ napi_poll net/core/dev.c:6392 [inline]
+ net_rx_action+0x3ae/0xa90 net/core/dev.c:6460
+
+write to 0xffff888121c6d108 of 4 bytes by interrupt on cpu 1:
+ sk_mark_napi_id include/net/busy_poll.h:125 [inline]
+ __udpv6_queue_rcv_skb net/ipv6/udp.c:571 [inline]
+ udpv6_queue_rcv_one_skb+0x70c/0xb40 net/ipv6/udp.c:672
+ udpv6_queue_rcv_skb+0xb5/0x400 net/ipv6/udp.c:689
+ udp6_unicast_rcv_skb.isra.0+0xd7/0x180 net/ipv6/udp.c:832
+ __udp6_lib_rcv+0x69c/0x1770 net/ipv6/udp.c:913
+ udpv6_rcv+0x2b/0x40 net/ipv6/udp.c:1015
+ ip6_protocol_deliver_rcu+0x22a/0xbe0 net/ipv6/ip6_input.c:409
+ ip6_input_finish+0x30/0x50 net/ipv6/ip6_input.c:450
+ NF_HOOK include/linux/netfilter.h:305 [inline]
+ NF_HOOK include/linux/netfilter.h:299 [inline]
+ ip6_input+0x177/0x190 net/ipv6/ip6_input.c:459
+ dst_input include/net/dst.h:442 [inline]
+ ip6_rcv_finish+0x110/0x140 net/ipv6/ip6_input.c:76
+ NF_HOOK include/linux/netfilter.h:305 [inline]
+ NF_HOOK include/linux/netfilter.h:299 [inline]
+ ipv6_rcv+0x1a1/0x1b0 net/ipv6/ip6_input.c:284
+ __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5010
+ __netif_receive_skb+0x37/0xf0 net/core/dev.c:5124
+ process_backlog+0x1d3/0x420 net/core/dev.c:5955
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 1 PID: 10890 Comm: syz-executor.0 Not tainted 5.4.0-rc3+ #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+
+Fixes: e68b6e50fa35 ("udp: enable busy polling for all sockets")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/busy_poll.h |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/include/net/busy_poll.h
++++ b/include/net/busy_poll.h
+@@ -134,7 +134,7 @@ static inline void skb_mark_napi_id(stru
+ static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb)
+ {
+ #ifdef CONFIG_NET_RX_BUSY_POLL
+-      sk->sk_napi_id = skb->napi_id;
++      WRITE_ONCE(sk->sk_napi_id, skb->napi_id);
+ #endif
+       sk_rx_queue_set(sk, skb);
+ }
+@@ -144,8 +144,8 @@ static inline void sk_mark_napi_id_once(
+                                       const struct sk_buff *skb)
+ {
+ #ifdef CONFIG_NET_RX_BUSY_POLL
+-      if (!sk->sk_napi_id)
+-              sk->sk_napi_id = skb->napi_id;
++      if (!READ_ONCE(sk->sk_napi_id))
++              WRITE_ONCE(sk->sk_napi_id, skb->napi_id);
+ #endif
+ }
diff --git a/queue-4.19/net-bcmgenet-don-t-set-phydev-link-from-mac.patch b/queue-4.19/net-bcmgenet-don-t-set-phydev-link-from-mac.patch
new file mode 100644 (file)
index 0000000..a04329a
--- /dev/null
@@ -0,0 +1,44 @@
+From foo@baz Wed 06 Nov 2019 03:31:22 PM CET
+From: Doug Berger <opendmb@gmail.com>
+Date: Wed, 16 Oct 2019 16:06:29 -0700
+Subject: net: bcmgenet: don't set phydev->link from MAC
+
+From: Doug Berger <opendmb@gmail.com>
+
+[ Upstream commit 7de48402faa32298c3551ea32c76ccb4f9d3025d ]
+
+When commit 28b2e0d2cd13 ("net: phy: remove parameter new_link from
+phy_mac_interrupt()") removed the new_link parameter it set the
+phydev->link state from the MAC before invoking phy_mac_interrupt().
+
+However, once commit 88d6272acaaa ("net: phy: avoid unneeded MDIO
+reads in genphy_read_status") was added this initialization prevents
+the proper determination of the connection parameters by the function
+genphy_read_status().
+
+This commit removes that initialization to restore the proper
+functionality.
+
+Fixes: 88d6272acaaa ("net: phy: avoid unneeded MDIO reads in genphy_read_status")
+Signed-off-by: Doug Berger <opendmb@gmail.com>
+Acked-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/genet/bcmgenet.c |    4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
++++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+@@ -2619,10 +2619,8 @@ static void bcmgenet_irq_task(struct wor
+       spin_unlock_irq(&priv->lock);
+       /* Link UP/DOWN event */
+-      if (status & UMAC_IRQ_LINK_EVENT) {
+-              priv->dev->phydev->link = !!(status & UMAC_IRQ_LINK_UP);
++      if (status & UMAC_IRQ_LINK_EVENT)
+               phy_mac_interrupt(priv->dev->phydev);
+-      }
+ }
+ /* bcmgenet_isr1: handle Rx and Tx priority queues */
diff --git a/queue-4.19/net-bcmgenet-reset-40nm-ephy-on-energy-detect.patch b/queue-4.19/net-bcmgenet-reset-40nm-ephy-on-energy-detect.patch
new file mode 100644 (file)
index 0000000..19482d6
--- /dev/null
@@ -0,0 +1,65 @@
+From foo@baz Wed 06 Nov 2019 03:31:22 PM CET
+From: Doug Berger <opendmb@gmail.com>
+Date: Wed, 16 Oct 2019 16:06:32 -0700
+Subject: net: bcmgenet: reset 40nm EPHY on energy detect
+
+From: Doug Berger <opendmb@gmail.com>
+
+[ Upstream commit 25382b991d252aed961cd434176240f9de6bb15f ]
+
+The EPHY integrated into the 40nm Set-Top Box devices can falsely
+detect energy when connected to a disabled peer interface. When the
+peer interface is enabled the EPHY will detect and report the link
+as active, but on occasion may get into a state where it is not
+able to exchange data with the connected GENET MAC. This issue has
+not been observed when the link parameters are auto-negotiated;
+however, it has been observed with a manually configured link.
+
+It has been empirically determined that issuing a soft reset to the
+EPHY when energy is detected prevents it from getting into this bad
+state.
+
+Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file")
+Signed-off-by: Doug Berger <opendmb@gmail.com>
+Acked-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/genet/bcmgenet.c |    9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
++++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+@@ -2020,6 +2020,8 @@ static void bcmgenet_link_intr_enable(st
+        */
+       if (priv->internal_phy) {
+               int0_enable |= UMAC_IRQ_LINK_EVENT;
++              if (GENET_IS_V1(priv) || GENET_IS_V2(priv) || GENET_IS_V3(priv))
++                      int0_enable |= UMAC_IRQ_PHY_DET_R;
+       } else if (priv->ext_phy) {
+               int0_enable |= UMAC_IRQ_LINK_EVENT;
+       } else if (priv->phy_interface == PHY_INTERFACE_MODE_MOCA) {
+@@ -2618,9 +2620,14 @@ static void bcmgenet_irq_task(struct wor
+       priv->irq0_stat = 0;
+       spin_unlock_irq(&priv->lock);
++      if (status & UMAC_IRQ_PHY_DET_R &&
++          priv->dev->phydev->autoneg != AUTONEG_ENABLE)
++              phy_init_hw(priv->dev->phydev);
++
+       /* Link UP/DOWN event */
+       if (status & UMAC_IRQ_LINK_EVENT)
+               phy_mac_interrupt(priv->dev->phydev);
++
+ }
+ /* bcmgenet_isr1: handle Rx and Tx priority queues */
+@@ -2715,7 +2722,7 @@ static irqreturn_t bcmgenet_isr0(int irq
+       }
+       /* all other interested interrupts handled in bottom half */
+-      status &= UMAC_IRQ_LINK_EVENT;
++      status &= (UMAC_IRQ_LINK_EVENT | UMAC_IRQ_PHY_DET_R);
+       if (status) {
+               /* Save irq status for bottom-half processing. */
+               spin_lock_irqsave(&priv->lock, flags);
diff --git a/queue-4.19/net-bcmgenet-soft-reset-40nm-ephys-before-mac-init.patch b/queue-4.19/net-bcmgenet-soft-reset-40nm-ephys-before-mac-init.patch
new file mode 100644 (file)
index 0000000..af6bacd
--- /dev/null
@@ -0,0 +1,264 @@
+From foo@baz Wed 06 Nov 2019 03:31:22 PM CET
+From: Doug Berger <opendmb@gmail.com>
+Date: Wed, 16 Oct 2019 16:06:31 -0700
+Subject: net: bcmgenet: soft reset 40nm EPHYs before MAC init
+
+From: Doug Berger <opendmb@gmail.com>
+
+[ Upstream commit 1f515486275a08a17a2c806b844cca18f7de5b34 ]
+
+It turns out that the "Workaround for putting the PHY in IDDQ mode"
+used by the internal EPHYs on 40nm Set-Top Box chips when powering
+down puts the interface to the GENET MAC in a state that can cause
+subsequent MAC resets to be incomplete.
+
+Rather than restore the forced soft reset when powering up internal
+PHYs, this commit moves the invocation of phy_init_hw earlier in
+the MAC initialization sequence to just before the MAC reset in the
+open and resume functions. This allows the interface to be stable
+and allows the MAC resets to be successful.
+
+The bcmgenet_mii_probe() function is split in two to accommodate
+this. The new function bcmgenet_mii_connect() handles the first
+half of the functionality before the MAC initialization, and the
+bcmgenet_mii_config() function is extended to provide the remaining
+PHY configuration following the MAC initialization.
+
+Fixes: 484bfa1507bf ("Revert "net: bcmgenet: Software reset EPHY after power on"")
+Signed-off-by: Doug Berger <opendmb@gmail.com>
+Acked-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/genet/bcmgenet.c |   28 +++---
+ drivers/net/ethernet/broadcom/genet/bcmgenet.h |    2 
+ drivers/net/ethernet/broadcom/genet/bcmmii.c   |  112 +++++++++++--------------
+ 3 files changed, 69 insertions(+), 73 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
++++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+@@ -2879,6 +2879,12 @@ static int bcmgenet_open(struct net_devi
+       if (priv->internal_phy)
+               bcmgenet_power_up(priv, GENET_POWER_PASSIVE);
++      ret = bcmgenet_mii_connect(dev);
++      if (ret) {
++              netdev_err(dev, "failed to connect to PHY\n");
++              goto err_clk_disable;
++      }
++
+       /* take MAC out of reset */
+       bcmgenet_umac_reset(priv);
+@@ -2888,6 +2894,12 @@ static int bcmgenet_open(struct net_devi
+       reg = bcmgenet_umac_readl(priv, UMAC_CMD);
+       priv->crc_fwd_en = !!(reg & CMD_CRC_FWD);
++      ret = bcmgenet_mii_config(dev, true);
++      if (ret) {
++              netdev_err(dev, "unsupported PHY\n");
++              goto err_disconnect_phy;
++      }
++
+       bcmgenet_set_hw_addr(priv, dev->dev_addr);
+       if (priv->internal_phy) {
+@@ -2903,7 +2915,7 @@ static int bcmgenet_open(struct net_devi
+       ret = bcmgenet_init_dma(priv);
+       if (ret) {
+               netdev_err(dev, "failed to initialize DMA\n");
+-              goto err_clk_disable;
++              goto err_disconnect_phy;
+       }
+       /* Always enable ring 16 - descriptor ring */
+@@ -2926,25 +2938,19 @@ static int bcmgenet_open(struct net_devi
+               goto err_irq0;
+       }
+-      ret = bcmgenet_mii_probe(dev);
+-      if (ret) {
+-              netdev_err(dev, "failed to connect to PHY\n");
+-              goto err_irq1;
+-      }
+-
+       bcmgenet_netif_start(dev);
+       netif_tx_start_all_queues(dev);
+       return 0;
+-err_irq1:
+-      free_irq(priv->irq1, priv);
+ err_irq0:
+       free_irq(priv->irq0, priv);
+ err_fini_dma:
+       bcmgenet_dma_teardown(priv);
+       bcmgenet_fini_dma(priv);
++err_disconnect_phy:
++      phy_disconnect(dev->phydev);
+ err_clk_disable:
+       if (priv->internal_phy)
+               bcmgenet_power_down(priv, GENET_POWER_PASSIVE);
+@@ -3657,6 +3663,8 @@ static int bcmgenet_resume(struct device
+       if (priv->internal_phy)
+               bcmgenet_power_up(priv, GENET_POWER_PASSIVE);
++      phy_init_hw(dev->phydev);
++
+       bcmgenet_umac_reset(priv);
+       init_umac(priv);
+@@ -3665,8 +3673,6 @@ static int bcmgenet_resume(struct device
+       if (priv->wolopts)
+               clk_disable_unprepare(priv->clk_wol);
+-      phy_init_hw(dev->phydev);
+-
+       /* Speed settings must be restored */
+       bcmgenet_mii_config(priv->dev, false);
+--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
++++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+@@ -723,8 +723,8 @@ GENET_IO_MACRO(rbuf, GENET_RBUF_OFF);
+ /* MDIO routines */
+ int bcmgenet_mii_init(struct net_device *dev);
++int bcmgenet_mii_connect(struct net_device *dev);
+ int bcmgenet_mii_config(struct net_device *dev, bool init);
+-int bcmgenet_mii_probe(struct net_device *dev);
+ void bcmgenet_mii_exit(struct net_device *dev);
+ void bcmgenet_phy_power_set(struct net_device *dev, bool enable);
+ void bcmgenet_mii_setup(struct net_device *dev);
+--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c
++++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c
+@@ -176,6 +176,46 @@ static void bcmgenet_moca_phy_setup(stru
+                                         bcmgenet_fixed_phy_link_update);
+ }
++int bcmgenet_mii_connect(struct net_device *dev)
++{
++      struct bcmgenet_priv *priv = netdev_priv(dev);
++      struct device_node *dn = priv->pdev->dev.of_node;
++      struct phy_device *phydev;
++      u32 phy_flags = 0;
++      int ret;
++
++      /* Communicate the integrated PHY revision */
++      if (priv->internal_phy)
++              phy_flags = priv->gphy_rev;
++
++      /* Initialize link state variables that bcmgenet_mii_setup() uses */
++      priv->old_link = -1;
++      priv->old_speed = -1;
++      priv->old_duplex = -1;
++      priv->old_pause = -1;
++
++      if (dn) {
++              phydev = of_phy_connect(dev, priv->phy_dn, bcmgenet_mii_setup,
++                                      phy_flags, priv->phy_interface);
++              if (!phydev) {
++                      pr_err("could not attach to PHY\n");
++                      return -ENODEV;
++              }
++      } else {
++              phydev = dev->phydev;
++              phydev->dev_flags = phy_flags;
++
++              ret = phy_connect_direct(dev, phydev, bcmgenet_mii_setup,
++                                       priv->phy_interface);
++              if (ret) {
++                      pr_err("could not attach to PHY\n");
++                      return -ENODEV;
++              }
++      }
++
++      return 0;
++}
++
+ int bcmgenet_mii_config(struct net_device *dev, bool init)
+ {
+       struct bcmgenet_priv *priv = netdev_priv(dev);
+@@ -269,71 +309,21 @@ int bcmgenet_mii_config(struct net_devic
+               bcmgenet_ext_writel(priv, reg, EXT_RGMII_OOB_CTRL);
+       }
+-      if (init)
+-              dev_info(kdev, "configuring instance for %s\n", phy_name);
+-
+-      return 0;
+-}
+-
+-int bcmgenet_mii_probe(struct net_device *dev)
+-{
+-      struct bcmgenet_priv *priv = netdev_priv(dev);
+-      struct device_node *dn = priv->pdev->dev.of_node;
+-      struct phy_device *phydev;
+-      u32 phy_flags = 0;
+-      int ret;
+-
+-      /* Communicate the integrated PHY revision */
+-      if (priv->internal_phy)
+-              phy_flags = priv->gphy_rev;
+-
+-      /* Initialize link state variables that bcmgenet_mii_setup() uses */
+-      priv->old_link = -1;
+-      priv->old_speed = -1;
+-      priv->old_duplex = -1;
+-      priv->old_pause = -1;
+-
+-      if (dn) {
+-              phydev = of_phy_connect(dev, priv->phy_dn, bcmgenet_mii_setup,
+-                                      phy_flags, priv->phy_interface);
+-              if (!phydev) {
+-                      pr_err("could not attach to PHY\n");
+-                      return -ENODEV;
+-              }
+-      } else {
+-              phydev = dev->phydev;
+-              phydev->dev_flags = phy_flags;
++      if (init) {
++              phydev->advertising = phydev->supported;
+-              ret = phy_connect_direct(dev, phydev, bcmgenet_mii_setup,
+-                                       priv->phy_interface);
+-              if (ret) {
+-                      pr_err("could not attach to PHY\n");
+-                      return -ENODEV;
+-              }
+-      }
++              /* The internal PHY has its link interrupts routed to the
++               * Ethernet MAC ISRs. On GENETv5 there is a hardware issue
++               * that prevents the signaling of link UP interrupts when
++               * the link operates at 10Mbps, so fallback to polling for
++               * those versions of GENET.
++               */
++              if (priv->internal_phy && !GENET_IS_V5(priv))
++                      phydev->irq = PHY_IGNORE_INTERRUPT;
+-      /* Configure port multiplexer based on what the probed PHY device since
+-       * reading the 'max-speed' property determines the maximum supported
+-       * PHY speed which is needed for bcmgenet_mii_config() to configure
+-       * things appropriately.
+-       */
+-      ret = bcmgenet_mii_config(dev, true);
+-      if (ret) {
+-              phy_disconnect(dev->phydev);
+-              return ret;
++              dev_info(kdev, "configuring instance for %s\n", phy_name);
+       }
+-      phydev->advertising = phydev->supported;
+-
+-      /* The internal PHY has its link interrupts routed to the
+-       * Ethernet MAC ISRs. On GENETv5 there is a hardware issue
+-       * that prevents the signaling of link UP interrupts when
+-       * the link operates at 10Mbps, so fallback to polling for
+-       * those versions of GENET.
+-       */
+-      if (priv->internal_phy && !GENET_IS_V5(priv))
+-              dev->phydev->irq = PHY_IGNORE_INTERRUPT;
+-
+       return 0;
+ }
diff --git a/queue-4.19/net-dsa-b53-do-not-clear-existing-mirrored-port-mask.patch b/queue-4.19/net-dsa-b53-do-not-clear-existing-mirrored-port-mask.patch
new file mode 100644 (file)
index 0000000..fa1d429
--- /dev/null
@@ -0,0 +1,33 @@
+From foo@baz Wed 06 Nov 2019 03:31:22 PM CET
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Sat, 5 Oct 2019 15:05:18 -0700
+Subject: net: dsa: b53: Do not clear existing mirrored port mask
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+[ Upstream commit c763ac436b668d7417f0979430ec0312ede4093d ]
+
+Clearing the existing bitmask of mirrored ports essentially prevents us
+from capturing more than one port at any given time. This is clearly
+wrong, do not clear the bitmask prior to setting up the new port.
+
+Reported-by: Hubert Feurstein <h.feurstein@gmail.com>
+Fixes: ed3af5fd08eb ("net: dsa: b53: Add support for port mirroring")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Reviewed-by: Vivien Didelot <vivien.didelot@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/b53/b53_common.c |    1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/drivers/net/dsa/b53/b53_common.c
++++ b/drivers/net/dsa/b53/b53_common.c
+@@ -1584,7 +1584,6 @@ int b53_mirror_add(struct dsa_switch *ds
+               loc = B53_EG_MIR_CTL;
+       b53_read16(dev, B53_MGMT_PAGE, loc, &reg);
+-      reg &= ~MIRROR_MASK;
+       reg |= BIT(port);
+       b53_write16(dev, B53_MGMT_PAGE, loc, reg);
diff --git a/queue-4.19/net-dsa-bcm_sf2-fix-imp-setup-for-port-different-than-8.patch b/queue-4.19/net-dsa-bcm_sf2-fix-imp-setup-for-port-different-than-8.patch
new file mode 100644 (file)
index 0000000..9389193
--- /dev/null
@@ -0,0 +1,80 @@
+From foo@baz Wed 06 Nov 2019 03:31:22 PM CET
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Thu, 31 Oct 2019 15:54:05 -0700
+Subject: net: dsa: bcm_sf2: Fix IMP setup for port different than 8
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+[ Upstream commit 5fc0f21246e50afdf318b5a3a941f7f4f57b8947 ]
+
+Since it became possible for the DSA core to use a CPU port different
+than 8, our bcm_sf2_imp_setup() function was broken because it assumes
+that registers are applicable to port 8. In particular, the port's MAC
+is going to stay disabled, so make sure we clear the RX_DIS and TX_DIS
+bits if we are not configured for port 8.
+
+Fixes: 9f91484f6fcc ("net: dsa: make "label" property optional for dsa2")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/bcm_sf2.c |   36 +++++++++++++++++++++---------------
+ 1 file changed, 21 insertions(+), 15 deletions(-)
+
+--- a/drivers/net/dsa/bcm_sf2.c
++++ b/drivers/net/dsa/bcm_sf2.c
+@@ -41,22 +41,11 @@ static void bcm_sf2_imp_setup(struct dsa
+       unsigned int i;
+       u32 reg, offset;
+-      if (priv->type == BCM7445_DEVICE_ID)
+-              offset = CORE_STS_OVERRIDE_IMP;
+-      else
+-              offset = CORE_STS_OVERRIDE_IMP2;
+-
+       /* Enable the port memories */
+       reg = core_readl(priv, CORE_MEM_PSM_VDD_CTRL);
+       reg &= ~P_TXQ_PSM_VDD(port);
+       core_writel(priv, reg, CORE_MEM_PSM_VDD_CTRL);
+-      /* Enable Broadcast, Multicast, Unicast forwarding to IMP port */
+-      reg = core_readl(priv, CORE_IMP_CTL);
+-      reg |= (RX_BCST_EN | RX_MCST_EN | RX_UCST_EN);
+-      reg &= ~(RX_DIS | TX_DIS);
+-      core_writel(priv, reg, CORE_IMP_CTL);
+-
+       /* Enable forwarding */
+       core_writel(priv, SW_FWDG_EN, CORE_SWMODE);
+@@ -75,10 +64,27 @@ static void bcm_sf2_imp_setup(struct dsa
+       b53_brcm_hdr_setup(ds, port);
+-      /* Force link status for IMP port */
+-      reg = core_readl(priv, offset);
+-      reg |= (MII_SW_OR | LINK_STS);
+-      core_writel(priv, reg, offset);
++      if (port == 8) {
++              if (priv->type == BCM7445_DEVICE_ID)
++                      offset = CORE_STS_OVERRIDE_IMP;
++              else
++                      offset = CORE_STS_OVERRIDE_IMP2;
++
++              /* Force link status for IMP port */
++              reg = core_readl(priv, offset);
++              reg |= (MII_SW_OR | LINK_STS);
++              core_writel(priv, reg, offset);
++
++              /* Enable Broadcast, Multicast, Unicast forwarding to IMP port */
++              reg = core_readl(priv, CORE_IMP_CTL);
++              reg |= (RX_BCST_EN | RX_MCST_EN | RX_UCST_EN);
++              reg &= ~(RX_DIS | TX_DIS);
++              core_writel(priv, reg, CORE_IMP_CTL);
++      } else {
++              reg = core_readl(priv, CORE_G_PCTL_PORT(port));
++              reg &= ~(RX_DIS | TX_DIS);
++              core_writel(priv, reg, CORE_G_PCTL_PORT(port));
++      }
+ }
+ static void bcm_sf2_gphy_enable_set(struct dsa_switch *ds, bool enable)
diff --git a/queue-4.19/net-dsa-fix-switch-tree-list.patch b/queue-4.19/net-dsa-fix-switch-tree-list.patch
new file mode 100644 (file)
index 0000000..6cba6a8
--- /dev/null
@@ -0,0 +1,32 @@
+From foo@baz Wed 06 Nov 2019 03:31:22 PM CET
+From: Vivien Didelot <vivien.didelot@gmail.com>
+Date: Fri, 18 Oct 2019 17:02:46 -0400
+Subject: net: dsa: fix switch tree list
+
+From: Vivien Didelot <vivien.didelot@gmail.com>
+
+[ Upstream commit 50c7d2ba9de20f60a2d527ad6928209ef67e4cdd ]
+
+If there are multiple switch trees on the device, only the last one
+will be listed, because the arguments of list_add_tail are swapped.
+
+Fixes: 83c0afaec7b7 ("net: dsa: Add new binding implementation")
+Signed-off-by: Vivien Didelot <vivien.didelot@gmail.com>
+Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/dsa/dsa2.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/dsa/dsa2.c
++++ b/net/dsa/dsa2.c
+@@ -49,7 +49,7 @@ static struct dsa_switch_tree *dsa_tree_
+       dst->index = index;
+       INIT_LIST_HEAD(&dst->list);
+-      list_add_tail(&dsa_tree_list, &dst->list);
++      list_add_tail(&dst->list, &dsa_tree_list);
+       kref_init(&dst->refcount);
diff --git a/queue-4.19/net-ethernet-ftgmac100-fix-dma-coherency-issue-with-sw-checksum.patch b/queue-4.19/net-ethernet-ftgmac100-fix-dma-coherency-issue-with-sw-checksum.patch
new file mode 100644 (file)
index 0000000..e780a94
--- /dev/null
@@ -0,0 +1,73 @@
+From foo@baz Wed 06 Nov 2019 03:31:22 PM CET
+From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Date: Fri, 25 Oct 2019 13:47:24 +1100
+Subject: net: ethernet: ftgmac100: Fix DMA coherency issue with SW checksum
+
+From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+
+[ Upstream commit 88824e3bf29a2fcacfd9ebbfe03063649f0f3254 ]
+
+We are calling the checksum helper after the dma_map_single()
+call to map the packet. This is incorrect as the checksumming
+code will touch the packet from the CPU. This means the cache
+won't be properly flushes (or the bounce buffering will leave
+us with the unmodified packet to DMA).
+
+This moves the calculation of the checksum & vlan tags to
+before the DMA mapping.
+
+This also has the side effect of fixing another bug: If the
+checksum helper fails, we goto "drop" to drop the packet, which
+will not unmap the DMA mapping.
+
+Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Fixes: 05690d633f30 ("ftgmac100: Upgrade to NETIF_F_HW_CSUM")
+Reviewed-by: Vijay Khemka <vijaykhemka@fb.com>
+Tested-by: Vijay Khemka <vijaykhemka@fb.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/faraday/ftgmac100.c |   25 ++++++++++++-------------
+ 1 file changed, 12 insertions(+), 13 deletions(-)
+
+--- a/drivers/net/ethernet/faraday/ftgmac100.c
++++ b/drivers/net/ethernet/faraday/ftgmac100.c
+@@ -739,6 +739,18 @@ static int ftgmac100_hard_start_xmit(str
+        */
+       nfrags = skb_shinfo(skb)->nr_frags;
++      /* Setup HW checksumming */
++      csum_vlan = 0;
++      if (skb->ip_summed == CHECKSUM_PARTIAL &&
++          !ftgmac100_prep_tx_csum(skb, &csum_vlan))
++              goto drop;
++
++      /* Add VLAN tag */
++      if (skb_vlan_tag_present(skb)) {
++              csum_vlan |= FTGMAC100_TXDES1_INS_VLANTAG;
++              csum_vlan |= skb_vlan_tag_get(skb) & 0xffff;
++      }
++
+       /* Get header len */
+       len = skb_headlen(skb);
+@@ -765,19 +777,6 @@ static int ftgmac100_hard_start_xmit(str
+       if (nfrags == 0)
+               f_ctl_stat |= FTGMAC100_TXDES0_LTS;
+       txdes->txdes3 = cpu_to_le32(map);
+-
+-      /* Setup HW checksumming */
+-      csum_vlan = 0;
+-      if (skb->ip_summed == CHECKSUM_PARTIAL &&
+-          !ftgmac100_prep_tx_csum(skb, &csum_vlan))
+-              goto drop;
+-
+-      /* Add VLAN tag */
+-      if (skb_vlan_tag_present(skb)) {
+-              csum_vlan |= FTGMAC100_TXDES1_INS_VLANTAG;
+-              csum_vlan |= skb_vlan_tag_get(skb) & 0xffff;
+-      }
+-
+       txdes->txdes1 = cpu_to_le32(csum_vlan);
+       /* Next descriptor */
diff --git a/queue-4.19/net-fix-sk_page_frag-recursion-from-memory-reclaim.patch b/queue-4.19/net-fix-sk_page_frag-recursion-from-memory-reclaim.patch
new file mode 100644 (file)
index 0000000..4c19b18
--- /dev/null
@@ -0,0 +1,160 @@
+From foo@baz Wed 06 Nov 2019 03:31:22 PM CET
+From: Tejun Heo <tj@kernel.org>
+Date: Thu, 24 Oct 2019 13:50:27 -0700
+Subject: net: fix sk_page_frag() recursion from memory reclaim
+
+From: Tejun Heo <tj@kernel.org>
+
+[ Upstream commit 20eb4f29b60286e0d6dc01d9c260b4bd383c58fb ]
+
+sk_page_frag() optimizes skb_frag allocations by using per-task
+skb_frag cache when it knows it's the only user.  The condition is
+determined by seeing whether the socket allocation mask allows
+blocking - if the allocation may block, it obviously owns the task's
+context and ergo exclusively owns current->task_frag.
+
+Unfortunately, this misses recursion through memory reclaim path.
+Please take a look at the following backtrace.
+
+ [2] RIP: 0010:tcp_sendmsg_locked+0xccf/0xe10
+     ...
+     tcp_sendmsg+0x27/0x40
+     sock_sendmsg+0x30/0x40
+     sock_xmit.isra.24+0xa1/0x170 [nbd]
+     nbd_send_cmd+0x1d2/0x690 [nbd]
+     nbd_queue_rq+0x1b5/0x3b0 [nbd]
+     __blk_mq_try_issue_directly+0x108/0x1b0
+     blk_mq_request_issue_directly+0xbd/0xe0
+     blk_mq_try_issue_list_directly+0x41/0xb0
+     blk_mq_sched_insert_requests+0xa2/0xe0
+     blk_mq_flush_plug_list+0x205/0x2a0
+     blk_flush_plug_list+0xc3/0xf0
+ [1] blk_finish_plug+0x21/0x2e
+     _xfs_buf_ioapply+0x313/0x460
+     __xfs_buf_submit+0x67/0x220
+     xfs_buf_read_map+0x113/0x1a0
+     xfs_trans_read_buf_map+0xbf/0x330
+     xfs_btree_read_buf_block.constprop.42+0x95/0xd0
+     xfs_btree_lookup_get_block+0x95/0x170
+     xfs_btree_lookup+0xcc/0x470
+     xfs_bmap_del_extent_real+0x254/0x9a0
+     __xfs_bunmapi+0x45c/0xab0
+     xfs_bunmapi+0x15/0x30
+     xfs_itruncate_extents_flags+0xca/0x250
+     xfs_free_eofblocks+0x181/0x1e0
+     xfs_fs_destroy_inode+0xa8/0x1b0
+     destroy_inode+0x38/0x70
+     dispose_list+0x35/0x50
+     prune_icache_sb+0x52/0x70
+     super_cache_scan+0x120/0x1a0
+     do_shrink_slab+0x120/0x290
+     shrink_slab+0x216/0x2b0
+     shrink_node+0x1b6/0x4a0
+     do_try_to_free_pages+0xc6/0x370
+     try_to_free_mem_cgroup_pages+0xe3/0x1e0
+     try_charge+0x29e/0x790
+     mem_cgroup_charge_skmem+0x6a/0x100
+     __sk_mem_raise_allocated+0x18e/0x390
+     __sk_mem_schedule+0x2a/0x40
+ [0] tcp_sendmsg_locked+0x8eb/0xe10
+     tcp_sendmsg+0x27/0x40
+     sock_sendmsg+0x30/0x40
+     ___sys_sendmsg+0x26d/0x2b0
+     __sys_sendmsg+0x57/0xa0
+     do_syscall_64+0x42/0x100
+     entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+In [0], tcp_send_msg_locked() was using current->page_frag when it
+called sk_wmem_schedule().  It already calculated how many bytes can
+be fit into current->page_frag.  Due to memory pressure,
+sk_wmem_schedule() called into memory reclaim path which called into
+xfs and then IO issue path.  Because the filesystem in question is
+backed by nbd, the control goes back into the tcp layer - back into
+tcp_sendmsg_locked().
+
+nbd sets sk_allocation to (GFP_NOIO | __GFP_MEMALLOC) which makes
+sense - it's in the process of freeing memory and wants to be able to,
+e.g., drop clean pages to make forward progress.  However, this
+confused sk_page_frag() called from [2].  Because it only tests
+whether the allocation allows blocking which it does, it now thinks
+current->page_frag can be used again although it already was being
+used in [0].
+
+After [2] used current->page_frag, the offset would be increased by
+the used amount.  When the control returns to [0],
+current->page_frag's offset is increased and the previously calculated
+number of bytes now may overrun the end of allocated memory leading to
+silent memory corruptions.
+
+Fix it by adding gfpflags_normal_context() which tests sleepable &&
+!reclaim and use it to determine whether to use current->task_frag.
+
+v2: Eric didn't like gfp flags being tested twice.  Introduce a new
+    helper gfpflags_normal_context() and combine the two tests.
+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Cc: Josef Bacik <josef@toxicpanda.com>
+Cc: Eric Dumazet <eric.dumazet@gmail.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/gfp.h |   23 +++++++++++++++++++++++
+ include/net/sock.h  |   11 ++++++++---
+ 2 files changed, 31 insertions(+), 3 deletions(-)
+
+--- a/include/linux/gfp.h
++++ b/include/linux/gfp.h
+@@ -325,6 +325,29 @@ static inline bool gfpflags_allow_blocki
+       return !!(gfp_flags & __GFP_DIRECT_RECLAIM);
+ }
++/**
++ * gfpflags_normal_context - is gfp_flags a normal sleepable context?
++ * @gfp_flags: gfp_flags to test
++ *
++ * Test whether @gfp_flags indicates that the allocation is from the
++ * %current context and allowed to sleep.
++ *
++ * An allocation being allowed to block doesn't mean it owns the %current
++ * context.  When direct reclaim path tries to allocate memory, the
++ * allocation context is nested inside whatever %current was doing at the
++ * time of the original allocation.  The nested allocation may be allowed
++ * to block but modifying anything %current owns can corrupt the outer
++ * context's expectations.
++ *
++ * %true result from this function indicates that the allocation context
++ * can sleep and use anything that's associated with %current.
++ */
++static inline bool gfpflags_normal_context(const gfp_t gfp_flags)
++{
++      return (gfp_flags & (__GFP_DIRECT_RECLAIM | __GFP_MEMALLOC)) ==
++              __GFP_DIRECT_RECLAIM;
++}
++
+ #ifdef CONFIG_HIGHMEM
+ #define OPT_ZONE_HIGHMEM ZONE_HIGHMEM
+ #else
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -2216,12 +2216,17 @@ struct sk_buff *sk_stream_alloc_skb(stru
+  * sk_page_frag - return an appropriate page_frag
+  * @sk: socket
+  *
+- * If socket allocation mode allows current thread to sleep, it means its
+- * safe to use the per task page_frag instead of the per socket one.
++ * Use the per task page_frag instead of the per socket one for
++ * optimization when we know that we're in the normal context and owns
++ * everything that's associated with %current.
++ *
++ * gfpflags_allow_blocking() isn't enough here as direct reclaim may nest
++ * inside other socket operations and end up recursing into sk_page_frag()
++ * while it's already in use.
+  */
+ static inline struct page_frag *sk_page_frag(struct sock *sk)
+ {
+-      if (gfpflags_allow_blocking(sk->sk_allocation))
++      if (gfpflags_normal_context(sk->sk_allocation))
+               return &current->task_frag;
+       return &sk->sk_frag;
diff --git a/queue-4.19/net-flow_dissector-switch-to-siphash.patch b/queue-4.19/net-flow_dissector-switch-to-siphash.patch
new file mode 100644 (file)
index 0000000..54119bd
--- /dev/null
@@ -0,0 +1,376 @@
+From foo@baz Wed 06 Nov 2019 03:31:22 PM CET
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 22 Oct 2019 07:57:46 -0700
+Subject: net/flow_dissector: switch to siphash
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 55667441c84fa5e0911a0aac44fb059c15ba6da2 ]
+
+UDP IPv6 packets auto flowlabels are using a 32bit secret
+(static u32 hashrnd in net/core/flow_dissector.c) and
+apply jhash() over fields known by the receivers.
+
+Attackers can easily infer the 32bit secret and use this information
+to identify a device and/or user, since this 32bit secret is only
+set at boot time.
+
+Really, using jhash() to generate cookies sent on the wire
+is a serious security concern.
+
+Trying to change the rol32(hash, 16) in ip6_make_flowlabel() would be
+a dead end. Trying to periodically change the secret (like in sch_sfq.c)
+could change paths taken in the network for long lived flows.
+
+Let's switch to siphash, as we did in commit df453700e8d8
+("inet: switch IP ID generator to siphash")
+
+Using a cryptographically strong pseudo random function will solve this
+privacy issue and more generally remove other weak points in the stack.
+
+Packet schedulers using skb_get_hash_perturb() benefit from this change.
+
+Fixes: b56774163f99 ("ipv6: Enable auto flow labels by default")
+Fixes: 42240901f7c4 ("ipv6: Implement different admin modes for automatic flow labels")
+Fixes: 67800f9b1f4e ("ipv6: Call skb_get_hash_flowi6 to get skb->hash in ip6_make_flowlabel")
+Fixes: cb1ce2ef387b ("ipv6: Implement automatic flow label generation on transmit")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Jonathan Berger <jonathann1@walla.com>
+Reported-by: Amit Klein <aksecurity@gmail.com>
+Reported-by: Benny Pinkas <benny@pinkas.net>
+Cc: Tom Herbert <tom@herbertland.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/skbuff.h       |    3 ++-
+ include/net/flow_dissector.h |    3 ++-
+ include/net/fq.h             |    2 +-
+ include/net/fq_impl.h        |    4 ++--
+ net/core/flow_dissector.c    |   38 ++++++++++++++++----------------------
+ net/sched/sch_hhf.c          |    8 ++++----
+ net/sched/sch_sfb.c          |   13 +++++++------
+ net/sched/sch_sfq.c          |   14 ++++++++------
+ 8 files changed, 42 insertions(+), 43 deletions(-)
+
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -1251,7 +1251,8 @@ static inline __u32 skb_get_hash_flowi6(
+       return skb->hash;
+ }
+-__u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb);
++__u32 skb_get_hash_perturb(const struct sk_buff *skb,
++                         const siphash_key_t *perturb);
+ static inline __u32 skb_get_hash_raw(const struct sk_buff *skb)
+ {
+--- a/include/net/flow_dissector.h
++++ b/include/net/flow_dissector.h
+@@ -4,6 +4,7 @@
+ #include <linux/types.h>
+ #include <linux/in6.h>
++#include <linux/siphash.h>
+ #include <uapi/linux/if_ether.h>
+ /**
+@@ -252,7 +253,7 @@ struct flow_keys_basic {
+ struct flow_keys {
+       struct flow_dissector_key_control control;
+ #define FLOW_KEYS_HASH_START_FIELD basic
+-      struct flow_dissector_key_basic basic;
++      struct flow_dissector_key_basic basic __aligned(SIPHASH_ALIGNMENT);
+       struct flow_dissector_key_tags tags;
+       struct flow_dissector_key_vlan vlan;
+       struct flow_dissector_key_vlan cvlan;
+--- a/include/net/fq.h
++++ b/include/net/fq.h
+@@ -70,7 +70,7 @@ struct fq {
+       struct list_head backlogs;
+       spinlock_t lock;
+       u32 flows_cnt;
+-      u32 perturbation;
++      siphash_key_t   perturbation;
+       u32 limit;
+       u32 memory_limit;
+       u32 memory_usage;
+--- a/include/net/fq_impl.h
++++ b/include/net/fq_impl.h
+@@ -118,7 +118,7 @@ static struct fq_flow *fq_flow_classify(
+       lockdep_assert_held(&fq->lock);
+-      hash = skb_get_hash_perturb(skb, fq->perturbation);
++      hash = skb_get_hash_perturb(skb, &fq->perturbation);
+       idx = reciprocal_scale(hash, fq->flows_cnt);
+       flow = &fq->flows[idx];
+@@ -307,7 +307,7 @@ static int fq_init(struct fq *fq, int fl
+       INIT_LIST_HEAD(&fq->backlogs);
+       spin_lock_init(&fq->lock);
+       fq->flows_cnt = max_t(u32, flows_cnt, 1);
+-      fq->perturbation = prandom_u32();
++      get_random_bytes(&fq->perturbation, sizeof(fq->perturbation));
+       fq->quantum = 300;
+       fq->limit = 8192;
+       fq->memory_limit = 16 << 20; /* 16 MBytes */
+--- a/net/core/flow_dissector.c
++++ b/net/core/flow_dissector.c
+@@ -1077,30 +1077,21 @@ out_bad:
+ }
+ EXPORT_SYMBOL(__skb_flow_dissect);
+-static u32 hashrnd __read_mostly;
++static siphash_key_t hashrnd __read_mostly;
+ static __always_inline void __flow_hash_secret_init(void)
+ {
+       net_get_random_once(&hashrnd, sizeof(hashrnd));
+ }
+-static __always_inline u32 __flow_hash_words(const u32 *words, u32 length,
+-                                           u32 keyval)
++static const void *flow_keys_hash_start(const struct flow_keys *flow)
+ {
+-      return jhash2(words, length, keyval);
+-}
+-
+-static inline const u32 *flow_keys_hash_start(const struct flow_keys *flow)
+-{
+-      const void *p = flow;
+-
+-      BUILD_BUG_ON(FLOW_KEYS_HASH_OFFSET % sizeof(u32));
+-      return (const u32 *)(p + FLOW_KEYS_HASH_OFFSET);
++      BUILD_BUG_ON(FLOW_KEYS_HASH_OFFSET % SIPHASH_ALIGNMENT);
++      return &flow->FLOW_KEYS_HASH_START_FIELD;
+ }
+ static inline size_t flow_keys_hash_length(const struct flow_keys *flow)
+ {
+       size_t diff = FLOW_KEYS_HASH_OFFSET + sizeof(flow->addrs);
+-      BUILD_BUG_ON((sizeof(*flow) - FLOW_KEYS_HASH_OFFSET) % sizeof(u32));
+       BUILD_BUG_ON(offsetof(typeof(*flow), addrs) !=
+                    sizeof(*flow) - sizeof(flow->addrs));
+@@ -1115,7 +1106,7 @@ static inline size_t flow_keys_hash_leng
+               diff -= sizeof(flow->addrs.tipckey);
+               break;
+       }
+-      return (sizeof(*flow) - diff) / sizeof(u32);
++      return sizeof(*flow) - diff;
+ }
+ __be32 flow_get_u32_src(const struct flow_keys *flow)
+@@ -1181,14 +1172,15 @@ static inline void __flow_hash_consisten
+       }
+ }
+-static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval)
++static inline u32 __flow_hash_from_keys(struct flow_keys *keys,
++                                      const siphash_key_t *keyval)
+ {
+       u32 hash;
+       __flow_hash_consistentify(keys);
+-      hash = __flow_hash_words(flow_keys_hash_start(keys),
+-                               flow_keys_hash_length(keys), keyval);
++      hash = siphash(flow_keys_hash_start(keys),
++                     flow_keys_hash_length(keys), keyval);
+       if (!hash)
+               hash = 1;
+@@ -1198,12 +1190,13 @@ static inline u32 __flow_hash_from_keys(
+ u32 flow_hash_from_keys(struct flow_keys *keys)
+ {
+       __flow_hash_secret_init();
+-      return __flow_hash_from_keys(keys, hashrnd);
++      return __flow_hash_from_keys(keys, &hashrnd);
+ }
+ EXPORT_SYMBOL(flow_hash_from_keys);
+ static inline u32 ___skb_get_hash(const struct sk_buff *skb,
+-                                struct flow_keys *keys, u32 keyval)
++                                struct flow_keys *keys,
++                                const siphash_key_t *keyval)
+ {
+       skb_flow_dissect_flow_keys(skb, keys,
+                                  FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
+@@ -1251,7 +1244,7 @@ u32 __skb_get_hash_symmetric(const struc
+                          NULL, 0, 0, 0,
+                          FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
+-      return __flow_hash_from_keys(&keys, hashrnd);
++      return __flow_hash_from_keys(&keys, &hashrnd);
+ }
+ EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric);
+@@ -1271,13 +1264,14 @@ void __skb_get_hash(struct sk_buff *skb)
+       __flow_hash_secret_init();
+-      hash = ___skb_get_hash(skb, &keys, hashrnd);
++      hash = ___skb_get_hash(skb, &keys, &hashrnd);
+       __skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys));
+ }
+ EXPORT_SYMBOL(__skb_get_hash);
+-__u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb)
++__u32 skb_get_hash_perturb(const struct sk_buff *skb,
++                         const siphash_key_t *perturb)
+ {
+       struct flow_keys keys;
+--- a/net/sched/sch_hhf.c
++++ b/net/sched/sch_hhf.c
+@@ -4,11 +4,11 @@
+  * Copyright (C) 2013 Nandita Dukkipati <nanditad@google.com>
+  */
+-#include <linux/jhash.h>
+ #include <linux/jiffies.h>
+ #include <linux/module.h>
+ #include <linux/skbuff.h>
+ #include <linux/vmalloc.h>
++#include <linux/siphash.h>
+ #include <net/pkt_sched.h>
+ #include <net/sock.h>
+@@ -125,7 +125,7 @@ struct wdrr_bucket {
+ struct hhf_sched_data {
+       struct wdrr_bucket buckets[WDRR_BUCKET_CNT];
+-      u32                perturbation;   /* hash perturbation */
++      siphash_key_t      perturbation;   /* hash perturbation */
+       u32                quantum;        /* psched_mtu(qdisc_dev(sch)); */
+       u32                drop_overlimit; /* number of times max qdisc packet
+                                           * limit was hit
+@@ -263,7 +263,7 @@ static enum wdrr_bucket_idx hhf_classify
+       }
+       /* Get hashed flow-id of the skb. */
+-      hash = skb_get_hash_perturb(skb, q->perturbation);
++      hash = skb_get_hash_perturb(skb, &q->perturbation);
+       /* Check if this packet belongs to an already established HH flow. */
+       flow_pos = hash & HHF_BIT_MASK;
+@@ -580,7 +580,7 @@ static int hhf_init(struct Qdisc *sch, s
+       sch->limit = 1000;
+       q->quantum = psched_mtu(qdisc_dev(sch));
+-      q->perturbation = prandom_u32();
++      get_random_bytes(&q->perturbation, sizeof(q->perturbation));
+       INIT_LIST_HEAD(&q->new_buckets);
+       INIT_LIST_HEAD(&q->old_buckets);
+--- a/net/sched/sch_sfb.c
++++ b/net/sched/sch_sfb.c
+@@ -22,7 +22,7 @@
+ #include <linux/errno.h>
+ #include <linux/skbuff.h>
+ #include <linux/random.h>
+-#include <linux/jhash.h>
++#include <linux/siphash.h>
+ #include <net/ip.h>
+ #include <net/pkt_sched.h>
+ #include <net/pkt_cls.h>
+@@ -49,7 +49,7 @@ struct sfb_bucket {
+  * (Section 4.4 of SFB reference : moving hash functions)
+  */
+ struct sfb_bins {
+-      u32               perturbation; /* jhash perturbation */
++      siphash_key_t     perturbation; /* siphash key */
+       struct sfb_bucket bins[SFB_LEVELS][SFB_NUMBUCKETS];
+ };
+@@ -221,7 +221,8 @@ static u32 sfb_compute_qlen(u32 *prob_r,
+ static void sfb_init_perturbation(u32 slot, struct sfb_sched_data *q)
+ {
+-      q->bins[slot].perturbation = prandom_u32();
++      get_random_bytes(&q->bins[slot].perturbation,
++                       sizeof(q->bins[slot].perturbation));
+ }
+ static void sfb_swap_slot(struct sfb_sched_data *q)
+@@ -318,9 +319,9 @@ static int sfb_enqueue(struct sk_buff *s
+               /* If using external classifiers, get result and record it. */
+               if (!sfb_classify(skb, fl, &ret, &salt))
+                       goto other_drop;
+-              sfbhash = jhash_1word(salt, q->bins[slot].perturbation);
++              sfbhash = siphash_1u32(salt, &q->bins[slot].perturbation);
+       } else {
+-              sfbhash = skb_get_hash_perturb(skb, q->bins[slot].perturbation);
++              sfbhash = skb_get_hash_perturb(skb, &q->bins[slot].perturbation);
+       }
+@@ -356,7 +357,7 @@ static int sfb_enqueue(struct sk_buff *s
+               /* Inelastic flow */
+               if (q->double_buffering) {
+                       sfbhash = skb_get_hash_perturb(skb,
+-                          q->bins[slot].perturbation);
++                          &q->bins[slot].perturbation);
+                       if (!sfbhash)
+                               sfbhash = 1;
+                       sfb_skb_cb(skb)->hashes[slot] = sfbhash;
+--- a/net/sched/sch_sfq.c
++++ b/net/sched/sch_sfq.c
+@@ -18,7 +18,7 @@
+ #include <linux/errno.h>
+ #include <linux/init.h>
+ #include <linux/skbuff.h>
+-#include <linux/jhash.h>
++#include <linux/siphash.h>
+ #include <linux/slab.h>
+ #include <linux/vmalloc.h>
+ #include <net/netlink.h>
+@@ -121,7 +121,7 @@ struct sfq_sched_data {
+       u8              headdrop;
+       u8              maxdepth;       /* limit of packets per flow */
+-      u32             perturbation;
++      siphash_key_t   perturbation;
+       u8              cur_depth;      /* depth of longest slot */
+       u8              flags;
+       unsigned short  scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
+@@ -161,7 +161,7 @@ static inline struct sfq_head *sfq_dep_h
+ static unsigned int sfq_hash(const struct sfq_sched_data *q,
+                            const struct sk_buff *skb)
+ {
+-      return skb_get_hash_perturb(skb, q->perturbation) & (q->divisor - 1);
++      return skb_get_hash_perturb(skb, &q->perturbation) & (q->divisor - 1);
+ }
+ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
+@@ -611,9 +611,11 @@ static void sfq_perturbation(struct time
+       struct sfq_sched_data *q = from_timer(q, t, perturb_timer);
+       struct Qdisc *sch = q->sch;
+       spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
++      siphash_key_t nkey;
++      get_random_bytes(&nkey, sizeof(nkey));
+       spin_lock(root_lock);
+-      q->perturbation = prandom_u32();
++      q->perturbation = nkey;
+       if (!q->filter_list && q->tail)
+               sfq_rehash(sch);
+       spin_unlock(root_lock);
+@@ -692,7 +694,7 @@ static int sfq_change(struct Qdisc *sch,
+       del_timer(&q->perturb_timer);
+       if (q->perturb_period) {
+               mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
+-              q->perturbation = prandom_u32();
++              get_random_bytes(&q->perturbation, sizeof(q->perturbation));
+       }
+       sch_tree_unlock(sch);
+       kfree(p);
+@@ -749,7 +751,7 @@ static int sfq_init(struct Qdisc *sch, s
+       q->quantum = psched_mtu(qdisc_dev(sch));
+       q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
+       q->perturb_period = 0;
+-      q->perturbation = prandom_u32();
++      get_random_bytes(&q->perturbation, sizeof(q->perturbation));
+       if (opt) {
+               int err = sfq_change(sch, opt);
diff --git a/queue-4.19/net-hisilicon-fix-ping-latency-when-deal-with-high-throughput.patch b/queue-4.19/net-hisilicon-fix-ping-latency-when-deal-with-high-throughput.patch
new file mode 100644 (file)
index 0000000..d75f0f5
--- /dev/null
@@ -0,0 +1,76 @@
+From foo@baz Wed 06 Nov 2019 03:31:22 PM CET
+From: Jiangfeng Xiao <xiaojiangfeng@huawei.com>
+Date: Mon, 28 Oct 2019 13:09:46 +0800
+Subject: net: hisilicon: Fix ping latency when deal with high throughput
+
+From: Jiangfeng Xiao <xiaojiangfeng@huawei.com>
+
+[ Upstream commit e56bd641ca61beb92b135298d5046905f920b734 ]
+
+This is due to error in over budget processing.
+When dealing with high throughput, the used buffers
+that exceeds the budget is not cleaned up. In addition,
+it takes a lot of cycles to clean up the used buffer,
+and then the buffer where the valid data is located can take effect.
+
+Signed-off-by: Jiangfeng Xiao <xiaojiangfeng@huawei.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/hisilicon/hip04_eth.c |   15 +++++++++------
+ 1 file changed, 9 insertions(+), 6 deletions(-)
+
+--- a/drivers/net/ethernet/hisilicon/hip04_eth.c
++++ b/drivers/net/ethernet/hisilicon/hip04_eth.c
+@@ -174,6 +174,7 @@ struct hip04_priv {
+       dma_addr_t rx_phys[RX_DESC_NUM];
+       unsigned int rx_head;
+       unsigned int rx_buf_size;
++      unsigned int rx_cnt_remaining;
+       struct device_node *phy_node;
+       struct phy_device *phy;
+@@ -487,7 +488,6 @@ static int hip04_rx_poll(struct napi_str
+       struct hip04_priv *priv = container_of(napi, struct hip04_priv, napi);
+       struct net_device *ndev = priv->ndev;
+       struct net_device_stats *stats = &ndev->stats;
+-      unsigned int cnt = hip04_recv_cnt(priv);
+       struct rx_desc *desc;
+       struct sk_buff *skb;
+       unsigned char *buf;
+@@ -500,8 +500,8 @@ static int hip04_rx_poll(struct napi_str
+       /* clean up tx descriptors */
+       tx_remaining = hip04_tx_reclaim(ndev, false);
+-
+-      while (cnt && !last) {
++      priv->rx_cnt_remaining += hip04_recv_cnt(priv);
++      while (priv->rx_cnt_remaining && !last) {
+               buf = priv->rx_buf[priv->rx_head];
+               skb = build_skb(buf, priv->rx_buf_size);
+               if (unlikely(!skb)) {
+@@ -547,11 +547,13 @@ refill:
+               hip04_set_recv_desc(priv, phys);
+               priv->rx_head = RX_NEXT(priv->rx_head);
+-              if (rx >= budget)
++              if (rx >= budget) {
++                      --priv->rx_cnt_remaining;
+                       goto done;
++              }
+-              if (--cnt == 0)
+-                      cnt = hip04_recv_cnt(priv);
++              if (--priv->rx_cnt_remaining == 0)
++                      priv->rx_cnt_remaining += hip04_recv_cnt(priv);
+       }
+       if (!(priv->reg_inten & RCV_INT)) {
+@@ -636,6 +638,7 @@ static int hip04_mac_open(struct net_dev
+       int i;
+       priv->rx_head = 0;
++      priv->rx_cnt_remaining = 0;
+       priv->tx_head = 0;
+       priv->tx_tail = 0;
+       hip04_reset_ppe(priv);
diff --git a/queue-4.19/net-mlx4_core-dynamically-set-guaranteed-amount-of-counters-per-vf.patch b/queue-4.19/net-mlx4_core-dynamically-set-guaranteed-amount-of-counters-per-vf.patch
new file mode 100644 (file)
index 0000000..b5dfe6f
--- /dev/null
@@ -0,0 +1,94 @@
+From foo@baz Wed 06 Nov 2019 03:31:22 PM CET
+From: Eran Ben Elisha <eranbe@mellanox.com>
+Date: Sun, 27 Oct 2019 16:39:15 +0200
+Subject: net/mlx4_core: Dynamically set guaranteed amount of counters per VF
+
+From: Eran Ben Elisha <eranbe@mellanox.com>
+
+[ Upstream commit e19868efea0c103f23b4b7e986fd0a703822111f ]
+
+Prior to this patch, the amount of counters guaranteed per VF in the
+resource tracker was MLX4_VF_COUNTERS_PER_PORT * MLX4_MAX_PORTS. It was
+set regardless if the VF was single or dual port.
+This caused several VFs to have no guaranteed counters although the
+system could satisfy their request.
+
+The fix is to dynamically guarantee counters, based on each VF
+specification.
+
+Fixes: 9de92c60beaa ("net/mlx4_core: Adjust counter grant policy in the resource tracker")
+Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
+Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
+Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx4/resource_tracker.c |   42 +++++++++++-------
+ 1 file changed, 26 insertions(+), 16 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
++++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+@@ -471,12 +471,31 @@ void mlx4_init_quotas(struct mlx4_dev *d
+               priv->mfunc.master.res_tracker.res_alloc[RES_MPT].quota[pf];
+ }
+-static int get_max_gauranteed_vfs_counter(struct mlx4_dev *dev)
++static int
++mlx4_calc_res_counter_guaranteed(struct mlx4_dev *dev,
++                               struct resource_allocator *res_alloc,
++                               int vf)
+ {
+-      /* reduce the sink counter */
+-      return (dev->caps.max_counters - 1 -
+-              (MLX4_PF_COUNTERS_PER_PORT * MLX4_MAX_PORTS))
+-              / MLX4_MAX_PORTS;
++      struct mlx4_active_ports actv_ports;
++      int ports, counters_guaranteed;
++
++      /* For master, only allocate according to the number of phys ports */
++      if (vf == mlx4_master_func_num(dev))
++              return MLX4_PF_COUNTERS_PER_PORT * dev->caps.num_ports;
++
++      /* calculate real number of ports for the VF */
++      actv_ports = mlx4_get_active_ports(dev, vf);
++      ports = bitmap_weight(actv_ports.ports, dev->caps.num_ports);
++      counters_guaranteed = ports * MLX4_VF_COUNTERS_PER_PORT;
++
++      /* If we do not have enough counters for this VF, do not
++       * allocate any for it. '-1' to reduce the sink counter.
++       */
++      if ((res_alloc->res_reserved + counters_guaranteed) >
++          (dev->caps.max_counters - 1))
++              return 0;
++
++      return counters_guaranteed;
+ }
+ int mlx4_init_resource_tracker(struct mlx4_dev *dev)
+@@ -484,7 +503,6 @@ int mlx4_init_resource_tracker(struct ml
+       struct mlx4_priv *priv = mlx4_priv(dev);
+       int i, j;
+       int t;
+-      int max_vfs_guarantee_counter = get_max_gauranteed_vfs_counter(dev);
+       priv->mfunc.master.res_tracker.slave_list =
+               kcalloc(dev->num_slaves, sizeof(struct slave_list),
+@@ -603,16 +621,8 @@ int mlx4_init_resource_tracker(struct ml
+                               break;
+                       case RES_COUNTER:
+                               res_alloc->quota[t] = dev->caps.max_counters;
+-                              if (t == mlx4_master_func_num(dev))
+-                                      res_alloc->guaranteed[t] =
+-                                              MLX4_PF_COUNTERS_PER_PORT *
+-                                              MLX4_MAX_PORTS;
+-                              else if (t <= max_vfs_guarantee_counter)
+-                                      res_alloc->guaranteed[t] =
+-                                              MLX4_VF_COUNTERS_PER_PORT *
+-                                              MLX4_MAX_PORTS;
+-                              else
+-                                      res_alloc->guaranteed[t] = 0;
++                              res_alloc->guaranteed[t] =
++                                      mlx4_calc_res_counter_guaranteed(dev, res_alloc, t);
+                               break;
+                       default:
+                               break;
diff --git a/queue-4.19/net-mlx5e-fix-ethtool-self-test-link-speed.patch b/queue-4.19/net-mlx5e-fix-ethtool-self-test-link-speed.patch
new file mode 100644 (file)
index 0000000..5f4808e
--- /dev/null
@@ -0,0 +1,60 @@
+From foo@baz Wed 06 Nov 2019 03:31:22 PM CET
+From: Aya Levin <ayal@mellanox.com>
+Date: Wed, 2 Oct 2019 16:53:21 +0300
+Subject: net/mlx5e: Fix ethtool self test: link speed
+
+From: Aya Levin <ayal@mellanox.com>
+
+[ Upstream commit 534e7366f41b0c689b01af4375aefcd1462adedf ]
+
+Ethtool self test contains a test for link speed. This test reads the
+PTYS register and determines whether the current speed is valid or not.
+Change current implementation to use the function mlx5e_port_linkspeed()
+that does the same check and fails when speed is invalid. This code
+redundancy lead to a bug when mlx5e_port_linkspeed() was updated with
+expended speeds and the self test was not.
+
+Fixes: 2c81bfd5ae56 ("net/mlx5e: Move port speed code from en_ethtool.c to en/port.c")
+Signed-off-by: Aya Levin <ayal@mellanox.com>
+Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c |   15 +++------------
+ 1 file changed, 3 insertions(+), 12 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
+@@ -35,6 +35,7 @@
+ #include <linux/udp.h>
+ #include <net/udp.h>
+ #include "en.h"
++#include "en/port.h"
+ enum {
+       MLX5E_ST_LINK_STATE,
+@@ -80,22 +81,12 @@ static int mlx5e_test_link_state(struct
+ static int mlx5e_test_link_speed(struct mlx5e_priv *priv)
+ {
+-      u32 out[MLX5_ST_SZ_DW(ptys_reg)];
+-      u32 eth_proto_oper;
+-      int i;
++      u32 speed;
+       if (!netif_carrier_ok(priv->netdev))
+               return 1;
+-      if (mlx5_query_port_ptys(priv->mdev, out, sizeof(out), MLX5_PTYS_EN, 1))
+-              return 1;
+-
+-      eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);
+-      for (i = 0; i < MLX5E_LINK_MODES_NUMBER; i++) {
+-              if (eth_proto_oper & MLX5E_PROT_MASK(i))
+-                      return 0;
+-      }
+-      return 1;
++      return mlx5e_port_linkspeed(priv->mdev, &speed);
+ }
+ struct mlx5ehdr {
diff --git a/queue-4.19/net-mlx5e-fix-handling-of-compressed-cqes-in-case-of-low-napi-budget.patch b/queue-4.19/net-mlx5e-fix-handling-of-compressed-cqes-in-case-of-low-napi-budget.patch
new file mode 100644 (file)
index 0000000..9402d7d
--- /dev/null
@@ -0,0 +1,57 @@
+From foo@baz Wed 06 Nov 2019 03:31:22 PM CET
+From: Maxim Mikityanskiy <maximmi@mellanox.com>
+Date: Mon, 16 Sep 2019 14:54:20 +0300
+Subject: net/mlx5e: Fix handling of compressed CQEs in case of low NAPI budget
+
+From: Maxim Mikityanskiy <maximmi@mellanox.com>
+
+[ Upstream commit 9df86bdb6746d7fcfc2fda715f7a7c3d0ddb2654 ]
+
+When CQE compression is enabled, compressed CQEs use the following
+structure: a title is followed by one or many blocks, each containing 8
+mini CQEs (except the last, which may contain fewer mini CQEs).
+
+Due to NAPI budget restriction, a complete structure is not always
+parsed in one NAPI run, and some blocks with mini CQEs may be deferred
+to the next NAPI poll call - we have the mlx5e_decompress_cqes_cont call
+in the beginning of mlx5e_poll_rx_cq. However, if the budget is
+extremely low, some blocks may be left even after that, but the code
+that follows the mlx5e_decompress_cqes_cont call doesn't check it and
+assumes that a new CQE begins, which may not be the case. In such cases,
+random memory corruptions occur.
+
+An extremely low NAPI budget of 8 is used when busy_poll or busy_read is
+active.
+
+This commit adds a check to make sure that the previous compressed CQE
+has been completely parsed after mlx5e_decompress_cqes_cont, otherwise
+it prevents a new CQE from being fetched in the middle of a compressed
+CQE.
+
+This commit fixes random crashes in __build_skb, __page_pool_put_page
+and other not-related-directly places, that used to happen when both CQE
+compression and busy_poll/busy_read were enabled.
+
+Fixes: 7219ab34f184 ("net/mlx5e: CQE compression")
+Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_rx.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+@@ -1267,8 +1267,11 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq
+       if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
+               return 0;
+-      if (cq->decmprs_left)
++      if (cq->decmprs_left) {
+               work_done += mlx5e_decompress_cqes_cont(rq, cq, 0, budget);
++              if (cq->decmprs_left || work_done >= budget)
++                      goto out;
++      }
+       cqe = mlx5_cqwq_get_cqe(&cq->wq);
+       if (!cqe) {
diff --git a/queue-4.19/net-phy-bcm7xxx-define-soft_reset-for-40nm-ephy.patch b/queue-4.19/net-phy-bcm7xxx-define-soft_reset-for-40nm-ephy.patch
new file mode 100644 (file)
index 0000000..9c2be40
--- /dev/null
@@ -0,0 +1,35 @@
+From foo@baz Wed 06 Nov 2019 03:31:22 PM CET
+From: Doug Berger <opendmb@gmail.com>
+Date: Wed, 16 Oct 2019 16:06:30 -0700
+Subject: net: phy: bcm7xxx: define soft_reset for 40nm EPHY
+
+From: Doug Berger <opendmb@gmail.com>
+
+[ Upstream commit fe586b823372a9f43f90e2c6aa0573992ce7ccb7 ]
+
+The internal 40nm EPHYs use a "Workaround for putting the PHY in
+IDDQ mode." These PHYs require a soft reset to restore functionality
+after they are powered back up.
+
+This commit defines the soft_reset function to use genphy_soft_reset
+during phy_init_hw to accommodate this.
+
+Fixes: 6e2d85ec0559 ("net: phy: Stop with excessive soft reset")
+Signed-off-by: Doug Berger <opendmb@gmail.com>
+Acked-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/phy/bcm7xxx.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/phy/bcm7xxx.c
++++ b/drivers/net/phy/bcm7xxx.c
+@@ -643,6 +643,7 @@ static int bcm7xxx_28nm_probe(struct phy
+       .name           = _name,                                        \
+       .features       = PHY_BASIC_FEATURES,                           \
+       .flags          = PHY_IS_INTERNAL,                              \
++      .soft_reset     = genphy_soft_reset,                            \
+       .config_init    = bcm7xxx_config_init,                          \
+       .suspend        = bcm7xxx_suspend,                              \
+       .resume         = bcm7xxx_config_init,                          \
diff --git a/queue-4.19/net-usb-lan78xx-connect-phy-before-registering-mac.patch b/queue-4.19/net-usb-lan78xx-connect-phy-before-registering-mac.patch
new file mode 100644 (file)
index 0000000..af75378
--- /dev/null
@@ -0,0 +1,60 @@
+From foo@baz Wed 06 Nov 2019 03:31:22 PM CET
+From: Andrew Lunn <andrew@lunn.ch>
+Date: Thu, 17 Oct 2019 21:29:26 +0200
+Subject: net: usb: lan78xx: Connect PHY before registering MAC
+
+From: Andrew Lunn <andrew@lunn.ch>
+
+[ Upstream commit 38b4fe320119859c11b1dc06f6b4987a16344fa1 ]
+
+As soon as the netdev is registers, the kernel can start using the
+interface. If the driver connects the MAC to the PHY after the netdev
+is registered, there is a race condition where the interface can be
+opened without having the PHY connected.
+
+Change the order to close this race condition.
+
+Fixes: 92571a1aae40 ("lan78xx: Connect phy early")
+Reported-by: Daniel Wagner <dwagner@suse.de>
+Signed-off-by: Andrew Lunn <andrew@lunn.ch>
+Tested-by: Daniel Wagner <dwagner@suse.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/usb/lan78xx.c |   12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+--- a/drivers/net/usb/lan78xx.c
++++ b/drivers/net/usb/lan78xx.c
+@@ -3799,10 +3799,14 @@ static int lan78xx_probe(struct usb_inte
+       /* driver requires remote-wakeup capability during autosuspend. */
+       intf->needs_remote_wakeup = 1;
++      ret = lan78xx_phy_init(dev);
++      if (ret < 0)
++              goto out4;
++
+       ret = register_netdev(netdev);
+       if (ret != 0) {
+               netif_err(dev, probe, netdev, "couldn't register the device\n");
+-              goto out4;
++              goto out5;
+       }
+       usb_set_intfdata(intf, dev);
+@@ -3815,14 +3819,10 @@ static int lan78xx_probe(struct usb_inte
+       pm_runtime_set_autosuspend_delay(&udev->dev,
+                                        DEFAULT_AUTOSUSPEND_DELAY);
+-      ret = lan78xx_phy_init(dev);
+-      if (ret < 0)
+-              goto out5;
+-
+       return 0;
+ out5:
+-      unregister_netdev(netdev);
++      phy_disconnect(netdev->phydev);
+ out4:
+       usb_free_urb(dev->urb_intr);
+ out3:
diff --git a/queue-4.19/net-usb-lan78xx-disable-interrupts-before-calling-generic_handle_irq.patch b/queue-4.19/net-usb-lan78xx-disable-interrupts-before-calling-generic_handle_irq.patch
new file mode 100644 (file)
index 0000000..ab601e1
--- /dev/null
@@ -0,0 +1,92 @@
+From foo@baz Wed 06 Nov 2019 03:31:22 PM CET
+From: Daniel Wagner <dwagner@suse.de>
+Date: Fri, 25 Oct 2019 10:04:13 +0200
+Subject: net: usb: lan78xx: Disable interrupts before calling generic_handle_irq()
+
+From: Daniel Wagner <dwagner@suse.de>
+
+[ Upstream commit 0a29ac5bd3a988dc151c8d26910dec2557421f64 ]
+
+lan78xx_status() will run with interrupts enabled due to the change in
+ed194d136769 ("usb: core: remove local_irq_save() around ->complete()
+handler"). generic_handle_irq() expects to be run with IRQs disabled.
+
+[    4.886203] 000: irq 79 handler irq_default_primary_handler+0x0/0x8 enabled interrupts
+[    4.886243] 000: WARNING: CPU: 0 PID: 0 at kernel/irq/handle.c:152 __handle_irq_event_percpu+0x154/0x168
+[    4.896294] 000: Modules linked in:
+[    4.896301] 000: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.3.6 #39
+[    4.896310] 000: Hardware name: Raspberry Pi 3 Model B+ (DT)
+[    4.896315] 000: pstate: 60000005 (nZCv daif -PAN -UAO)
+[    4.896321] 000: pc : __handle_irq_event_percpu+0x154/0x168
+[    4.896331] 000: lr : __handle_irq_event_percpu+0x154/0x168
+[    4.896339] 000: sp : ffff000010003cc0
+[    4.896346] 000: x29: ffff000010003cc0 x28: 0000000000000060
+[    4.896355] 000: x27: ffff000011021980 x26: ffff00001189c72b
+[    4.896364] 000: x25: ffff000011702bc0 x24: ffff800036d6e400
+[    4.896373] 000: x23: 000000000000004f x22: ffff000010003d64
+[    4.896381] 000: x21: 0000000000000000 x20: 0000000000000002
+[    4.896390] 000: x19: ffff8000371c8480 x18: 0000000000000060
+[    4.896398] 000: x17: 0000000000000000 x16: 00000000000000eb
+[    4.896406] 000: x15: ffff000011712d18 x14: 7265746e69206465
+[    4.896414] 000: x13: ffff000010003ba0 x12: ffff000011712df0
+[    4.896422] 000: x11: 0000000000000001 x10: ffff000011712e08
+[    4.896430] 000: x9 : 0000000000000001 x8 : 000000000003c920
+[    4.896437] 000: x7 : ffff0000118cc410 x6 : ffff0000118c7f00
+[    4.896445] 000: x5 : 000000000003c920 x4 : 0000000000004510
+[    4.896453] 000: x3 : ffff000011712dc8 x2 : 0000000000000000
+[    4.896461] 000: x1 : 73a3f67df94c1500 x0 : 0000000000000000
+[    4.896466] 000: Call trace:
+[    4.896471] 000:  __handle_irq_event_percpu+0x154/0x168
+[    4.896481] 000:  handle_irq_event_percpu+0x50/0xb0
+[    4.896489] 000:  handle_irq_event+0x40/0x98
+[    4.896497] 000:  handle_simple_irq+0xa4/0xf0
+[    4.896505] 000:  generic_handle_irq+0x24/0x38
+[    4.896513] 000:  intr_complete+0xb0/0xe0
+[    4.896525] 000:  __usb_hcd_giveback_urb+0x58/0xd8
+[    4.896533] 000:  usb_giveback_urb_bh+0xd0/0x170
+[    4.896539] 000:  tasklet_action_common.isra.0+0x9c/0x128
+[    4.896549] 000:  tasklet_hi_action+0x24/0x30
+[    4.896556] 000:  __do_softirq+0x120/0x23c
+[    4.896564] 000:  irq_exit+0xb8/0xd8
+[    4.896571] 000:  __handle_domain_irq+0x64/0xb8
+[    4.896579] 000:  bcm2836_arm_irqchip_handle_irq+0x60/0xc0
+[    4.896586] 000:  el1_irq+0xb8/0x140
+[    4.896592] 000:  arch_cpu_idle+0x10/0x18
+[    4.896601] 000:  do_idle+0x200/0x280
+[    4.896608] 000:  cpu_startup_entry+0x20/0x28
+[    4.896615] 000:  rest_init+0xb4/0xc0
+[    4.896623] 000:  arch_call_rest_init+0xc/0x14
+[    4.896632] 000:  start_kernel+0x454/0x480
+
+Fixes: ed194d136769 ("usb: core: remove local_irq_save() around ->complete() handler")
+Cc: Woojung Huh <woojung.huh@microchip.com>
+Cc: Marc Zyngier <maz@kernel.org>
+Cc: Andrew Lunn <andrew@lunn.ch>
+Cc: Stefan Wahren <wahrenst@gmx.net>
+Cc: Jisheng Zhang <Jisheng.Zhang@synaptics.com>
+Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: David Miller <davem@davemloft.net>
+Signed-off-by: Daniel Wagner <dwagner@suse.de>
+Tested-by: Stefan Wahren <wahrenst@gmx.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/usb/lan78xx.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/usb/lan78xx.c
++++ b/drivers/net/usb/lan78xx.c
+@@ -1278,8 +1278,11 @@ static void lan78xx_status(struct lan78x
+               netif_dbg(dev, link, dev->net, "PHY INTR: 0x%08x\n", intdata);
+               lan78xx_defer_kevent(dev, EVENT_LINK_RESET);
+-              if (dev->domain_data.phyirq > 0)
++              if (dev->domain_data.phyirq > 0) {
++                      local_irq_disable();
+                       generic_handle_irq(dev->domain_data.phyirq);
++                      local_irq_enable();
++              }
+       } else
+               netdev_warn(dev->net,
+                           "unexpected interrupt: 0x%08x\n", intdata);
diff --git a/queue-4.19/net-use-skb_queue_empty_lockless-in-busy-poll-contexts.patch b/queue-4.19/net-use-skb_queue_empty_lockless-in-busy-poll-contexts.patch
new file mode 100644 (file)
index 0000000..c9dc821
--- /dev/null
@@ -0,0 +1,81 @@
+From foo@baz Wed 06 Nov 2019 03:31:22 PM CET
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 23 Oct 2019 22:44:51 -0700
+Subject: net: use skb_queue_empty_lockless() in busy poll contexts
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 3f926af3f4d688e2e11e7f8ed04e277a14d4d4a4 ]
+
+Busy polling usually runs without locks.
+Let's use skb_queue_empty_lockless() instead of skb_queue_empty()
+
+Also uses READ_ONCE() in __skb_try_recv_datagram() to address
+a similar potential problem.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/crypto/chelsio/chtls/chtls_io.c |    2 +-
+ net/core/datagram.c                     |    2 +-
+ net/core/sock.c                         |    2 +-
+ net/ipv4/tcp.c                          |    2 +-
+ net/sctp/socket.c                       |    2 +-
+ 5 files changed, 5 insertions(+), 5 deletions(-)
+
+--- a/drivers/crypto/chelsio/chtls/chtls_io.c
++++ b/drivers/crypto/chelsio/chtls/chtls_io.c
+@@ -1716,7 +1716,7 @@ int chtls_recvmsg(struct sock *sk, struc
+               return peekmsg(sk, msg, len, nonblock, flags);
+       if (sk_can_busy_loop(sk) &&
+-          skb_queue_empty(&sk->sk_receive_queue) &&
++          skb_queue_empty_lockless(&sk->sk_receive_queue) &&
+           sk->sk_state == TCP_ESTABLISHED)
+               sk_busy_loop(sk, nonblock);
+--- a/net/core/datagram.c
++++ b/net/core/datagram.c
+@@ -279,7 +279,7 @@ struct sk_buff *__skb_try_recv_datagram(
+                       break;
+               sk_busy_loop(sk, flags & MSG_DONTWAIT);
+-      } while (sk->sk_receive_queue.prev != *last);
++      } while (READ_ONCE(sk->sk_receive_queue.prev) != *last);
+       error = -EAGAIN;
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -3483,7 +3483,7 @@ bool sk_busy_loop_end(void *p, unsigned
+ {
+       struct sock *sk = p;
+-      return !skb_queue_empty(&sk->sk_receive_queue) ||
++      return !skb_queue_empty_lockless(&sk->sk_receive_queue) ||
+              sk_busy_loop_timeout(sk, start_time);
+ }
+ EXPORT_SYMBOL(sk_busy_loop_end);
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -1948,7 +1948,7 @@ int tcp_recvmsg(struct sock *sk, struct
+       if (unlikely(flags & MSG_ERRQUEUE))
+               return inet_recv_error(sk, msg, len, addr_len);
+-      if (sk_can_busy_loop(sk) && skb_queue_empty(&sk->sk_receive_queue) &&
++      if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue) &&
+           (sk->sk_state == TCP_ESTABLISHED))
+               sk_busy_loop(sk, nonblock);
+--- a/net/sctp/socket.c
++++ b/net/sctp/socket.c
+@@ -8334,7 +8334,7 @@ struct sk_buff *sctp_skb_recv_datagram(s
+               if (sk_can_busy_loop(sk)) {
+                       sk_busy_loop(sk, noblock);
+-                      if (!skb_queue_empty(&sk->sk_receive_queue))
++                      if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
+                               continue;
+               }
diff --git a/queue-4.19/net-use-skb_queue_empty_lockless-in-poll-handlers.patch b/queue-4.19/net-use-skb_queue_empty_lockless-in-poll-handlers.patch
new file mode 100644 (file)
index 0000000..92f4ff3
--- /dev/null
@@ -0,0 +1,248 @@
+From foo@baz Wed 06 Nov 2019 03:31:22 PM CET
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 23 Oct 2019 22:44:50 -0700
+Subject: net: use skb_queue_empty_lockless() in poll() handlers
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 3ef7cf57c72f32f61e97f8fa401bc39ea1f1a5d4 ]
+
+Many poll() handlers are lockless. Using skb_queue_empty_lockless()
+instead of skb_queue_empty() is more appropriate.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/isdn/capi/capi.c     |    2 +-
+ net/atm/common.c             |    2 +-
+ net/bluetooth/af_bluetooth.c |    4 ++--
+ net/caif/caif_socket.c       |    2 +-
+ net/core/datagram.c          |    4 ++--
+ net/decnet/af_decnet.c       |    2 +-
+ net/ipv4/tcp.c               |    2 +-
+ net/ipv4/udp.c               |    2 +-
+ net/nfc/llcp_sock.c          |    4 ++--
+ net/phonet/socket.c          |    4 ++--
+ net/sctp/socket.c            |    4 ++--
+ net/tipc/socket.c            |    4 ++--
+ net/unix/af_unix.c           |    6 +++---
+ net/vmw_vsock/af_vsock.c     |    2 +-
+ 14 files changed, 22 insertions(+), 22 deletions(-)
+
+--- a/drivers/isdn/capi/capi.c
++++ b/drivers/isdn/capi/capi.c
+@@ -744,7 +744,7 @@ capi_poll(struct file *file, poll_table
+       poll_wait(file, &(cdev->recvwait), wait);
+       mask = EPOLLOUT | EPOLLWRNORM;
+-      if (!skb_queue_empty(&cdev->recvqueue))
++      if (!skb_queue_empty_lockless(&cdev->recvqueue))
+               mask |= EPOLLIN | EPOLLRDNORM;
+       return mask;
+ }
+--- a/net/atm/common.c
++++ b/net/atm/common.c
+@@ -667,7 +667,7 @@ __poll_t vcc_poll(struct file *file, str
+               mask |= EPOLLHUP;
+       /* readable? */
+-      if (!skb_queue_empty(&sk->sk_receive_queue))
++      if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
+               mask |= EPOLLIN | EPOLLRDNORM;
+       /* writable? */
+--- a/net/bluetooth/af_bluetooth.c
++++ b/net/bluetooth/af_bluetooth.c
+@@ -460,7 +460,7 @@ __poll_t bt_sock_poll(struct file *file,
+       if (sk->sk_state == BT_LISTEN)
+               return bt_accept_poll(sk);
+-      if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
++      if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
+               mask |= EPOLLERR |
+                       (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
+@@ -470,7 +470,7 @@ __poll_t bt_sock_poll(struct file *file,
+       if (sk->sk_shutdown == SHUTDOWN_MASK)
+               mask |= EPOLLHUP;
+-      if (!skb_queue_empty(&sk->sk_receive_queue))
++      if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
+               mask |= EPOLLIN | EPOLLRDNORM;
+       if (sk->sk_state == BT_CLOSED)
+--- a/net/caif/caif_socket.c
++++ b/net/caif/caif_socket.c
+@@ -953,7 +953,7 @@ static __poll_t caif_poll(struct file *f
+               mask |= EPOLLRDHUP;
+       /* readable? */
+-      if (!skb_queue_empty(&sk->sk_receive_queue) ||
++      if (!skb_queue_empty_lockless(&sk->sk_receive_queue) ||
+               (sk->sk_shutdown & RCV_SHUTDOWN))
+               mask |= EPOLLIN | EPOLLRDNORM;
+--- a/net/core/datagram.c
++++ b/net/core/datagram.c
+@@ -842,7 +842,7 @@ __poll_t datagram_poll(struct file *file
+       mask = 0;
+       /* exceptional events? */
+-      if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
++      if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
+               mask |= EPOLLERR |
+                       (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
+@@ -852,7 +852,7 @@ __poll_t datagram_poll(struct file *file
+               mask |= EPOLLHUP;
+       /* readable? */
+-      if (!skb_queue_empty(&sk->sk_receive_queue))
++      if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
+               mask |= EPOLLIN | EPOLLRDNORM;
+       /* Connection-based need to check for termination and startup */
+--- a/net/decnet/af_decnet.c
++++ b/net/decnet/af_decnet.c
+@@ -1213,7 +1213,7 @@ static __poll_t dn_poll(struct file *fil
+       struct dn_scp *scp = DN_SK(sk);
+       __poll_t mask = datagram_poll(file, sock, wait);
+-      if (!skb_queue_empty(&scp->other_receive_queue))
++      if (!skb_queue_empty_lockless(&scp->other_receive_queue))
+               mask |= EPOLLRDBAND;
+       return mask;
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -595,7 +595,7 @@ __poll_t tcp_poll(struct file *file, str
+       }
+       /* This barrier is coupled with smp_wmb() in tcp_reset() */
+       smp_rmb();
+-      if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
++      if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
+               mask |= EPOLLERR;
+       return mask;
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -2651,7 +2651,7 @@ __poll_t udp_poll(struct file *file, str
+       __poll_t mask = datagram_poll(file, sock, wait);
+       struct sock *sk = sock->sk;
+-      if (!skb_queue_empty(&udp_sk(sk)->reader_queue))
++      if (!skb_queue_empty_lockless(&udp_sk(sk)->reader_queue))
+               mask |= EPOLLIN | EPOLLRDNORM;
+       /* Check for false positives due to checksum errors */
+--- a/net/nfc/llcp_sock.c
++++ b/net/nfc/llcp_sock.c
+@@ -566,11 +566,11 @@ static __poll_t llcp_sock_poll(struct fi
+       if (sk->sk_state == LLCP_LISTEN)
+               return llcp_accept_poll(sk);
+-      if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
++      if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
+               mask |= EPOLLERR |
+                       (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
+-      if (!skb_queue_empty(&sk->sk_receive_queue))
++      if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
+               mask |= EPOLLIN | EPOLLRDNORM;
+       if (sk->sk_state == LLCP_CLOSED)
+--- a/net/phonet/socket.c
++++ b/net/phonet/socket.c
+@@ -351,9 +351,9 @@ static __poll_t pn_socket_poll(struct fi
+       if (sk->sk_state == TCP_CLOSE)
+               return EPOLLERR;
+-      if (!skb_queue_empty(&sk->sk_receive_queue))
++      if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
+               mask |= EPOLLIN | EPOLLRDNORM;
+-      if (!skb_queue_empty(&pn->ctrlreq_queue))
++      if (!skb_queue_empty_lockless(&pn->ctrlreq_queue))
+               mask |= EPOLLPRI;
+       if (!mask && sk->sk_state == TCP_CLOSE_WAIT)
+               return EPOLLHUP;
+--- a/net/sctp/socket.c
++++ b/net/sctp/socket.c
+@@ -7939,7 +7939,7 @@ __poll_t sctp_poll(struct file *file, st
+       mask = 0;
+       /* Is there any exceptional events?  */
+-      if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
++      if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
+               mask |= EPOLLERR |
+                       (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
+       if (sk->sk_shutdown & RCV_SHUTDOWN)
+@@ -7948,7 +7948,7 @@ __poll_t sctp_poll(struct file *file, st
+               mask |= EPOLLHUP;
+       /* Is it readable?  Reconsider this code with TCP-style support.  */
+-      if (!skb_queue_empty(&sk->sk_receive_queue))
++      if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
+               mask |= EPOLLIN | EPOLLRDNORM;
+       /* The association is either gone or not ready.  */
+--- a/net/tipc/socket.c
++++ b/net/tipc/socket.c
+@@ -731,7 +731,7 @@ static __poll_t tipc_poll(struct file *f
+               /* fall thru' */
+       case TIPC_LISTEN:
+       case TIPC_CONNECTING:
+-              if (!skb_queue_empty(&sk->sk_receive_queue))
++              if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
+                       revents |= EPOLLIN | EPOLLRDNORM;
+               break;
+       case TIPC_OPEN:
+@@ -739,7 +739,7 @@ static __poll_t tipc_poll(struct file *f
+                       revents |= EPOLLOUT;
+               if (!tipc_sk_type_connectionless(sk))
+                       break;
+-              if (skb_queue_empty(&sk->sk_receive_queue))
++              if (skb_queue_empty_lockless(&sk->sk_receive_queue))
+                       break;
+               revents |= EPOLLIN | EPOLLRDNORM;
+               break;
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -2661,7 +2661,7 @@ static __poll_t unix_poll(struct file *f
+               mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
+       /* readable? */
+-      if (!skb_queue_empty(&sk->sk_receive_queue))
++      if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
+               mask |= EPOLLIN | EPOLLRDNORM;
+       /* Connection-based need to check for termination and startup */
+@@ -2690,7 +2690,7 @@ static __poll_t unix_dgram_poll(struct f
+       mask = 0;
+       /* exceptional events? */
+-      if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
++      if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
+               mask |= EPOLLERR |
+                       (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
+@@ -2700,7 +2700,7 @@ static __poll_t unix_dgram_poll(struct f
+               mask |= EPOLLHUP;
+       /* readable? */
+-      if (!skb_queue_empty(&sk->sk_receive_queue))
++      if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
+               mask |= EPOLLIN | EPOLLRDNORM;
+       /* Connection-based need to check for termination and startup */
+--- a/net/vmw_vsock/af_vsock.c
++++ b/net/vmw_vsock/af_vsock.c
+@@ -873,7 +873,7 @@ static __poll_t vsock_poll(struct file *
+                * the queue and write as long as the socket isn't shutdown for
+                * sending.
+                */
+-              if (!skb_queue_empty(&sk->sk_receive_queue) ||
++              if (!skb_queue_empty_lockless(&sk->sk_receive_queue) ||
+                   (sk->sk_shutdown & RCV_SHUTDOWN)) {
+                       mask |= EPOLLIN | EPOLLRDNORM;
+               }
diff --git a/queue-4.19/net-zeroing-the-structure-ethtool_wolinfo-in-ethtool_get_wol.patch b/queue-4.19/net-zeroing-the-structure-ethtool_wolinfo-in-ethtool_get_wol.patch
new file mode 100644 (file)
index 0000000..c2c92d1
--- /dev/null
@@ -0,0 +1,36 @@
+From foo@baz Wed 06 Nov 2019 03:31:22 PM CET
+From: zhanglin <zhang.lin16@zte.com.cn>
+Date: Sat, 26 Oct 2019 15:54:16 +0800
+Subject: net: Zeroing the structure ethtool_wolinfo in ethtool_get_wol()
+
+From: zhanglin <zhang.lin16@zte.com.cn>
+
+[ Upstream commit 5ff223e86f5addbfae26419cbb5d61d98f6fbf7d ]
+
+memset() the structure ethtool_wolinfo, which contains padding bytes,
+because those padding bytes were not being zeroed out.
+
+Signed-off-by: zhanglin <zhang.lin16@zte.com.cn>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/ethtool.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/core/ethtool.c
++++ b/net/core/ethtool.c
+@@ -1482,11 +1482,13 @@ static int ethtool_reset(struct net_devi
+ static int ethtool_get_wol(struct net_device *dev, char __user *useraddr)
+ {
+-      struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL };
++      struct ethtool_wolinfo wol;
+       if (!dev->ethtool_ops->get_wol)
+               return -EOPNOTSUPP;
++      memset(&wol, 0, sizeof(struct ethtool_wolinfo));
++      wol.cmd = ETHTOOL_GWOL;
+       dev->ethtool_ops->get_wol(dev, &wol);
+       if (copy_to_user(useraddr, &wol, sizeof(wol)))
diff --git a/queue-4.19/netns-fix-gfp-flags-in-rtnl_net_notifyid.patch b/queue-4.19/netns-fix-gfp-flags-in-rtnl_net_notifyid.patch
new file mode 100644 (file)
index 0000000..7341e95
--- /dev/null
@@ -0,0 +1,283 @@
+From foo@baz Wed 06 Nov 2019 03:31:22 PM CET
+From: Guillaume Nault <gnault@redhat.com>
+Date: Wed, 23 Oct 2019 18:39:04 +0200
+Subject: netns: fix GFP flags in rtnl_net_notifyid()
+
+From: Guillaume Nault <gnault@redhat.com>
+
+[ Upstream commit d4e4fdf9e4a27c87edb79b1478955075be141f67 ]
+
+In rtnl_net_notifyid(), we certainly can't pass a null GFP flag to
+rtnl_notify(). A GFP_KERNEL flag would be fine in most circumstances,
+but there are a few paths calling rtnl_net_notifyid() from atomic
+context or from RCU critical sections. The later also precludes the use
+context or from RCU critical sections. The latter also precludes the use
+call is wrong too, as it uses GFP_KERNEL unconditionally.
+
+Therefore, we need to pass the GFP flags as parameter and propagate it
+through function calls until the proper flags can be determined.
+
+In most cases, GFP_KERNEL is fine. The exceptions are:
+  * openvswitch: ovs_vport_cmd_get() and ovs_vport_cmd_dump()
+    indirectly call rtnl_net_notifyid() from RCU critical section,
+
+  * rtnetlink: rtmsg_ifinfo_build_skb() already receives GFP flags as
+    parameter.
+
+Also, in ovs_vport_cmd_build_info(), let's change the GFP flags used
+by nlmsg_new(). The function is allowed to sleep, so better make the
+flags consistent with the ones used in the following
+ovs_vport_cmd_fill_info() call.
+
+Found by code inspection.
+
+Fixes: 9a9634545c70 ("netns: notify netns id events")
+Signed-off-by: Guillaume Nault <gnault@redhat.com>
+Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Acked-by: Pravin B Shelar <pshelar@ovn.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/net_namespace.h |    2 +-
+ net/core/dev.c              |    2 +-
+ net/core/net_namespace.c    |   17 +++++++++--------
+ net/core/rtnetlink.c        |   14 +++++++-------
+ net/openvswitch/datapath.c  |   20 +++++++++++---------
+ 5 files changed, 29 insertions(+), 26 deletions(-)
+
+--- a/include/net/net_namespace.h
++++ b/include/net/net_namespace.h
+@@ -322,7 +322,7 @@ static inline struct net *read_pnet(cons
+ #define __net_initconst       __initconst
+ #endif
+-int peernet2id_alloc(struct net *net, struct net *peer);
++int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp);
+ int peernet2id(struct net *net, struct net *peer);
+ bool peernet_has_id(struct net *net, struct net *peer);
+ struct net *get_net_ns_by_id(struct net *net, int id);
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -9211,7 +9211,7 @@ int dev_change_net_namespace(struct net_
+       call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
+       rcu_barrier();
+-      new_nsid = peernet2id_alloc(dev_net(dev), net);
++      new_nsid = peernet2id_alloc(dev_net(dev), net, GFP_KERNEL);
+       /* If there is an ifindex conflict assign a new one */
+       if (__dev_get_by_index(net, dev->ifindex))
+               new_ifindex = dev_new_index(net);
+--- a/net/core/net_namespace.c
++++ b/net/core/net_namespace.c
+@@ -226,11 +226,11 @@ static int __peernet2id(struct net *net,
+       return __peernet2id_alloc(net, peer, &no);
+ }
+-static void rtnl_net_notifyid(struct net *net, int cmd, int id);
++static void rtnl_net_notifyid(struct net *net, int cmd, int id, gfp_t gfp);
+ /* This function returns the id of a peer netns. If no id is assigned, one will
+  * be allocated and returned.
+  */
+-int peernet2id_alloc(struct net *net, struct net *peer)
++int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp)
+ {
+       bool alloc = false, alive = false;
+       int id;
+@@ -249,7 +249,7 @@ int peernet2id_alloc(struct net *net, st
+       id = __peernet2id_alloc(net, peer, &alloc);
+       spin_unlock_bh(&net->nsid_lock);
+       if (alloc && id >= 0)
+-              rtnl_net_notifyid(net, RTM_NEWNSID, id);
++              rtnl_net_notifyid(net, RTM_NEWNSID, id, gfp);
+       if (alive)
+               put_net(peer);
+       return id;
+@@ -495,7 +495,8 @@ static void unhash_nsid(struct net *net,
+                       idr_remove(&tmp->netns_ids, id);
+               spin_unlock_bh(&tmp->nsid_lock);
+               if (id >= 0)
+-                      rtnl_net_notifyid(tmp, RTM_DELNSID, id);
++                      rtnl_net_notifyid(tmp, RTM_DELNSID, id,
++                                        GFP_KERNEL);
+               if (tmp == last)
+                       break;
+       }
+@@ -720,7 +721,7 @@ static int rtnl_net_newid(struct sk_buff
+       err = alloc_netid(net, peer, nsid);
+       spin_unlock_bh(&net->nsid_lock);
+       if (err >= 0) {
+-              rtnl_net_notifyid(net, RTM_NEWNSID, err);
++              rtnl_net_notifyid(net, RTM_NEWNSID, err, GFP_KERNEL);
+               err = 0;
+       } else if (err == -ENOSPC && nsid >= 0) {
+               err = -EEXIST;
+@@ -862,12 +863,12 @@ static int rtnl_net_dumpid(struct sk_buf
+       return skb->len;
+ }
+-static void rtnl_net_notifyid(struct net *net, int cmd, int id)
++static void rtnl_net_notifyid(struct net *net, int cmd, int id, gfp_t gfp)
+ {
+       struct sk_buff *msg;
+       int err = -ENOMEM;
+-      msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL);
++      msg = nlmsg_new(rtnl_net_get_size(), gfp);
+       if (!msg)
+               goto out;
+@@ -875,7 +876,7 @@ static void rtnl_net_notifyid(struct net
+       if (err < 0)
+               goto err_out;
+-      rtnl_notify(msg, net, 0, RTNLGRP_NSID, NULL, 0);
++      rtnl_notify(msg, net, 0, RTNLGRP_NSID, NULL, gfp);
+       return;
+ err_out:
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -1519,7 +1519,7 @@ static noinline_for_stack int nla_put_if
+ static int rtnl_fill_link_netnsid(struct sk_buff *skb,
+                                 const struct net_device *dev,
+-                                struct net *src_net)
++                                struct net *src_net, gfp_t gfp)
+ {
+       bool put_iflink = false;
+@@ -1527,7 +1527,7 @@ static int rtnl_fill_link_netnsid(struct
+               struct net *link_net = dev->rtnl_link_ops->get_link_net(dev);
+               if (!net_eq(dev_net(dev), link_net)) {
+-                      int id = peernet2id_alloc(src_net, link_net);
++                      int id = peernet2id_alloc(src_net, link_net, gfp);
+                       if (nla_put_s32(skb, IFLA_LINK_NETNSID, id))
+                               return -EMSGSIZE;
+@@ -1585,7 +1585,7 @@ static int rtnl_fill_ifinfo(struct sk_bu
+                           int type, u32 pid, u32 seq, u32 change,
+                           unsigned int flags, u32 ext_filter_mask,
+                           u32 event, int *new_nsid, int new_ifindex,
+-                          int tgt_netnsid)
++                          int tgt_netnsid, gfp_t gfp)
+ {
+       struct ifinfomsg *ifm;
+       struct nlmsghdr *nlh;
+@@ -1677,7 +1677,7 @@ static int rtnl_fill_ifinfo(struct sk_bu
+                       goto nla_put_failure;
+       }
+-      if (rtnl_fill_link_netnsid(skb, dev, src_net))
++      if (rtnl_fill_link_netnsid(skb, dev, src_net, gfp))
+               goto nla_put_failure;
+       if (new_nsid &&
+@@ -1933,7 +1933,7 @@ static int rtnl_dump_ifinfo(struct sk_bu
+                                              cb->nlh->nlmsg_seq, 0,
+                                              flags,
+                                              ext_filter_mask, 0, NULL, 0,
+-                                             netnsid);
++                                             netnsid, GFP_KERNEL);
+                       if (err < 0) {
+                               if (likely(skb->len))
+@@ -3215,7 +3215,7 @@ static int rtnl_getlink(struct sk_buff *
+       err = rtnl_fill_ifinfo(nskb, dev, net,
+                              RTM_NEWLINK, NETLINK_CB(skb).portid,
+                              nlh->nlmsg_seq, 0, 0, ext_filter_mask,
+-                             0, NULL, 0, netnsid);
++                             0, NULL, 0, netnsid, GFP_KERNEL);
+       if (err < 0) {
+               /* -EMSGSIZE implies BUG in if_nlmsg_size */
+               WARN_ON(err == -EMSGSIZE);
+@@ -3325,7 +3325,7 @@ struct sk_buff *rtmsg_ifinfo_build_skb(i
+       err = rtnl_fill_ifinfo(skb, dev, dev_net(dev),
+                              type, 0, 0, change, 0, 0, event,
+-                             new_nsid, new_ifindex, -1);
++                             new_nsid, new_ifindex, -1, flags);
+       if (err < 0) {
+               /* -EMSGSIZE implies BUG in if_nlmsg_size() */
+               WARN_ON(err == -EMSGSIZE);
+--- a/net/openvswitch/datapath.c
++++ b/net/openvswitch/datapath.c
+@@ -1843,7 +1843,7 @@ static struct genl_family dp_datapath_ge
+ /* Called with ovs_mutex or RCU read lock. */
+ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
+                                  struct net *net, u32 portid, u32 seq,
+-                                 u32 flags, u8 cmd)
++                                 u32 flags, u8 cmd, gfp_t gfp)
+ {
+       struct ovs_header *ovs_header;
+       struct ovs_vport_stats vport_stats;
+@@ -1864,7 +1864,7 @@ static int ovs_vport_cmd_fill_info(struc
+               goto nla_put_failure;
+       if (!net_eq(net, dev_net(vport->dev))) {
+-              int id = peernet2id_alloc(net, dev_net(vport->dev));
++              int id = peernet2id_alloc(net, dev_net(vport->dev), gfp);
+               if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id))
+                       goto nla_put_failure;
+@@ -1905,11 +1905,12 @@ struct sk_buff *ovs_vport_cmd_build_info
+       struct sk_buff *skb;
+       int retval;
+-      skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
++      skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+       if (!skb)
+               return ERR_PTR(-ENOMEM);
+-      retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd);
++      retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd,
++                                       GFP_KERNEL);
+       BUG_ON(retval < 0);
+       return skb;
+@@ -2042,7 +2043,7 @@ restart:
+       err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
+                                     info->snd_portid, info->snd_seq, 0,
+-                                    OVS_VPORT_CMD_NEW);
++                                    OVS_VPORT_CMD_NEW, GFP_KERNEL);
+       if (netdev_get_fwd_headroom(vport->dev) > dp->max_headroom)
+               update_headroom(dp);
+@@ -2101,7 +2102,7 @@ static int ovs_vport_cmd_set(struct sk_b
+       err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
+                                     info->snd_portid, info->snd_seq, 0,
+-                                    OVS_VPORT_CMD_NEW);
++                                    OVS_VPORT_CMD_NEW, GFP_ATOMIC);
+       BUG_ON(err < 0);
+       ovs_unlock();
+@@ -2140,7 +2141,7 @@ static int ovs_vport_cmd_del(struct sk_b
+       err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
+                                     info->snd_portid, info->snd_seq, 0,
+-                                    OVS_VPORT_CMD_DEL);
++                                    OVS_VPORT_CMD_DEL, GFP_KERNEL);
+       BUG_ON(err < 0);
+       /* the vport deletion may trigger dp headroom update */
+@@ -2182,7 +2183,7 @@ static int ovs_vport_cmd_get(struct sk_b
+               goto exit_unlock_free;
+       err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
+                                     info->snd_portid, info->snd_seq, 0,
+-                                    OVS_VPORT_CMD_NEW);
++                                    OVS_VPORT_CMD_NEW, GFP_ATOMIC);
+       BUG_ON(err < 0);
+       rcu_read_unlock();
+@@ -2218,7 +2219,8 @@ static int ovs_vport_cmd_dump(struct sk_
+                                                   NETLINK_CB(cb->skb).portid,
+                                                   cb->nlh->nlmsg_seq,
+                                                   NLM_F_MULTI,
+-                                                  OVS_VPORT_CMD_NEW) < 0)
++                                                  OVS_VPORT_CMD_NEW,
++                                                  GFP_ATOMIC) < 0)
+                               goto out;
+                       j++;
diff --git a/queue-4.19/r8152-add-device-id-for-lenovo-thinkpad-usb-c-dock-gen-2.patch b/queue-4.19/r8152-add-device-id-for-lenovo-thinkpad-usb-c-dock-gen-2.patch
new file mode 100644 (file)
index 0000000..bcc8e31
--- /dev/null
@@ -0,0 +1,48 @@
+From foo@baz Wed 06 Nov 2019 03:31:22 PM CET
+From: Kazutoshi Noguchi <noguchi.kazutosi@gmail.com>
+Date: Mon, 21 Oct 2019 00:03:07 +0900
+Subject: r8152: add device id for Lenovo ThinkPad USB-C Dock Gen 2
+
+From: Kazutoshi Noguchi <noguchi.kazutosi@gmail.com>
+
+[ Upstream commit b3060531979422d5bb18d80226f978910284dc70 ]
+
+This device is sold as 'ThinkPad USB-C Dock Gen 2 (40AS)'.
+Chipset is RTL8153 and works with r8152.
+Without this, the generic cdc_ether grabs the device, and the device jam
+connected networks up when the machine suspends.
+
+Signed-off-by: Kazutoshi Noguchi <noguchi.kazutosi@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/usb/cdc_ether.c |    7 +++++++
+ drivers/net/usb/r8152.c     |    1 +
+ 2 files changed, 8 insertions(+)
+
+--- a/drivers/net/usb/cdc_ether.c
++++ b/drivers/net/usb/cdc_ether.c
+@@ -800,6 +800,13 @@ static const struct usb_device_id produc
+       .driver_info = 0,
+ },
++/* ThinkPad USB-C Dock Gen 2 (based on Realtek RTL8153) */
++{
++      USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0xa387, USB_CLASS_COMM,
++                      USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE),
++      .driver_info = 0,
++},
++
+ /* NVIDIA Tegra USB 3.0 Ethernet Adapters (based on Realtek RTL8153) */
+ {
+       USB_DEVICE_AND_INTERFACE_INFO(NVIDIA_VENDOR_ID, 0x09ff, USB_CLASS_COMM,
+--- a/drivers/net/usb/r8152.c
++++ b/drivers/net/usb/r8152.c
+@@ -5339,6 +5339,7 @@ static const struct usb_device_id rtl815
+       {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO,  0x7205)},
+       {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO,  0x720c)},
+       {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO,  0x7214)},
++      {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO,  0xa387)},
+       {REALTEK_USB_DEVICE(VENDOR_ID_LINKSYS, 0x0041)},
+       {REALTEK_USB_DEVICE(VENDOR_ID_NVIDIA,  0x09ff)},
+       {REALTEK_USB_DEVICE(VENDOR_ID_TPLINK,  0x0601)},
diff --git a/queue-4.19/r8169-fix-wrong-phy-id-issue-with-rtl8168dp.patch b/queue-4.19/r8169-fix-wrong-phy-id-issue-with-rtl8168dp.patch
new file mode 100644 (file)
index 0000000..68d0063
--- /dev/null
@@ -0,0 +1,39 @@
+From foo@baz Wed 06 Nov 2019 03:31:22 PM CET
+From: Heiner Kallweit <hkallweit1@gmail.com>
+Date: Fri, 1 Nov 2019 00:10:21 +0100
+Subject: r8169: fix wrong PHY ID issue with RTL8168dp
+
+From: Heiner Kallweit <hkallweit1@gmail.com>
+
+[ Upstream commit 62bdc8fd1c21d4263ebd18bec57f82532d09249f ]
+
+As reported in [0] at least one RTL8168dp version has problems
+establishing a link. This chip version has an integrated RTL8211b PHY,
+however the chip seems to report a wrong PHY ID, resulting in a wrong
+PHY driver (for Generic Realtek PHY) being loaded.
+Work around this issue by adding a hook to r8168dp_2_mdio_read()
+for returning the correct PHY ID.
+
+[0] https://bbs.archlinux.org/viewtopic.php?id=246508
+
+Fixes: 242cd9b5866a ("r8169: use phy_resume/phy_suspend")
+Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/realtek/r8169.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/drivers/net/ethernet/realtek/r8169.c
++++ b/drivers/net/ethernet/realtek/r8169.c
+@@ -1010,6 +1010,10 @@ static int r8168dp_2_mdio_read(struct rt
+ {
+       int value;
++      /* Work around issue with chip reporting wrong PHY ID */
++      if (reg == MII_PHYSID2)
++              return 0xc912;
++
+       r8168dp_2_mdio_start(tp);
+       value = r8169_mdio_read(tp, reg);
diff --git a/queue-4.19/selftests-fib_tests-add-more-tests-for-metric-update.patch b/queue-4.19/selftests-fib_tests-add-more-tests-for-metric-update.patch
new file mode 100644 (file)
index 0000000..baf54ff
--- /dev/null
@@ -0,0 +1,51 @@
+From foo@baz Wed 06 Nov 2019 03:31:22 PM CET
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Sat, 26 Oct 2019 11:53:40 +0200
+Subject: selftests: fib_tests: add more tests for metric update
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit 37de3b354150450ba12275397155e68113e99901 ]
+
+This patch adds two more tests to ipv4_addr_metric_test() to
+explicitly cover the scenarios fixed by the previous patch.
+
+Suggested-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/net/fib_tests.sh |   21 +++++++++++++++++++++
+ 1 file changed, 21 insertions(+)
+
+--- a/tools/testing/selftests/net/fib_tests.sh
++++ b/tools/testing/selftests/net/fib_tests.sh
+@@ -1301,6 +1301,27 @@ ipv4_addr_metric_test()
+       fi
+       log_test $rc 0 "Prefix route with metric on link up"
++      # explicitly check for metric changes on edge scenarios
++      run_cmd "$IP addr flush dev dummy2"
++      run_cmd "$IP addr add dev dummy2 172.16.104.0/24 metric 259"
++      run_cmd "$IP addr change dev dummy2 172.16.104.0/24 metric 260"
++      rc=$?
++      if [ $rc -eq 0 ]; then
++              check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.0 metric 260"
++              rc=$?
++      fi
++      log_test $rc 0 "Modify metric of .0/24 address"
++
++      run_cmd "$IP addr flush dev dummy2"
++      run_cmd "$IP addr add dev dummy2 172.16.104.1/32 peer 172.16.104.2 metric 260"
++      run_cmd "$IP addr change dev dummy2 172.16.104.1/32 peer 172.16.104.2 metric 261"
++      rc=$?
++      if [ $rc -eq 0 ]; then
++              check_route "172.16.104.2 dev dummy2 proto kernel scope link src 172.16.104.1 metric 261"
++              rc=$?
++      fi
++      log_test $rc 0 "Modify metric of address with peer route"
++
+       $IP li del dummy1
+       $IP li del dummy2
+       cleanup
diff --git a/queue-4.19/selftests-net-reuseport_dualstack-fix-uninitalized-parameter.patch b/queue-4.19/selftests-net-reuseport_dualstack-fix-uninitalized-parameter.patch
new file mode 100644 (file)
index 0000000..62da11c
--- /dev/null
@@ -0,0 +1,44 @@
+From foo@baz Wed 06 Nov 2019 03:31:22 PM CET
+From: Wei Wang <weiwan@google.com>
+Date: Thu, 31 Oct 2019 16:24:36 -0700
+Subject: selftests: net: reuseport_dualstack: fix uninitalized parameter
+
+From: Wei Wang <weiwan@google.com>
+
+[ Upstream commit d64479a3e3f9924074ca7b50bd72fa5211dca9c1 ]
+
+This test reports EINVAL for getsockopt(SOL_SOCKET, SO_DOMAIN)
+occasionally due to the uninitialized length parameter.
+Initialize it to fix this, and also use int for "test_family" to comply
+with the API standard.
+
+Fixes: d6a61f80b871 ("soreuseport: test mixed v4/v6 sockets")
+Reported-by: Maciej Żenczykowski <maze@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Wei Wang <weiwan@google.com>
+Cc: Craig Gallek <cgallek@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/net/reuseport_dualstack.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/tools/testing/selftests/net/reuseport_dualstack.c
++++ b/tools/testing/selftests/net/reuseport_dualstack.c
+@@ -129,7 +129,7 @@ static void test(int *rcv_fds, int count
+ {
+       struct epoll_event ev;
+       int epfd, i, test_fd;
+-      uint16_t test_family;
++      int test_family;
+       socklen_t len;
+       epfd = epoll_create(1);
+@@ -146,6 +146,7 @@ static void test(int *rcv_fds, int count
+       send_from_v4(proto);
+       test_fd = receive_once(epfd, proto);
++      len = sizeof(test_family);
+       if (getsockopt(test_fd, SOL_SOCKET, SO_DOMAIN, &test_family, &len))
+               error(1, errno, "failed to read socket domain");
+       if (test_family != AF_INET)
index cec7f7e64865b31cd632bf216482e729b9643950..16a5aaa6787bc1b3cdd3d77b8c3b10c4e702ab3f 100644 (file)
@@ -34,3 +34,39 @@ i2c-stm32f7-remove-warning-when-compiling-with-w-1.patch
 cifs-fix-cifsinodeinfo-lock_sem-deadlock-when-reconn.patch
 nbd-protect-cmd-status-with-cmd-lock.patch
 nbd-handle-racing-with-error-ed-out-commands.patch
+cxgb4-fix-panic-when-attaching-to-uld-fail.patch
+dccp-do-not-leak-jiffies-on-the-wire.patch
+erspan-fix-the-tun_info-options_len-check-for-erspan.patch
+inet-stop-leaking-jiffies-on-the-wire.patch
+net-annotate-accesses-to-sk-sk_incoming_cpu.patch
+net-annotate-lockless-accesses-to-sk-sk_napi_id.patch
+net-dsa-bcm_sf2-fix-imp-setup-for-port-different-than-8.patch
+net-ethernet-ftgmac100-fix-dma-coherency-issue-with-sw-checksum.patch
+net-fix-sk_page_frag-recursion-from-memory-reclaim.patch
+net-hisilicon-fix-ping-latency-when-deal-with-high-throughput.patch
+net-mlx4_core-dynamically-set-guaranteed-amount-of-counters-per-vf.patch
+netns-fix-gfp-flags-in-rtnl_net_notifyid.patch
+net-usb-lan78xx-disable-interrupts-before-calling-generic_handle_irq.patch
+net-zeroing-the-structure-ethtool_wolinfo-in-ethtool_get_wol.patch
+selftests-net-reuseport_dualstack-fix-uninitalized-parameter.patch
+udp-fix-data-race-in-udp_set_dev_scratch.patch
+vxlan-check-tun_info-options_len-properly.patch
+net-add-skb_queue_empty_lockless.patch
+udp-use-skb_queue_empty_lockless.patch
+net-use-skb_queue_empty_lockless-in-poll-handlers.patch
+net-use-skb_queue_empty_lockless-in-busy-poll-contexts.patch
+net-add-read_once-annotation-in-__skb_wait_for_more_packets.patch
+ipv4-fix-route-update-on-metric-change.patch
+selftests-fib_tests-add-more-tests-for-metric-update.patch
+net-mlx5e-fix-handling-of-compressed-cqes-in-case-of-low-napi-budget.patch
+r8169-fix-wrong-phy-id-issue-with-rtl8168dp.patch
+net-mlx5e-fix-ethtool-self-test-link-speed.patch
+net-dsa-b53-do-not-clear-existing-mirrored-port-mask.patch
+net-bcmgenet-don-t-set-phydev-link-from-mac.patch
+net-phy-bcm7xxx-define-soft_reset-for-40nm-ephy.patch
+net-bcmgenet-soft-reset-40nm-ephys-before-mac-init.patch
+net-bcmgenet-reset-40nm-ephy-on-energy-detect.patch
+net-usb-lan78xx-connect-phy-before-registering-mac.patch
+net-dsa-fix-switch-tree-list.patch
+r8152-add-device-id-for-lenovo-thinkpad-usb-c-dock-gen-2.patch
+net-flow_dissector-switch-to-siphash.patch
diff --git a/queue-4.19/udp-fix-data-race-in-udp_set_dev_scratch.patch b/queue-4.19/udp-fix-data-race-in-udp_set_dev_scratch.patch
new file mode 100644 (file)
index 0000000..17d979e
--- /dev/null
@@ -0,0 +1,102 @@
+From foo@baz Wed 06 Nov 2019 03:31:22 PM CET
+From: Eric Dumazet <edumazet@google.com>
+Date: Thu, 24 Oct 2019 11:43:31 -0700
+Subject: udp: fix data-race in udp_set_dev_scratch()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit a793183caa9afae907a0d7ddd2ffd57329369bf5 ]
+
+KCSAN reported a data-race in udp_set_dev_scratch() [1]
+
+The issue here is that we must not write over skb fields
+if skb is shared. A similar issue has been fixed in commit
+89c22d8c3b27 ("net: Fix skb csum races when peeking")
+
+While we are at it, use a helper only dealing with
+udp_skb_scratch(skb)->csum_unnecessary, as this allows
+udp_set_dev_scratch() to be called once and thus inlined.
+
+[1]
+BUG: KCSAN: data-race in udp_set_dev_scratch / udpv6_recvmsg
+
+write to 0xffff888120278317 of 1 bytes by task 10411 on cpu 1:
+ udp_set_dev_scratch+0xea/0x200 net/ipv4/udp.c:1308
+ __first_packet_length+0x147/0x420 net/ipv4/udp.c:1556
+ first_packet_length+0x68/0x2a0 net/ipv4/udp.c:1579
+ udp_poll+0xea/0x110 net/ipv4/udp.c:2720
+ sock_poll+0xed/0x250 net/socket.c:1256
+ vfs_poll include/linux/poll.h:90 [inline]
+ do_select+0x7d0/0x1020 fs/select.c:534
+ core_sys_select+0x381/0x550 fs/select.c:677
+ do_pselect.constprop.0+0x11d/0x160 fs/select.c:759
+ __do_sys_pselect6 fs/select.c:784 [inline]
+ __se_sys_pselect6 fs/select.c:769 [inline]
+ __x64_sys_pselect6+0x12e/0x170 fs/select.c:769
+ do_syscall_64+0xcc/0x370 arch/x86/entry/common.c:290
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+read to 0xffff888120278317 of 1 bytes by task 10413 on cpu 0:
+ udp_skb_csum_unnecessary include/net/udp.h:358 [inline]
+ udpv6_recvmsg+0x43e/0xe90 net/ipv6/udp.c:310
+ inet6_recvmsg+0xbb/0x240 net/ipv6/af_inet6.c:592
+ sock_recvmsg_nosec+0x5c/0x70 net/socket.c:871
+ ___sys_recvmsg+0x1a0/0x3e0 net/socket.c:2480
+ do_recvmmsg+0x19a/0x5c0 net/socket.c:2601
+ __sys_recvmmsg+0x1ef/0x200 net/socket.c:2680
+ __do_sys_recvmmsg net/socket.c:2703 [inline]
+ __se_sys_recvmmsg net/socket.c:2696 [inline]
+ __x64_sys_recvmmsg+0x89/0xb0 net/socket.c:2696
+ do_syscall_64+0xcc/0x370 arch/x86/entry/common.c:290
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 0 PID: 10413 Comm: syz-executor.0 Not tainted 5.4.0-rc3+ #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+
+Fixes: 2276f58ac589 ("udp: use a separate rx queue for packet reception")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Cc: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/udp.c |   19 +++++++++++++++----
+ 1 file changed, 15 insertions(+), 4 deletions(-)
+
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -1270,6 +1270,20 @@ static void udp_set_dev_scratch(struct s
+               scratch->_tsize_state |= UDP_SKB_IS_STATELESS;
+ }
++static void udp_skb_csum_unnecessary_set(struct sk_buff *skb)
++{
++      /* We come here after udp_lib_checksum_complete() returned 0.
++       * This means that __skb_checksum_complete() might have
++       * set skb->csum_valid to 1.
++       * On 64bit platforms, we can set csum_unnecessary
++       * to true, but only if the skb is not shared.
++       */
++#if BITS_PER_LONG == 64
++      if (!skb_shared(skb))
++              udp_skb_scratch(skb)->csum_unnecessary = true;
++#endif
++}
++
+ static int udp_skb_truesize(struct sk_buff *skb)
+ {
+       return udp_skb_scratch(skb)->_tsize_state & ~UDP_SKB_IS_STATELESS;
+@@ -1504,10 +1518,7 @@ static struct sk_buff *__first_packet_le
+                       *total += skb->truesize;
+                       kfree_skb(skb);
+               } else {
+-                      /* the csum related bits could be changed, refresh
+-                       * the scratch area
+-                       */
+-                      udp_set_dev_scratch(skb);
++                      udp_skb_csum_unnecessary_set(skb);
+                       break;
+               }
+       }
diff --git a/queue-4.19/udp-use-skb_queue_empty_lockless.patch b/queue-4.19/udp-use-skb_queue_empty_lockless.patch
new file mode 100644 (file)
index 0000000..518eff9
--- /dev/null
@@ -0,0 +1,98 @@
+From foo@baz Wed 06 Nov 2019 03:31:22 PM CET
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 23 Oct 2019 22:44:49 -0700
+Subject: udp: use skb_queue_empty_lockless()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 137a0dbe3426fd7bcfe3f8117b36a87b3590e4eb ]
+
+syzbot reported a data-race [1].
+
+We should use skb_queue_empty_lockless() to document that we are
+not ensuring a mutual exclusion and silence KCSAN.
+
+[1]
+BUG: KCSAN: data-race in __skb_recv_udp / __udp_enqueue_schedule_skb
+
+write to 0xffff888122474b50 of 8 bytes by interrupt on cpu 0:
+ __skb_insert include/linux/skbuff.h:1852 [inline]
+ __skb_queue_before include/linux/skbuff.h:1958 [inline]
+ __skb_queue_tail include/linux/skbuff.h:1991 [inline]
+ __udp_enqueue_schedule_skb+0x2c1/0x410 net/ipv4/udp.c:1470
+ __udp_queue_rcv_skb net/ipv4/udp.c:1940 [inline]
+ udp_queue_rcv_one_skb+0x7bd/0xc70 net/ipv4/udp.c:2057
+ udp_queue_rcv_skb+0xb5/0x400 net/ipv4/udp.c:2074
+ udp_unicast_rcv_skb.isra.0+0x7e/0x1c0 net/ipv4/udp.c:2233
+ __udp4_lib_rcv+0xa44/0x17c0 net/ipv4/udp.c:2300
+ udp_rcv+0x2b/0x40 net/ipv4/udp.c:2470
+ ip_protocol_deliver_rcu+0x4d/0x420 net/ipv4/ip_input.c:204
+ ip_local_deliver_finish+0x110/0x140 net/ipv4/ip_input.c:231
+ NF_HOOK include/linux/netfilter.h:305 [inline]
+ NF_HOOK include/linux/netfilter.h:299 [inline]
+ ip_local_deliver+0x133/0x210 net/ipv4/ip_input.c:252
+ dst_input include/net/dst.h:442 [inline]
+ ip_rcv_finish+0x121/0x160 net/ipv4/ip_input.c:413
+ NF_HOOK include/linux/netfilter.h:305 [inline]
+ NF_HOOK include/linux/netfilter.h:299 [inline]
+ ip_rcv+0x18f/0x1a0 net/ipv4/ip_input.c:523
+ __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5010
+ __netif_receive_skb+0x37/0xf0 net/core/dev.c:5124
+ process_backlog+0x1d3/0x420 net/core/dev.c:5955
+
+read to 0xffff888122474b50 of 8 bytes by task 8921 on cpu 1:
+ skb_queue_empty include/linux/skbuff.h:1494 [inline]
+ __skb_recv_udp+0x18d/0x500 net/ipv4/udp.c:1653
+ udp_recvmsg+0xe1/0xb10 net/ipv4/udp.c:1712
+ inet_recvmsg+0xbb/0x250 net/ipv4/af_inet.c:838
+ sock_recvmsg_nosec+0x5c/0x70 net/socket.c:871
+ ___sys_recvmsg+0x1a0/0x3e0 net/socket.c:2480
+ do_recvmmsg+0x19a/0x5c0 net/socket.c:2601
+ __sys_recvmmsg+0x1ef/0x200 net/socket.c:2680
+ __do_sys_recvmmsg net/socket.c:2703 [inline]
+ __se_sys_recvmmsg net/socket.c:2696 [inline]
+ __x64_sys_recvmmsg+0x89/0xb0 net/socket.c:2696
+ do_syscall_64+0xcc/0x370 arch/x86/entry/common.c:290
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 1 PID: 8921 Comm: syz-executor.4 Not tainted 5.4.0-rc3+ #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/udp.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -1542,7 +1542,7 @@ static int first_packet_length(struct so
+       spin_lock_bh(&rcvq->lock);
+       skb = __first_packet_length(sk, rcvq, &total);
+-      if (!skb && !skb_queue_empty(sk_queue)) {
++      if (!skb && !skb_queue_empty_lockless(sk_queue)) {
+               spin_lock(&sk_queue->lock);
+               skb_queue_splice_tail_init(sk_queue, rcvq);
+               spin_unlock(&sk_queue->lock);
+@@ -1617,7 +1617,7 @@ struct sk_buff *__skb_recv_udp(struct so
+                               return skb;
+                       }
+-                      if (skb_queue_empty(sk_queue)) {
++                      if (skb_queue_empty_lockless(sk_queue)) {
+                               spin_unlock_bh(&queue->lock);
+                               goto busy_check;
+                       }
+@@ -1644,7 +1644,7 @@ busy_check:
+                               break;
+                       sk_busy_loop(sk, flags & MSG_DONTWAIT);
+-              } while (!skb_queue_empty(sk_queue));
++              } while (!skb_queue_empty_lockless(sk_queue));
+               /* sk_queue is empty, reader_queue may contain peeked packets */
+       } while (timeo &&
diff --git a/queue-4.19/vxlan-check-tun_info-options_len-properly.patch b/queue-4.19/vxlan-check-tun_info-options_len-properly.patch
new file mode 100644 (file)
index 0000000..04d48bf
--- /dev/null
@@ -0,0 +1,38 @@
+From foo@baz Wed 06 Nov 2019 03:31:22 PM CET
+From: Xin Long <lucien.xin@gmail.com>
+Date: Tue, 29 Oct 2019 01:24:32 +0800
+Subject: vxlan: check tun_info options_len properly
+
+From: Xin Long <lucien.xin@gmail.com>
+
+[ Upstream commit eadf52cf1852196a1363044dcda22fa5d7f296f7 ]
+
+This patch is to improve the tun_info options_len by dropping
+the skb when TUNNEL_VXLAN_OPT is set but options_len is less
+than vxlan_metadata. This can void a potential out-of-bounds
+access on ip_tun_info.
+
+Fixes: ee122c79d422 ("vxlan: Flow based tunneling")
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/vxlan.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/vxlan.c
++++ b/drivers/net/vxlan.c
+@@ -2174,9 +2174,11 @@ static void vxlan_xmit_one(struct sk_buf
+               vni = tunnel_id_to_key32(info->key.tun_id);
+               ifindex = 0;
+               dst_cache = &info->dst_cache;
+-              if (info->options_len &&
+-                  info->key.tun_flags & TUNNEL_VXLAN_OPT)
++              if (info->key.tun_flags & TUNNEL_VXLAN_OPT) {
++                      if (info->options_len < sizeof(*md))
++                              goto drop;
+                       md = ip_tunnel_info_opts(info);
++              }
+               ttl = info->key.ttl;
+               tos = info->key.tos;
+               label = info->key.label;