]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.9-stable patches
author: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 6 Nov 2019 17:10:51 +0000 (18:10 +0100)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 6 Nov 2019 17:10:51 +0000 (18:10 +0100)
added patches:
dccp-do-not-leak-jiffies-on-the-wire.patch
inet-stop-leaking-jiffies-on-the-wire.patch
net-add-read_once-annotation-in-__skb_wait_for_more_packets.patch
net-bcmgenet-reset-40nm-ephy-on-energy-detect.patch
net-dsa-fix-switch-tree-list.patch
net-fix-sk_page_frag-recursion-from-memory-reclaim.patch
net-hisilicon-fix-ping-latency-when-deal-with-high-throughput.patch
net-mlx4_core-dynamically-set-guaranteed-amount-of-counters-per-vf.patch
net-zeroing-the-structure-ethtool_wolinfo-in-ethtool_get_wol.patch
selftests-net-reuseport_dualstack-fix-uninitalized-parameter.patch
vxlan-check-tun_info-options_len-properly.patch

12 files changed:
queue-4.9/dccp-do-not-leak-jiffies-on-the-wire.patch [new file with mode: 0644]
queue-4.9/inet-stop-leaking-jiffies-on-the-wire.patch [new file with mode: 0644]
queue-4.9/net-add-read_once-annotation-in-__skb_wait_for_more_packets.patch [new file with mode: 0644]
queue-4.9/net-bcmgenet-reset-40nm-ephy-on-energy-detect.patch [new file with mode: 0644]
queue-4.9/net-dsa-fix-switch-tree-list.patch [new file with mode: 0644]
queue-4.9/net-fix-sk_page_frag-recursion-from-memory-reclaim.patch [new file with mode: 0644]
queue-4.9/net-hisilicon-fix-ping-latency-when-deal-with-high-throughput.patch [new file with mode: 0644]
queue-4.9/net-mlx4_core-dynamically-set-guaranteed-amount-of-counters-per-vf.patch [new file with mode: 0644]
queue-4.9/net-zeroing-the-structure-ethtool_wolinfo-in-ethtool_get_wol.patch [new file with mode: 0644]
queue-4.9/selftests-net-reuseport_dualstack-fix-uninitalized-parameter.patch [new file with mode: 0644]
queue-4.9/series
queue-4.9/vxlan-check-tun_info-options_len-properly.patch [new file with mode: 0644]

diff --git a/queue-4.9/dccp-do-not-leak-jiffies-on-the-wire.patch b/queue-4.9/dccp-do-not-leak-jiffies-on-the-wire.patch
new file mode 100644 (file)
index 0000000..a3ab50f
--- /dev/null
@@ -0,0 +1,32 @@
+From foo@baz Wed 06 Nov 2019 04:45:30 PM CET
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 4 Nov 2019 07:57:55 -0800
+Subject: dccp: do not leak jiffies on the wire
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 3d1e5039f5f87a8731202ceca08764ee7cb010d3 ]
+
+For some reason I missed the case of DCCP passive
+flows in my previous patch.
+
+Fixes: a904a0693c18 ("inet: stop leaking jiffies on the wire")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Thiemo Nagel <tnagel@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/dccp/ipv4.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/dccp/ipv4.c
++++ b/net/dccp/ipv4.c
+@@ -417,7 +417,7 @@ struct sock *dccp_v4_request_recv_sock(c
+       RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt));
+       newinet->mc_index  = inet_iif(skb);
+       newinet->mc_ttl    = ip_hdr(skb)->ttl;
+-      newinet->inet_id   = jiffies;
++      newinet->inet_id   = prandom_u32();
+       if (dst == NULL && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL)
+               goto put_and_exit;
diff --git a/queue-4.9/inet-stop-leaking-jiffies-on-the-wire.patch b/queue-4.9/inet-stop-leaking-jiffies-on-the-wire.patch
new file mode 100644 (file)
index 0000000..5b2cb7f
--- /dev/null
@@ -0,0 +1,94 @@
+From foo@baz Wed 06 Nov 2019 04:08:37 PM CET
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 1 Nov 2019 10:32:19 -0700
+Subject: inet: stop leaking jiffies on the wire
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit a904a0693c189691eeee64f6c6b188bd7dc244e9 ]
+
+Historically linux tried to stick to RFC 791, 1122, 2003
+for IPv4 ID field generation.
+
+RFC 6864 made clear that no matter how hard we try,
+we can not ensure unicity of IP ID within maximum
+lifetime for all datagrams with a given source
+address/destination address/protocol tuple.
+
+Linux uses a per socket inet generator (inet_id), initialized
+at connection startup with a XOR of 'jiffies' and other
+fields that appear clear on the wire.
+
+Thiemo Nagel pointed out that this strategy is a privacy
+concern as this provides 16 bits of entropy to fingerprint
+devices.
+
+Let's switch to a random starting point, this is just as
+good as far as RFC 6864 is concerned and does not leak
+anything critical.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Thiemo Nagel <tnagel@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/dccp/ipv4.c     |    2 +-
+ net/ipv4/datagram.c |    2 +-
+ net/ipv4/tcp_ipv4.c |    4 ++--
+ net/sctp/socket.c   |    2 +-
+ 4 files changed, 5 insertions(+), 5 deletions(-)
+
+--- a/net/dccp/ipv4.c
++++ b/net/dccp/ipv4.c
+@@ -121,7 +121,7 @@ int dccp_v4_connect(struct sock *sk, str
+                                                   inet->inet_daddr,
+                                                   inet->inet_sport,
+                                                   inet->inet_dport);
+-      inet->inet_id = dp->dccps_iss ^ jiffies;
++      inet->inet_id = prandom_u32();
+       err = dccp_connect(sk);
+       rt = NULL;
+--- a/net/ipv4/datagram.c
++++ b/net/ipv4/datagram.c
+@@ -75,7 +75,7 @@ int __ip4_datagram_connect(struct sock *
+       inet->inet_dport = usin->sin_port;
+       sk->sk_state = TCP_ESTABLISHED;
+       sk_set_txhash(sk);
+-      inet->inet_id = jiffies;
++      inet->inet_id = prandom_u32();
+       sk_dst_set(sk, &rt->dst);
+       err = 0;
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -239,7 +239,7 @@ int tcp_v4_connect(struct sock *sk, stru
+                                                          inet->inet_sport,
+                                                          usin->sin_port);
+-      inet->inet_id = tp->write_seq ^ jiffies;
++      inet->inet_id = prandom_u32();
+       err = tcp_connect(sk);
+@@ -1307,7 +1307,7 @@ struct sock *tcp_v4_syn_recv_sock(const
+       inet_csk(newsk)->icsk_ext_hdr_len = 0;
+       if (inet_opt)
+               inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
+-      newinet->inet_id = newtp->write_seq ^ jiffies;
++      newinet->inet_id = prandom_u32();
+       if (!dst) {
+               dst = inet_csk_route_child_sock(sk, newsk, req);
+--- a/net/sctp/socket.c
++++ b/net/sctp/socket.c
+@@ -7734,7 +7734,7 @@ void sctp_copy_sock(struct sock *newsk,
+       newinet->inet_rcv_saddr = inet->inet_rcv_saddr;
+       newinet->inet_dport = htons(asoc->peer.port);
+       newinet->pmtudisc = inet->pmtudisc;
+-      newinet->inet_id = asoc->next_tsn ^ jiffies;
++      newinet->inet_id = prandom_u32();
+       newinet->uc_ttl = inet->uc_ttl;
+       newinet->mc_loop = 1;
diff --git a/queue-4.9/net-add-read_once-annotation-in-__skb_wait_for_more_packets.patch b/queue-4.9/net-add-read_once-annotation-in-__skb_wait_for_more_packets.patch
new file mode 100644 (file)
index 0000000..43c4b4e
--- /dev/null
@@ -0,0 +1,79 @@
+From foo@baz Wed 06 Nov 2019 04:45:30 PM CET
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 23 Oct 2019 22:44:52 -0700
+Subject: net: add READ_ONCE() annotation in __skb_wait_for_more_packets()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 7c422d0ce97552dde4a97e6290de70ec6efb0fc6 ]
+
+__skb_wait_for_more_packets() can be called while other cpus
+can feed packets to the socket receive queue.
+
+KCSAN reported :
+
+BUG: KCSAN: data-race in __skb_wait_for_more_packets / __udp_enqueue_schedule_skb
+
+write to 0xffff888102e40b58 of 8 bytes by interrupt on cpu 0:
+ __skb_insert include/linux/skbuff.h:1852 [inline]
+ __skb_queue_before include/linux/skbuff.h:1958 [inline]
+ __skb_queue_tail include/linux/skbuff.h:1991 [inline]
+ __udp_enqueue_schedule_skb+0x2d7/0x410 net/ipv4/udp.c:1470
+ __udp_queue_rcv_skb net/ipv4/udp.c:1940 [inline]
+ udp_queue_rcv_one_skb+0x7bd/0xc70 net/ipv4/udp.c:2057
+ udp_queue_rcv_skb+0xb5/0x400 net/ipv4/udp.c:2074
+ udp_unicast_rcv_skb.isra.0+0x7e/0x1c0 net/ipv4/udp.c:2233
+ __udp4_lib_rcv+0xa44/0x17c0 net/ipv4/udp.c:2300
+ udp_rcv+0x2b/0x40 net/ipv4/udp.c:2470
+ ip_protocol_deliver_rcu+0x4d/0x420 net/ipv4/ip_input.c:204
+ ip_local_deliver_finish+0x110/0x140 net/ipv4/ip_input.c:231
+ NF_HOOK include/linux/netfilter.h:305 [inline]
+ NF_HOOK include/linux/netfilter.h:299 [inline]
+ ip_local_deliver+0x133/0x210 net/ipv4/ip_input.c:252
+ dst_input include/net/dst.h:442 [inline]
+ ip_rcv_finish+0x121/0x160 net/ipv4/ip_input.c:413
+ NF_HOOK include/linux/netfilter.h:305 [inline]
+ NF_HOOK include/linux/netfilter.h:299 [inline]
+ ip_rcv+0x18f/0x1a0 net/ipv4/ip_input.c:523
+ __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5010
+ __netif_receive_skb+0x37/0xf0 net/core/dev.c:5124
+ process_backlog+0x1d3/0x420 net/core/dev.c:5955
+
+read to 0xffff888102e40b58 of 8 bytes by task 13035 on cpu 1:
+ __skb_wait_for_more_packets+0xfa/0x320 net/core/datagram.c:100
+ __skb_recv_udp+0x374/0x500 net/ipv4/udp.c:1683
+ udp_recvmsg+0xe1/0xb10 net/ipv4/udp.c:1712
+ inet_recvmsg+0xbb/0x250 net/ipv4/af_inet.c:838
+ sock_recvmsg_nosec+0x5c/0x70 net/socket.c:871
+ ___sys_recvmsg+0x1a0/0x3e0 net/socket.c:2480
+ do_recvmmsg+0x19a/0x5c0 net/socket.c:2601
+ __sys_recvmmsg+0x1ef/0x200 net/socket.c:2680
+ __do_sys_recvmmsg net/socket.c:2703 [inline]
+ __se_sys_recvmmsg net/socket.c:2696 [inline]
+ __x64_sys_recvmmsg+0x89/0xb0 net/socket.c:2696
+ do_syscall_64+0xcc/0x370 arch/x86/entry/common.c:290
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 1 PID: 13035 Comm: syz-executor.3 Not tainted 5.4.0-rc3+ #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/datagram.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/core/datagram.c
++++ b/net/core/datagram.c
+@@ -96,7 +96,7 @@ int __skb_wait_for_more_packets(struct s
+       if (error)
+               goto out_err;
+-      if (sk->sk_receive_queue.prev != skb)
++      if (READ_ONCE(sk->sk_receive_queue.prev) != skb)
+               goto out;
+       /* Socket shut down? */
diff --git a/queue-4.9/net-bcmgenet-reset-40nm-ephy-on-energy-detect.patch b/queue-4.9/net-bcmgenet-reset-40nm-ephy-on-energy-detect.patch
new file mode 100644 (file)
index 0000000..9e9e127
--- /dev/null
@@ -0,0 +1,62 @@
+From foo@baz Wed 06 Nov 2019 04:08:37 PM CET
+From: Doug Berger <opendmb@gmail.com>
+Date: Wed, 16 Oct 2019 16:06:32 -0700
+Subject: net: bcmgenet: reset 40nm EPHY on energy detect
+
+From: Doug Berger <opendmb@gmail.com>
+
+[ Upstream commit 25382b991d252aed961cd434176240f9de6bb15f ]
+
+The EPHY integrated into the 40nm Set-Top Box devices can falsely
+detect energy when connected to a disabled peer interface. When the
+peer interface is enabled the EPHY will detect and report the link
+as active, but on occasion may get into a state where it is not
+able to exchange data with the connected GENET MAC. This issue has
+not been observed when the link parameters are auto-negotiated;
+however, it has been observed with a manually configured link.
+
+It has been empirically determined that issuing a soft reset to the
+EPHY when energy is detected prevents it from getting into this bad
+state.
+
+Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file")
+Signed-off-by: Doug Berger <opendmb@gmail.com>
+Acked-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/genet/bcmgenet.c |    9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
++++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+@@ -1914,6 +1914,8 @@ static void bcmgenet_link_intr_enable(st
+        */
+       if (priv->internal_phy) {
+               int0_enable |= UMAC_IRQ_LINK_EVENT;
++              if (GENET_IS_V1(priv) || GENET_IS_V2(priv) || GENET_IS_V3(priv))
++                      int0_enable |= UMAC_IRQ_PHY_DET_R;
+       } else if (priv->ext_phy) {
+               int0_enable |= UMAC_IRQ_LINK_EVENT;
+       } else if (priv->phy_interface == PHY_INTERFACE_MODE_MOCA) {
+@@ -2531,6 +2533,10 @@ static void bcmgenet_irq_task(struct wor
+               bcmgenet_power_up(priv, GENET_POWER_WOL_MAGIC);
+       }
++      if (status & UMAC_IRQ_PHY_DET_R &&
++          priv->dev->phydev->autoneg != AUTONEG_ENABLE)
++              phy_init_hw(priv->dev->phydev);
++
+       /* Link UP/DOWN event */
+       if (status & UMAC_IRQ_LINK_EVENT)
+               phy_mac_interrupt(priv->phydev,
+@@ -2627,8 +2633,7 @@ static irqreturn_t bcmgenet_isr0(int irq
+       }
+       /* all other interested interrupts handled in bottom half */
+-      status &= (UMAC_IRQ_LINK_EVENT |
+-                 UMAC_IRQ_MPD_R);
++      status &= (UMAC_IRQ_LINK_EVENT | UMAC_IRQ_MPD_R | UMAC_IRQ_PHY_DET_R);
+       if (status) {
+               /* Save irq status for bottom-half processing. */
+               spin_lock_irqsave(&priv->lock, flags);
diff --git a/queue-4.9/net-dsa-fix-switch-tree-list.patch b/queue-4.9/net-dsa-fix-switch-tree-list.patch
new file mode 100644 (file)
index 0000000..713a300
--- /dev/null
@@ -0,0 +1,32 @@
+From foo@baz Wed 06 Nov 2019 04:08:37 PM CET
+From: Vivien Didelot <vivien.didelot@gmail.com>
+Date: Fri, 18 Oct 2019 17:02:46 -0400
+Subject: net: dsa: fix switch tree list
+
+From: Vivien Didelot <vivien.didelot@gmail.com>
+
+[ Upstream commit 50c7d2ba9de20f60a2d527ad6928209ef67e4cdd ]
+
+If there are multiple switch trees on the device, only the last one
+will be listed, because the arguments of list_add_tail are swapped.
+
+Fixes: 83c0afaec7b7 ("net: dsa: Add new binding implementation")
+Signed-off-by: Vivien Didelot <vivien.didelot@gmail.com>
+Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/dsa/dsa2.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/dsa/dsa2.c
++++ b/net/dsa/dsa2.c
+@@ -59,7 +59,7 @@ static struct dsa_switch_tree *dsa_add_d
+       dst->tree = tree;
+       dst->cpu_switch = -1;
+       INIT_LIST_HEAD(&dst->list);
+-      list_add_tail(&dsa_switch_trees, &dst->list);
++      list_add_tail(&dst->list, &dsa_switch_trees);
+       kref_init(&dst->refcount);
+       return dst;
diff --git a/queue-4.9/net-fix-sk_page_frag-recursion-from-memory-reclaim.patch b/queue-4.9/net-fix-sk_page_frag-recursion-from-memory-reclaim.patch
new file mode 100644 (file)
index 0000000..fb3b9a7
--- /dev/null
@@ -0,0 +1,160 @@
+From foo@baz Wed 06 Nov 2019 04:45:30 PM CET
+From: Tejun Heo <tj@kernel.org>
+Date: Thu, 24 Oct 2019 13:50:27 -0700
+Subject: net: fix sk_page_frag() recursion from memory reclaim
+
+From: Tejun Heo <tj@kernel.org>
+
+[ Upstream commit 20eb4f29b60286e0d6dc01d9c260b4bd383c58fb ]
+
+sk_page_frag() optimizes skb_frag allocations by using per-task
+skb_frag cache when it knows it's the only user.  The condition is
+determined by seeing whether the socket allocation mask allows
+blocking - if the allocation may block, it obviously owns the task's
+context and ergo exclusively owns current->task_frag.
+
+Unfortunately, this misses recursion through memory reclaim path.
+Please take a look at the following backtrace.
+
+ [2] RIP: 0010:tcp_sendmsg_locked+0xccf/0xe10
+     ...
+     tcp_sendmsg+0x27/0x40
+     sock_sendmsg+0x30/0x40
+     sock_xmit.isra.24+0xa1/0x170 [nbd]
+     nbd_send_cmd+0x1d2/0x690 [nbd]
+     nbd_queue_rq+0x1b5/0x3b0 [nbd]
+     __blk_mq_try_issue_directly+0x108/0x1b0
+     blk_mq_request_issue_directly+0xbd/0xe0
+     blk_mq_try_issue_list_directly+0x41/0xb0
+     blk_mq_sched_insert_requests+0xa2/0xe0
+     blk_mq_flush_plug_list+0x205/0x2a0
+     blk_flush_plug_list+0xc3/0xf0
+ [1] blk_finish_plug+0x21/0x2e
+     _xfs_buf_ioapply+0x313/0x460
+     __xfs_buf_submit+0x67/0x220
+     xfs_buf_read_map+0x113/0x1a0
+     xfs_trans_read_buf_map+0xbf/0x330
+     xfs_btree_read_buf_block.constprop.42+0x95/0xd0
+     xfs_btree_lookup_get_block+0x95/0x170
+     xfs_btree_lookup+0xcc/0x470
+     xfs_bmap_del_extent_real+0x254/0x9a0
+     __xfs_bunmapi+0x45c/0xab0
+     xfs_bunmapi+0x15/0x30
+     xfs_itruncate_extents_flags+0xca/0x250
+     xfs_free_eofblocks+0x181/0x1e0
+     xfs_fs_destroy_inode+0xa8/0x1b0
+     destroy_inode+0x38/0x70
+     dispose_list+0x35/0x50
+     prune_icache_sb+0x52/0x70
+     super_cache_scan+0x120/0x1a0
+     do_shrink_slab+0x120/0x290
+     shrink_slab+0x216/0x2b0
+     shrink_node+0x1b6/0x4a0
+     do_try_to_free_pages+0xc6/0x370
+     try_to_free_mem_cgroup_pages+0xe3/0x1e0
+     try_charge+0x29e/0x790
+     mem_cgroup_charge_skmem+0x6a/0x100
+     __sk_mem_raise_allocated+0x18e/0x390
+     __sk_mem_schedule+0x2a/0x40
+ [0] tcp_sendmsg_locked+0x8eb/0xe10
+     tcp_sendmsg+0x27/0x40
+     sock_sendmsg+0x30/0x40
+     ___sys_sendmsg+0x26d/0x2b0
+     __sys_sendmsg+0x57/0xa0
+     do_syscall_64+0x42/0x100
+     entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+In [0], tcp_sendmsg_locked() was using current->task_frag when it
+called sk_wmem_schedule().  It already calculated how many bytes can
+be fit into current->task_frag.  Due to memory pressure,
+sk_wmem_schedule() called into memory reclaim path which called into
+xfs and then IO issue path.  Because the filesystem in question is
+backed by nbd, the control goes back into the tcp layer - back into
+tcp_sendmsg_locked().
+
+nbd sets sk_allocation to (GFP_NOIO | __GFP_MEMALLOC) which makes
+sense - it's in the process of freeing memory and wants to be able to,
+e.g., drop clean pages to make forward progress.  However, this
+confused sk_page_frag() called from [2].  Because it only tests
+whether the allocation allows blocking which it does, it now thinks
+current->task_frag can be used again although it already was being
+used in [0].
+
+After [2] used current->task_frag, the offset would be increased by
+the used amount.  When the control returns to [0],
+current->task_frag's offset is increased and the previously calculated
+number of bytes now may overrun the end of allocated memory leading to
+silent memory corruptions.
+
+Fix it by adding gfpflags_normal_context() which tests sleepable &&
+!reclaim and use it to determine whether to use current->task_frag.
+
+v2: Eric didn't like gfp flags being tested twice.  Introduce a new
+    helper gfpflags_normal_context() and combine the two tests.
+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Cc: Josef Bacik <josef@toxicpanda.com>
+Cc: Eric Dumazet <eric.dumazet@gmail.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/gfp.h |   23 +++++++++++++++++++++++
+ include/net/sock.h  |   11 ++++++++---
+ 2 files changed, 31 insertions(+), 3 deletions(-)
+
+--- a/include/linux/gfp.h
++++ b/include/linux/gfp.h
+@@ -284,6 +284,29 @@ static inline bool gfpflags_allow_blocki
+       return !!(gfp_flags & __GFP_DIRECT_RECLAIM);
+ }
++/**
++ * gfpflags_normal_context - is gfp_flags a normal sleepable context?
++ * @gfp_flags: gfp_flags to test
++ *
++ * Test whether @gfp_flags indicates that the allocation is from the
++ * %current context and allowed to sleep.
++ *
++ * An allocation being allowed to block doesn't mean it owns the %current
++ * context.  When direct reclaim path tries to allocate memory, the
++ * allocation context is nested inside whatever %current was doing at the
++ * time of the original allocation.  The nested allocation may be allowed
++ * to block but modifying anything %current owns can corrupt the outer
++ * context's expectations.
++ *
++ * %true result from this function indicates that the allocation context
++ * can sleep and use anything that's associated with %current.
++ */
++static inline bool gfpflags_normal_context(const gfp_t gfp_flags)
++{
++      return (gfp_flags & (__GFP_DIRECT_RECLAIM | __GFP_MEMALLOC)) ==
++              __GFP_DIRECT_RECLAIM;
++}
++
+ #ifdef CONFIG_HIGHMEM
+ #define OPT_ZONE_HIGHMEM ZONE_HIGHMEM
+ #else
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -2045,12 +2045,17 @@ struct sk_buff *sk_stream_alloc_skb(stru
+  * sk_page_frag - return an appropriate page_frag
+  * @sk: socket
+  *
+- * If socket allocation mode allows current thread to sleep, it means its
+- * safe to use the per task page_frag instead of the per socket one.
++ * Use the per task page_frag instead of the per socket one for
++ * optimization when we know that we're in the normal context and owns
++ * everything that's associated with %current.
++ *
++ * gfpflags_allow_blocking() isn't enough here as direct reclaim may nest
++ * inside other socket operations and end up recursing into sk_page_frag()
++ * while it's already in use.
+  */
+ static inline struct page_frag *sk_page_frag(struct sock *sk)
+ {
+-      if (gfpflags_allow_blocking(sk->sk_allocation))
++      if (gfpflags_normal_context(sk->sk_allocation))
+               return &current->task_frag;
+       return &sk->sk_frag;
diff --git a/queue-4.9/net-hisilicon-fix-ping-latency-when-deal-with-high-throughput.patch b/queue-4.9/net-hisilicon-fix-ping-latency-when-deal-with-high-throughput.patch
new file mode 100644 (file)
index 0000000..0cd388b
--- /dev/null
@@ -0,0 +1,76 @@
+From foo@baz Wed 06 Nov 2019 04:45:30 PM CET
+From: Jiangfeng Xiao <xiaojiangfeng@huawei.com>
+Date: Mon, 28 Oct 2019 13:09:46 +0800
+Subject: net: hisilicon: Fix ping latency when deal with high throughput
+
+From: Jiangfeng Xiao <xiaojiangfeng@huawei.com>
+
+[ Upstream commit e56bd641ca61beb92b135298d5046905f920b734 ]
+
+This is due to an error in over-budget processing.
+When dealing with high throughput, the used buffers
+that exceed the budget are not cleaned up. In addition,
+it takes a lot of cycles to clean up the used buffers, and only
+then can the buffer where the valid data is located take effect.
+
+Signed-off-by: Jiangfeng Xiao <xiaojiangfeng@huawei.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/hisilicon/hip04_eth.c |   15 +++++++++------
+ 1 file changed, 9 insertions(+), 6 deletions(-)
+
+--- a/drivers/net/ethernet/hisilicon/hip04_eth.c
++++ b/drivers/net/ethernet/hisilicon/hip04_eth.c
+@@ -174,6 +174,7 @@ struct hip04_priv {
+       dma_addr_t rx_phys[RX_DESC_NUM];
+       unsigned int rx_head;
+       unsigned int rx_buf_size;
++      unsigned int rx_cnt_remaining;
+       struct device_node *phy_node;
+       struct phy_device *phy;
+@@ -487,7 +488,6 @@ static int hip04_rx_poll(struct napi_str
+       struct hip04_priv *priv = container_of(napi, struct hip04_priv, napi);
+       struct net_device *ndev = priv->ndev;
+       struct net_device_stats *stats = &ndev->stats;
+-      unsigned int cnt = hip04_recv_cnt(priv);
+       struct rx_desc *desc;
+       struct sk_buff *skb;
+       unsigned char *buf;
+@@ -500,8 +500,8 @@ static int hip04_rx_poll(struct napi_str
+       /* clean up tx descriptors */
+       tx_remaining = hip04_tx_reclaim(ndev, false);
+-
+-      while (cnt && !last) {
++      priv->rx_cnt_remaining += hip04_recv_cnt(priv);
++      while (priv->rx_cnt_remaining && !last) {
+               buf = priv->rx_buf[priv->rx_head];
+               skb = build_skb(buf, priv->rx_buf_size);
+               if (unlikely(!skb)) {
+@@ -547,11 +547,13 @@ refill:
+               hip04_set_recv_desc(priv, phys);
+               priv->rx_head = RX_NEXT(priv->rx_head);
+-              if (rx >= budget)
++              if (rx >= budget) {
++                      --priv->rx_cnt_remaining;
+                       goto done;
++              }
+-              if (--cnt == 0)
+-                      cnt = hip04_recv_cnt(priv);
++              if (--priv->rx_cnt_remaining == 0)
++                      priv->rx_cnt_remaining += hip04_recv_cnt(priv);
+       }
+       if (!(priv->reg_inten & RCV_INT)) {
+@@ -636,6 +638,7 @@ static int hip04_mac_open(struct net_dev
+       int i;
+       priv->rx_head = 0;
++      priv->rx_cnt_remaining = 0;
+       priv->tx_head = 0;
+       priv->tx_tail = 0;
+       hip04_reset_ppe(priv);
diff --git a/queue-4.9/net-mlx4_core-dynamically-set-guaranteed-amount-of-counters-per-vf.patch b/queue-4.9/net-mlx4_core-dynamically-set-guaranteed-amount-of-counters-per-vf.patch
new file mode 100644 (file)
index 0000000..fc7f96b
--- /dev/null
@@ -0,0 +1,94 @@
+From foo@baz Wed 06 Nov 2019 04:08:37 PM CET
+From: Eran Ben Elisha <eranbe@mellanox.com>
+Date: Sun, 27 Oct 2019 16:39:15 +0200
+Subject: net/mlx4_core: Dynamically set guaranteed amount of counters per VF
+
+From: Eran Ben Elisha <eranbe@mellanox.com>
+
+[ Upstream commit e19868efea0c103f23b4b7e986fd0a703822111f ]
+
+Prior to this patch, the amount of counters guaranteed per VF in the
+resource tracker was MLX4_VF_COUNTERS_PER_PORT * MLX4_MAX_PORTS. It was
+set regardless if the VF was single or dual port.
+This caused several VFs to have no guaranteed counters although the
+system could satisfy their request.
+
+The fix is to dynamically guarantee counters, based on each VF
+specification.
+
+Fixes: 9de92c60beaa ("net/mlx4_core: Adjust counter grant policy in the resource tracker")
+Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
+Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
+Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx4/resource_tracker.c |   42 +++++++++++-------
+ 1 file changed, 26 insertions(+), 16 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
++++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+@@ -470,12 +470,31 @@ void mlx4_init_quotas(struct mlx4_dev *d
+               priv->mfunc.master.res_tracker.res_alloc[RES_MPT].quota[pf];
+ }
+-static int get_max_gauranteed_vfs_counter(struct mlx4_dev *dev)
++static int
++mlx4_calc_res_counter_guaranteed(struct mlx4_dev *dev,
++                               struct resource_allocator *res_alloc,
++                               int vf)
+ {
+-      /* reduce the sink counter */
+-      return (dev->caps.max_counters - 1 -
+-              (MLX4_PF_COUNTERS_PER_PORT * MLX4_MAX_PORTS))
+-              / MLX4_MAX_PORTS;
++      struct mlx4_active_ports actv_ports;
++      int ports, counters_guaranteed;
++
++      /* For master, only allocate according to the number of phys ports */
++      if (vf == mlx4_master_func_num(dev))
++              return MLX4_PF_COUNTERS_PER_PORT * dev->caps.num_ports;
++
++      /* calculate real number of ports for the VF */
++      actv_ports = mlx4_get_active_ports(dev, vf);
++      ports = bitmap_weight(actv_ports.ports, dev->caps.num_ports);
++      counters_guaranteed = ports * MLX4_VF_COUNTERS_PER_PORT;
++
++      /* If we do not have enough counters for this VF, do not
++       * allocate any for it. '-1' to reduce the sink counter.
++       */
++      if ((res_alloc->res_reserved + counters_guaranteed) >
++          (dev->caps.max_counters - 1))
++              return 0;
++
++      return counters_guaranteed;
+ }
+ int mlx4_init_resource_tracker(struct mlx4_dev *dev)
+@@ -483,7 +502,6 @@ int mlx4_init_resource_tracker(struct ml
+       struct mlx4_priv *priv = mlx4_priv(dev);
+       int i, j;
+       int t;
+-      int max_vfs_guarantee_counter = get_max_gauranteed_vfs_counter(dev);
+       priv->mfunc.master.res_tracker.slave_list =
+               kzalloc(dev->num_slaves * sizeof(struct slave_list),
+@@ -600,16 +618,8 @@ int mlx4_init_resource_tracker(struct ml
+                               break;
+                       case RES_COUNTER:
+                               res_alloc->quota[t] = dev->caps.max_counters;
+-                              if (t == mlx4_master_func_num(dev))
+-                                      res_alloc->guaranteed[t] =
+-                                              MLX4_PF_COUNTERS_PER_PORT *
+-                                              MLX4_MAX_PORTS;
+-                              else if (t <= max_vfs_guarantee_counter)
+-                                      res_alloc->guaranteed[t] =
+-                                              MLX4_VF_COUNTERS_PER_PORT *
+-                                              MLX4_MAX_PORTS;
+-                              else
+-                                      res_alloc->guaranteed[t] = 0;
++                              res_alloc->guaranteed[t] =
++                                      mlx4_calc_res_counter_guaranteed(dev, res_alloc, t);
+                               res_alloc->res_free -= res_alloc->guaranteed[t];
+                               break;
+                       default:
diff --git a/queue-4.9/net-zeroing-the-structure-ethtool_wolinfo-in-ethtool_get_wol.patch b/queue-4.9/net-zeroing-the-structure-ethtool_wolinfo-in-ethtool_get_wol.patch
new file mode 100644 (file)
index 0000000..8d06074
--- /dev/null
@@ -0,0 +1,36 @@
+From foo@baz Wed 06 Nov 2019 04:45:30 PM CET
+From: zhanglin <zhang.lin16@zte.com.cn>
+Date: Sat, 26 Oct 2019 15:54:16 +0800
+Subject: net: Zeroing the structure ethtool_wolinfo in ethtool_get_wol()
+
+From: zhanglin <zhang.lin16@zte.com.cn>
+
+[ Upstream commit 5ff223e86f5addbfae26419cbb5d61d98f6fbf7d ]
+
+The structure ethtool_wolinfo has padding bytes that have not been
+zeroed out; memset() the whole structure before filling it in.
+
+Signed-off-by: zhanglin <zhang.lin16@zte.com.cn>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/ethtool.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/core/ethtool.c
++++ b/net/core/ethtool.c
+@@ -1438,11 +1438,13 @@ static int ethtool_reset(struct net_devi
+ static int ethtool_get_wol(struct net_device *dev, char __user *useraddr)
+ {
+-      struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL };
++      struct ethtool_wolinfo wol;
+       if (!dev->ethtool_ops->get_wol)
+               return -EOPNOTSUPP;
++      memset(&wol, 0, sizeof(struct ethtool_wolinfo));
++      wol.cmd = ETHTOOL_GWOL;
+       dev->ethtool_ops->get_wol(dev, &wol);
+       if (copy_to_user(useraddr, &wol, sizeof(wol)))
diff --git a/queue-4.9/selftests-net-reuseport_dualstack-fix-uninitalized-parameter.patch b/queue-4.9/selftests-net-reuseport_dualstack-fix-uninitalized-parameter.patch
new file mode 100644 (file)
index 0000000..4d7a88a
--- /dev/null
@@ -0,0 +1,44 @@
+From foo@baz Wed 06 Nov 2019 04:45:30 PM CET
+From: Wei Wang <weiwan@google.com>
+Date: Thu, 31 Oct 2019 16:24:36 -0700
+Subject: selftests: net: reuseport_dualstack: fix uninitalized parameter
+
+From: Wei Wang <weiwan@google.com>
+
+[ Upstream commit d64479a3e3f9924074ca7b50bd72fa5211dca9c1 ]
+
+This test reports EINVAL for getsockopt(SOL_SOCKET, SO_DOMAIN)
+occasionally due to the uninitialized length parameter.
+Initialize it to fix this, and also use int for "test_family" to comply
+with the API standard.
+
+Fixes: d6a61f80b871 ("soreuseport: test mixed v4/v6 sockets")
+Reported-by: Maciej Żenczykowski <maze@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Wei Wang <weiwan@google.com>
+Cc: Craig Gallek <cgallek@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/net/reuseport_dualstack.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/tools/testing/selftests/net/reuseport_dualstack.c
++++ b/tools/testing/selftests/net/reuseport_dualstack.c
+@@ -128,7 +128,7 @@ static void test(int *rcv_fds, int count
+ {
+       struct epoll_event ev;
+       int epfd, i, test_fd;
+-      uint16_t test_family;
++      int test_family;
+       socklen_t len;
+       epfd = epoll_create(1);
+@@ -145,6 +145,7 @@ static void test(int *rcv_fds, int count
+       send_from_v4(proto);
+       test_fd = receive_once(epfd, proto);
++      len = sizeof(test_family);
+       if (getsockopt(test_fd, SOL_SOCKET, SO_DOMAIN, &test_family, &len))
+               error(1, errno, "failed to read socket domain");
+       if (test_family != AF_INET)
index f5a33de8fd808b44f6064ca85147f8bd9c366f55..be7b3d39104e4a8cbe0a7e1a5714d36e585bde8a 100644 (file)
@@ -15,3 +15,14 @@ scsi-target-core-do-not-overwrite-cdb-byte-1.patch
 of-unittest-fix-memory-leak-in-unittest_data_add.patch
 mips-bmips-mark-exception-vectors-as-char-arrays.patch
 cifs-fix-cifsinodeinfo-lock_sem-deadlock-when-reconn.patch
+dccp-do-not-leak-jiffies-on-the-wire.patch
+net-fix-sk_page_frag-recursion-from-memory-reclaim.patch
+net-hisilicon-fix-ping-latency-when-deal-with-high-throughput.patch
+net-zeroing-the-structure-ethtool_wolinfo-in-ethtool_get_wol.patch
+selftests-net-reuseport_dualstack-fix-uninitalized-parameter.patch
+net-add-read_once-annotation-in-__skb_wait_for_more_packets.patch
+net-dsa-fix-switch-tree-list.patch
+net-bcmgenet-reset-40nm-ephy-on-energy-detect.patch
+vxlan-check-tun_info-options_len-properly.patch
+net-mlx4_core-dynamically-set-guaranteed-amount-of-counters-per-vf.patch
+inet-stop-leaking-jiffies-on-the-wire.patch
diff --git a/queue-4.9/vxlan-check-tun_info-options_len-properly.patch b/queue-4.9/vxlan-check-tun_info-options_len-properly.patch
new file mode 100644 (file)
index 0000000..9eb2b18
--- /dev/null
@@ -0,0 +1,37 @@
+From foo@baz Wed 06 Nov 2019 04:08:37 PM CET
+From: Xin Long <lucien.xin@gmail.com>
+Date: Tue, 29 Oct 2019 01:24:32 +0800
+Subject: vxlan: check tun_info options_len properly
+
+From: Xin Long <lucien.xin@gmail.com>
+
+[ Upstream commit eadf52cf1852196a1363044dcda22fa5d7f296f7 ]
+
+This patch is to improve the tun_info options_len check by dropping
+the skb when TUNNEL_VXLAN_OPT is set but options_len is less
+than vxlan_metadata. This can avoid a potential out-of-bounds
+access on ip_tun_info.
+
+Fixes: ee122c79d422 ("vxlan: Flow based tunneling")
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/vxlan.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/vxlan.c
++++ b/drivers/net/vxlan.c
+@@ -2049,8 +2049,11 @@ static void vxlan_xmit_one(struct sk_buf
+               label = info->key.label;
+               udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM);
+-              if (info->options_len)
++              if (info->options_len) {
++                      if (info->options_len < sizeof(*md))
++                              goto drop;
+                       md = ip_tunnel_info_opts(info);
++              }
+       } else {
+               md->gbp = skb->mark;
+       }