4.4-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 6 Nov 2019 16:36:03 +0000 (17:36 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 6 Nov 2019 16:36:03 +0000 (17:36 +0100)
added patches:
dccp-do-not-leak-jiffies-on-the-wire.patch
inet-stop-leaking-jiffies-on-the-wire.patch
net-add-read_once-annotation-in-__skb_wait_for_more_packets.patch
net-fix-sk_page_frag-recursion-from-memory-reclaim.patch
net-hisilicon-fix-ping-latency-when-deal-with-high-throughput.patch
net-mlx4_core-dynamically-set-guaranteed-amount-of-counters-per-vf.patch
net-zeroing-the-structure-ethtool_wolinfo-in-ethtool_get_wol.patch
vxlan-check-tun_info-options_len-properly.patch

queue-4.4/dccp-do-not-leak-jiffies-on-the-wire.patch [new file with mode: 0644]
queue-4.4/inet-stop-leaking-jiffies-on-the-wire.patch [new file with mode: 0644]
queue-4.4/net-add-read_once-annotation-in-__skb_wait_for_more_packets.patch [new file with mode: 0644]
queue-4.4/net-fix-sk_page_frag-recursion-from-memory-reclaim.patch [new file with mode: 0644]
queue-4.4/net-hisilicon-fix-ping-latency-when-deal-with-high-throughput.patch [new file with mode: 0644]
queue-4.4/net-mlx4_core-dynamically-set-guaranteed-amount-of-counters-per-vf.patch [new file with mode: 0644]
queue-4.4/net-zeroing-the-structure-ethtool_wolinfo-in-ethtool_get_wol.patch [new file with mode: 0644]
queue-4.4/series
queue-4.4/vxlan-check-tun_info-options_len-properly.patch [new file with mode: 0644]

diff --git a/queue-4.4/dccp-do-not-leak-jiffies-on-the-wire.patch b/queue-4.4/dccp-do-not-leak-jiffies-on-the-wire.patch
new file mode 100644 (file)
index 0000000..4e0c8bf
--- /dev/null
@@ -0,0 +1,32 @@
+From foo@baz Wed 06 Nov 2019 05:02:50 PM CET
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 4 Nov 2019 07:57:55 -0800
+Subject: dccp: do not leak jiffies on the wire
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 3d1e5039f5f87a8731202ceca08764ee7cb010d3 ]
+
+For some reason I missed the case of DCCP passive
+flows in my previous patch.
+
+Fixes: a904a0693c18 ("inet: stop leaking jiffies on the wire")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Thiemo Nagel <tnagel@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/dccp/ipv4.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/dccp/ipv4.c
++++ b/net/dccp/ipv4.c
+@@ -417,7 +417,7 @@ struct sock *dccp_v4_request_recv_sock(c
+       RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt));
+       newinet->mc_index  = inet_iif(skb);
+       newinet->mc_ttl    = ip_hdr(skb)->ttl;
+-      newinet->inet_id   = jiffies;
++      newinet->inet_id   = prandom_u32();
+       if (dst == NULL && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL)
+               goto put_and_exit;
diff --git a/queue-4.4/inet-stop-leaking-jiffies-on-the-wire.patch b/queue-4.4/inet-stop-leaking-jiffies-on-the-wire.patch
new file mode 100644 (file)
index 0000000..bd56b08
--- /dev/null
@@ -0,0 +1,94 @@
+From foo@baz Wed 06 Nov 2019 04:08:37 PM CET
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 1 Nov 2019 10:32:19 -0700
+Subject: inet: stop leaking jiffies on the wire
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit a904a0693c189691eeee64f6c6b188bd7dc244e9 ]
+
+Historically Linux tried to stick to RFCs 791, 1122, and 2003
+for IPv4 ID field generation.
+
+RFC 6864 made clear that, no matter how hard we try, we cannot
+ensure uniqueness of the IP ID within the maximum lifetime for
+all datagrams with a given source address/destination
+address/protocol tuple.
+
+Linux uses a per-socket inet generator (inet_id), initialized
+at connection startup with an XOR of 'jiffies' and other
+fields that appear in clear text on the wire.
+
+Thiemo Nagel pointed out that this strategy is a privacy
+concern, as it provides 16 bits of entropy with which to
+fingerprint devices.
+
+Let's switch to a random starting point; this is just as good
+as far as RFC 6864 is concerned and does not leak anything
+critical.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Thiemo Nagel <tnagel@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/dccp/ipv4.c     |    2 +-
+ net/ipv4/datagram.c |    2 +-
+ net/ipv4/tcp_ipv4.c |    4 ++--
+ net/sctp/socket.c   |    2 +-
+ 4 files changed, 5 insertions(+), 5 deletions(-)
+
+--- a/net/dccp/ipv4.c
++++ b/net/dccp/ipv4.c
+@@ -121,7 +121,7 @@ int dccp_v4_connect(struct sock *sk, str
+                                                   inet->inet_daddr,
+                                                   inet->inet_sport,
+                                                   inet->inet_dport);
+-      inet->inet_id = dp->dccps_iss ^ jiffies;
++      inet->inet_id = prandom_u32();
+       err = dccp_connect(sk);
+       rt = NULL;
+--- a/net/ipv4/datagram.c
++++ b/net/ipv4/datagram.c
+@@ -75,7 +75,7 @@ int __ip4_datagram_connect(struct sock *
+       inet->inet_dport = usin->sin_port;
+       sk->sk_state = TCP_ESTABLISHED;
+       sk_set_txhash(sk);
+-      inet->inet_id = jiffies;
++      inet->inet_id = prandom_u32();
+       sk_dst_set(sk, &rt->dst);
+       err = 0;
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -241,7 +241,7 @@ int tcp_v4_connect(struct sock *sk, stru
+                                                          inet->inet_sport,
+                                                          usin->sin_port);
+-      inet->inet_id = tp->write_seq ^ jiffies;
++      inet->inet_id = prandom_u32();
+       err = tcp_connect(sk);
+@@ -1302,7 +1302,7 @@ struct sock *tcp_v4_syn_recv_sock(const
+       inet_csk(newsk)->icsk_ext_hdr_len = 0;
+       if (inet_opt)
+               inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
+-      newinet->inet_id = newtp->write_seq ^ jiffies;
++      newinet->inet_id = prandom_u32();
+       if (!dst) {
+               dst = inet_csk_route_child_sock(sk, newsk, req);
+--- a/net/sctp/socket.c
++++ b/net/sctp/socket.c
+@@ -7267,7 +7267,7 @@ void sctp_copy_sock(struct sock *newsk,
+       newinet->inet_rcv_saddr = inet->inet_rcv_saddr;
+       newinet->inet_dport = htons(asoc->peer.port);
+       newinet->pmtudisc = inet->pmtudisc;
+-      newinet->inet_id = asoc->next_tsn ^ jiffies;
++      newinet->inet_id = prandom_u32();
+       newinet->uc_ttl = inet->uc_ttl;
+       newinet->mc_loop = 1;
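
To make the entropy argument above concrete, here is a minimal
user-space sketch of the seeding change; the names and widths are
illustrative stand-ins for the kernel's, and only the low 16 bits of
inet_id ever reach the wire as the IPv4 ID:

#include <stdint.h>
#include <stdlib.h>

/* Old scheme: the initial IP ID mixes in jiffies.  write_seq is
 * visible on the wire as the TCP initial sequence number, so an
 * observer can XOR it back out and recover 16 bits of the clock.
 */
static uint16_t old_inet_id_seed(uint32_t write_seq, uint64_t jiffies)
{
	return (uint16_t)(write_seq ^ jiffies);
}

/* New scheme: a random starting point.  RFC 6864 only asks that IDs
 * not repeat too quickly, which a random seed satisfies just as
 * well, and it tells an observer nothing about the host.
 */
static uint16_t new_inet_id_seed(void)
{
	return (uint16_t)random();
}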
diff --git a/queue-4.4/net-add-read_once-annotation-in-__skb_wait_for_more_packets.patch b/queue-4.4/net-add-read_once-annotation-in-__skb_wait_for_more_packets.patch
new file mode 100644 (file)
index 0000000..84e9b08
--- /dev/null
@@ -0,0 +1,79 @@
+From foo@baz Wed 06 Nov 2019 05:02:50 PM CET
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 23 Oct 2019 22:44:52 -0700
+Subject: net: add READ_ONCE() annotation in __skb_wait_for_more_packets()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 7c422d0ce97552dde4a97e6290de70ec6efb0fc6 ]
+
+__skb_wait_for_more_packets() can be called while other CPUs
+are feeding packets to the socket receive queue.
+
+KCSAN reported:
+
+BUG: KCSAN: data-race in __skb_wait_for_more_packets / __udp_enqueue_schedule_skb
+
+write to 0xffff888102e40b58 of 8 bytes by interrupt on cpu 0:
+ __skb_insert include/linux/skbuff.h:1852 [inline]
+ __skb_queue_before include/linux/skbuff.h:1958 [inline]
+ __skb_queue_tail include/linux/skbuff.h:1991 [inline]
+ __udp_enqueue_schedule_skb+0x2d7/0x410 net/ipv4/udp.c:1470
+ __udp_queue_rcv_skb net/ipv4/udp.c:1940 [inline]
+ udp_queue_rcv_one_skb+0x7bd/0xc70 net/ipv4/udp.c:2057
+ udp_queue_rcv_skb+0xb5/0x400 net/ipv4/udp.c:2074
+ udp_unicast_rcv_skb.isra.0+0x7e/0x1c0 net/ipv4/udp.c:2233
+ __udp4_lib_rcv+0xa44/0x17c0 net/ipv4/udp.c:2300
+ udp_rcv+0x2b/0x40 net/ipv4/udp.c:2470
+ ip_protocol_deliver_rcu+0x4d/0x420 net/ipv4/ip_input.c:204
+ ip_local_deliver_finish+0x110/0x140 net/ipv4/ip_input.c:231
+ NF_HOOK include/linux/netfilter.h:305 [inline]
+ NF_HOOK include/linux/netfilter.h:299 [inline]
+ ip_local_deliver+0x133/0x210 net/ipv4/ip_input.c:252
+ dst_input include/net/dst.h:442 [inline]
+ ip_rcv_finish+0x121/0x160 net/ipv4/ip_input.c:413
+ NF_HOOK include/linux/netfilter.h:305 [inline]
+ NF_HOOK include/linux/netfilter.h:299 [inline]
+ ip_rcv+0x18f/0x1a0 net/ipv4/ip_input.c:523
+ __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5010
+ __netif_receive_skb+0x37/0xf0 net/core/dev.c:5124
+ process_backlog+0x1d3/0x420 net/core/dev.c:5955
+
+read to 0xffff888102e40b58 of 8 bytes by task 13035 on cpu 1:
+ __skb_wait_for_more_packets+0xfa/0x320 net/core/datagram.c:100
+ __skb_recv_udp+0x374/0x500 net/ipv4/udp.c:1683
+ udp_recvmsg+0xe1/0xb10 net/ipv4/udp.c:1712
+ inet_recvmsg+0xbb/0x250 net/ipv4/af_inet.c:838
+ sock_recvmsg_nosec+0x5c/0x70 net/socket.c:871
+ ___sys_recvmsg+0x1a0/0x3e0 net/socket.c:2480
+ do_recvmmsg+0x19a/0x5c0 net/socket.c:2601
+ __sys_recvmmsg+0x1ef/0x200 net/socket.c:2680
+ __do_sys_recvmmsg net/socket.c:2703 [inline]
+ __se_sys_recvmmsg net/socket.c:2696 [inline]
+ __x64_sys_recvmmsg+0x89/0xb0 net/socket.c:2696
+ do_syscall_64+0xcc/0x370 arch/x86/entry/common.c:290
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 1 PID: 13035 Comm: syz-executor.3 Not tainted 5.4.0-rc3+ #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/datagram.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/core/datagram.c
++++ b/net/core/datagram.c
+@@ -96,7 +96,7 @@ static int wait_for_more_packets(struct
+       if (error)
+               goto out_err;
+-      if (sk->sk_receive_queue.prev != skb)
++      if (READ_ONCE(sk->sk_receive_queue.prev) != skb)
+               goto out;
+       /* Socket shut down? */
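
For readers unfamiliar with the annotation, a simplified stand-in for
the scalar case of the macro (the kernel's real definition also
handles non-scalar sizes):

/* Roughly what READ_ONCE() expands to for scalars: the volatile
 * access forbids the compiler from refetching, caching, or tearing
 * the load, so a lockless check such as
 *
 *	if (READ_ONCE(sk->sk_receive_queue.prev) != skb)
 *
 * sees one single, untorn snapshot of the pointer even while another
 * CPU updates it under the queue lock in __skb_queue_tail().
 */
#define READ_ONCE(x) (*(const volatile __typeof__(x) *)&(x))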
diff --git a/queue-4.4/net-fix-sk_page_frag-recursion-from-memory-reclaim.patch b/queue-4.4/net-fix-sk_page_frag-recursion-from-memory-reclaim.patch
new file mode 100644 (file)
index 0000000..6538b6c
--- /dev/null
@@ -0,0 +1,160 @@
+From foo@baz Wed 06 Nov 2019 05:02:50 PM CET
+From: Tejun Heo <tj@kernel.org>
+Date: Thu, 24 Oct 2019 13:50:27 -0700
+Subject: net: fix sk_page_frag() recursion from memory reclaim
+
+From: Tejun Heo <tj@kernel.org>
+
+[ Upstream commit 20eb4f29b60286e0d6dc01d9c260b4bd383c58fb ]
+
+sk_page_frag() optimizes skb_frag allocations by using the per-task
+skb_frag cache when it knows it's the only user.  The condition is
+determined by checking whether the socket allocation mask allows
+blocking - if the allocation may block, it evidently owns the task's
+context and therefore exclusively owns current->task_frag.
+
+Unfortunately, this misses recursion through the memory reclaim path.
+Please take a look at the following backtrace.
+
+ [2] RIP: 0010:tcp_sendmsg_locked+0xccf/0xe10
+     ...
+     tcp_sendmsg+0x27/0x40
+     sock_sendmsg+0x30/0x40
+     sock_xmit.isra.24+0xa1/0x170 [nbd]
+     nbd_send_cmd+0x1d2/0x690 [nbd]
+     nbd_queue_rq+0x1b5/0x3b0 [nbd]
+     __blk_mq_try_issue_directly+0x108/0x1b0
+     blk_mq_request_issue_directly+0xbd/0xe0
+     blk_mq_try_issue_list_directly+0x41/0xb0
+     blk_mq_sched_insert_requests+0xa2/0xe0
+     blk_mq_flush_plug_list+0x205/0x2a0
+     blk_flush_plug_list+0xc3/0xf0
+ [1] blk_finish_plug+0x21/0x2e
+     _xfs_buf_ioapply+0x313/0x460
+     __xfs_buf_submit+0x67/0x220
+     xfs_buf_read_map+0x113/0x1a0
+     xfs_trans_read_buf_map+0xbf/0x330
+     xfs_btree_read_buf_block.constprop.42+0x95/0xd0
+     xfs_btree_lookup_get_block+0x95/0x170
+     xfs_btree_lookup+0xcc/0x470
+     xfs_bmap_del_extent_real+0x254/0x9a0
+     __xfs_bunmapi+0x45c/0xab0
+     xfs_bunmapi+0x15/0x30
+     xfs_itruncate_extents_flags+0xca/0x250
+     xfs_free_eofblocks+0x181/0x1e0
+     xfs_fs_destroy_inode+0xa8/0x1b0
+     destroy_inode+0x38/0x70
+     dispose_list+0x35/0x50
+     prune_icache_sb+0x52/0x70
+     super_cache_scan+0x120/0x1a0
+     do_shrink_slab+0x120/0x290
+     shrink_slab+0x216/0x2b0
+     shrink_node+0x1b6/0x4a0
+     do_try_to_free_pages+0xc6/0x370
+     try_to_free_mem_cgroup_pages+0xe3/0x1e0
+     try_charge+0x29e/0x790
+     mem_cgroup_charge_skmem+0x6a/0x100
+     __sk_mem_raise_allocated+0x18e/0x390
+     __sk_mem_schedule+0x2a/0x40
+ [0] tcp_sendmsg_locked+0x8eb/0xe10
+     tcp_sendmsg+0x27/0x40
+     sock_sendmsg+0x30/0x40
+     ___sys_sendmsg+0x26d/0x2b0
+     __sys_sendmsg+0x57/0xa0
+     do_syscall_64+0x42/0x100
+     entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+In [0], tcp_sendmsg_locked() was using current->task_frag when it
+called sk_wmem_schedule(), and had already calculated how many bytes
+could fit into it.  Due to memory pressure, sk_wmem_schedule() called
+into the memory reclaim path, which called into xfs and then the IO
+issue path.  Because the filesystem in question is backed by nbd,
+control goes back into the tcp layer - back into
+tcp_sendmsg_locked().
+
+nbd sets sk_allocation to (GFP_NOIO | __GFP_MEMALLOC), which makes
+sense - it's in the process of freeing memory and wants to be able
+to, e.g., drop clean pages to make forward progress.  However, this
+confused sk_page_frag() when called from [2]: because it only tests
+whether the allocation allows blocking - which it does - it thought
+current->task_frag could be used again although it was already in
+use in [0].
+
+After [2] used current->task_frag, its offset was advanced by the
+used amount.  When control returns to [0], the previously
+calculated number of bytes, applied at the now-advanced offset,
+may overrun the end of the allocated memory, leading to silent
+memory corruption.
+
+Fix it by adding gfpflags_normal_context(), which tests sleepable &&
+!reclaim, and using it to determine whether to use current->task_frag.
+
+v2: Eric didn't like the gfp flags being tested twice.  Introduce a
+    new helper gfpflags_normal_context() and combine the two tests.
+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Cc: Josef Bacik <josef@toxicpanda.com>
+Cc: Eric Dumazet <eric.dumazet@gmail.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/gfp.h |   23 +++++++++++++++++++++++
+ include/net/sock.h  |   11 ++++++++---
+ 2 files changed, 31 insertions(+), 3 deletions(-)
+
+--- a/include/linux/gfp.h
++++ b/include/linux/gfp.h
+@@ -274,6 +274,29 @@ static inline bool gfpflags_allow_blocki
+       return (bool __force)(gfp_flags & __GFP_DIRECT_RECLAIM);
+ }
++/**
++ * gfpflags_normal_context - is gfp_flags a normal sleepable context?
++ * @gfp_flags: gfp_flags to test
++ *
++ * Test whether @gfp_flags indicates that the allocation is from the
++ * %current context and allowed to sleep.
++ *
++ * An allocation being allowed to block doesn't mean it owns the %current
++ * context.  When direct reclaim path tries to allocate memory, the
++ * allocation context is nested inside whatever %current was doing at the
++ * time of the original allocation.  The nested allocation may be allowed
++ * to block but modifying anything %current owns can corrupt the outer
++ * context's expectations.
++ *
++ * %true result from this function indicates that the allocation context
++ * can sleep and use anything that's associated with %current.
++ */
++static inline bool gfpflags_normal_context(const gfp_t gfp_flags)
++{
++      return (gfp_flags & (__GFP_DIRECT_RECLAIM | __GFP_MEMALLOC)) ==
++              __GFP_DIRECT_RECLAIM;
++}
++
+ #ifdef CONFIG_HIGHMEM
+ #define OPT_ZONE_HIGHMEM ZONE_HIGHMEM
+ #else
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -2077,12 +2077,17 @@ struct sk_buff *sk_stream_alloc_skb(stru
+  * sk_page_frag - return an appropriate page_frag
+  * @sk: socket
+  *
+- * If socket allocation mode allows current thread to sleep, it means its
+- * safe to use the per task page_frag instead of the per socket one.
++ * Use the per task page_frag instead of the per socket one for
++ * optimization when we know that we're in the normal context and owns
++ * everything that's associated with %current.
++ *
++ * gfpflags_allow_blocking() isn't enough here as direct reclaim may nest
++ * inside other socket operations and end up recursing into sk_page_frag()
++ * while it's already in use.
+  */
+ static inline struct page_frag *sk_page_frag(struct sock *sk)
+ {
+-      if (gfpflags_allow_blocking(sk->sk_allocation))
++      if (gfpflags_normal_context(sk->sk_allocation))
+               return &current->task_frag;
+       return &sk->sk_frag;
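
The difference between the two predicates for nbd's socket mask, as a
small stand-alone sketch (the bit values are illustrative stand-ins,
not the kernel's; only the set relationships matter):

/* Illustrative flag bits -- not the real gfp.h values. */
#define __GFP_DIRECT_RECLAIM	0x0400u
#define __GFP_MEMALLOC		0x2000u
#define GFP_NOIO		__GFP_DIRECT_RECLAIM	/* may block, no I/O */

static int allow_blocking(unsigned int gfp)	/* the old test */
{
	return !!(gfp & __GFP_DIRECT_RECLAIM);
}

static int normal_context(unsigned int gfp)	/* the new test */
{
	return (gfp & (__GFP_DIRECT_RECLAIM | __GFP_MEMALLOC)) ==
		__GFP_DIRECT_RECLAIM;
}

/* For nbd's mask (GFP_NOIO | __GFP_MEMALLOC):
 *   allow_blocking() returns 1 - the old code wrongly handed out
 *   current->task_frag a second time;
 *   normal_context() returns 0 - the new code falls back to the
 *   per-socket sk->sk_frag.
 */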
diff --git a/queue-4.4/net-hisilicon-fix-ping-latency-when-deal-with-high-throughput.patch b/queue-4.4/net-hisilicon-fix-ping-latency-when-deal-with-high-throughput.patch
new file mode 100644 (file)
index 0000000..e4325a6
--- /dev/null
@@ -0,0 +1,76 @@
+From foo@baz Wed 06 Nov 2019 05:02:50 PM CET
+From: Jiangfeng Xiao <xiaojiangfeng@huawei.com>
+Date: Mon, 28 Oct 2019 13:09:46 +0800
+Subject: net: hisilicon: Fix ping latency when deal with high throughput
+
+From: Jiangfeng Xiao <xiaojiangfeng@huawei.com>
+
+[ Upstream commit e56bd641ca61beb92b135298d5046905f920b734 ]
+
+This is due to an error in the over-budget processing:
+under high throughput, used buffers that exceed the NAPI
+budget are not cleaned up, and it then takes many extra
+cycles to clean them up before the buffers holding valid
+data can take effect.
+
+Signed-off-by: Jiangfeng Xiao <xiaojiangfeng@huawei.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/hisilicon/hip04_eth.c |   15 +++++++++------
+ 1 file changed, 9 insertions(+), 6 deletions(-)
+
+--- a/drivers/net/ethernet/hisilicon/hip04_eth.c
++++ b/drivers/net/ethernet/hisilicon/hip04_eth.c
+@@ -174,6 +174,7 @@ struct hip04_priv {
+       dma_addr_t rx_phys[RX_DESC_NUM];
+       unsigned int rx_head;
+       unsigned int rx_buf_size;
++      unsigned int rx_cnt_remaining;
+       struct device_node *phy_node;
+       struct phy_device *phy;
+@@ -487,7 +488,6 @@ static int hip04_rx_poll(struct napi_str
+       struct hip04_priv *priv = container_of(napi, struct hip04_priv, napi);
+       struct net_device *ndev = priv->ndev;
+       struct net_device_stats *stats = &ndev->stats;
+-      unsigned int cnt = hip04_recv_cnt(priv);
+       struct rx_desc *desc;
+       struct sk_buff *skb;
+       unsigned char *buf;
+@@ -500,8 +500,8 @@ static int hip04_rx_poll(struct napi_str
+       /* clean up tx descriptors */
+       tx_remaining = hip04_tx_reclaim(ndev, false);
+-
+-      while (cnt && !last) {
++      priv->rx_cnt_remaining += hip04_recv_cnt(priv);
++      while (priv->rx_cnt_remaining && !last) {
+               buf = priv->rx_buf[priv->rx_head];
+               skb = build_skb(buf, priv->rx_buf_size);
+               if (unlikely(!skb))
+@@ -544,11 +544,13 @@ static int hip04_rx_poll(struct napi_str
+               hip04_set_recv_desc(priv, phys);
+               priv->rx_head = RX_NEXT(priv->rx_head);
+-              if (rx >= budget)
++              if (rx >= budget) {
++                      --priv->rx_cnt_remaining;
+                       goto done;
++              }
+-              if (--cnt == 0)
+-                      cnt = hip04_recv_cnt(priv);
++              if (--priv->rx_cnt_remaining == 0)
++                      priv->rx_cnt_remaining += hip04_recv_cnt(priv);
+       }
+       if (!(priv->reg_inten & RCV_INT)) {
+@@ -633,6 +635,7 @@ static int hip04_mac_open(struct net_dev
+       int i;
+       priv->rx_head = 0;
++      priv->rx_cnt_remaining = 0;
+       priv->tx_head = 0;
+       priv->tx_tail = 0;
+       hip04_reset_ppe(priv);
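
Condensed into one place, the accounting change looks roughly like
this; it is a sketch rather than the driver code, with stand-in types
and a hypothetical process_one_rx_frame() in place of the real
receive-loop body:

/* Minimal stand-ins so the sketch is self-contained; the real driver
 * reads a hardware counter register and builds skbs here.
 */
struct hip04_priv_sketch {
	unsigned int rx_cnt_remaining;
	unsigned int hw_pending;	/* pretend hardware counter */
};

static unsigned int recv_cnt(struct hip04_priv_sketch *p)
{
	unsigned int n = p->hw_pending;	/* read-and-clear, like hw */

	p->hw_pending = 0;
	return n;
}

static void process_one_rx_frame(struct hip04_priv_sketch *p)
{
	(void)p;	/* build_skb(), napi_gro_receive(), ... */
}

static int rx_poll_sketch(struct hip04_priv_sketch *priv, int budget)
{
	int rx = 0;

	/* Fold newly reported frames into whatever last poll left over. */
	priv->rx_cnt_remaining += recv_cnt(priv);
	while (priv->rx_cnt_remaining) {
		process_one_rx_frame(priv);
		if (++rx >= budget) {
			/* Budget hit: account for the frame just handled,
			 * but keep the remainder for the next poll instead
			 * of forgetting it as the old local 'cnt' did.
			 */
			--priv->rx_cnt_remaining;
			break;
		}
		if (--priv->rx_cnt_remaining == 0)
			priv->rx_cnt_remaining += recv_cnt(priv);
	}
	return rx;
}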
diff --git a/queue-4.4/net-mlx4_core-dynamically-set-guaranteed-amount-of-counters-per-vf.patch b/queue-4.4/net-mlx4_core-dynamically-set-guaranteed-amount-of-counters-per-vf.patch
new file mode 100644 (file)
index 0000000..38916ac
--- /dev/null
@@ -0,0 +1,94 @@
+From foo@baz Wed 06 Nov 2019 04:08:37 PM CET
+From: Eran Ben Elisha <eranbe@mellanox.com>
+Date: Sun, 27 Oct 2019 16:39:15 +0200
+Subject: net/mlx4_core: Dynamically set guaranteed amount of counters per VF
+
+From: Eran Ben Elisha <eranbe@mellanox.com>
+
+[ Upstream commit e19868efea0c103f23b4b7e986fd0a703822111f ]
+
+Prior to this patch, the number of counters guaranteed per VF in the
+resource tracker was MLX4_VF_COUNTERS_PER_PORT * MLX4_MAX_PORTS,
+regardless of whether the VF was single- or dual-port.
+This left several VFs with no guaranteed counters although the
+system could have satisfied their request.
+
+The fix is to guarantee counters dynamically, based on each VF's
+actual port configuration.
+
+Fixes: 9de92c60beaa ("net/mlx4_core: Adjust counter grant policy in the resource tracker")
+Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
+Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
+Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx4/resource_tracker.c |   42 +++++++++++-------
+ 1 file changed, 26 insertions(+), 16 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
++++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+@@ -463,12 +463,31 @@ void mlx4_init_quotas(struct mlx4_dev *d
+               priv->mfunc.master.res_tracker.res_alloc[RES_MPT].quota[pf];
+ }
+-static int get_max_gauranteed_vfs_counter(struct mlx4_dev *dev)
++static int
++mlx4_calc_res_counter_guaranteed(struct mlx4_dev *dev,
++                               struct resource_allocator *res_alloc,
++                               int vf)
+ {
+-      /* reduce the sink counter */
+-      return (dev->caps.max_counters - 1 -
+-              (MLX4_PF_COUNTERS_PER_PORT * MLX4_MAX_PORTS))
+-              / MLX4_MAX_PORTS;
++      struct mlx4_active_ports actv_ports;
++      int ports, counters_guaranteed;
++
++      /* For master, only allocate according to the number of phys ports */
++      if (vf == mlx4_master_func_num(dev))
++              return MLX4_PF_COUNTERS_PER_PORT * dev->caps.num_ports;
++
++      /* calculate real number of ports for the VF */
++      actv_ports = mlx4_get_active_ports(dev, vf);
++      ports = bitmap_weight(actv_ports.ports, dev->caps.num_ports);
++      counters_guaranteed = ports * MLX4_VF_COUNTERS_PER_PORT;
++
++      /* If we do not have enough counters for this VF, do not
++       * allocate any for it. '-1' to reduce the sink counter.
++       */
++      if ((res_alloc->res_reserved + counters_guaranteed) >
++          (dev->caps.max_counters - 1))
++              return 0;
++
++      return counters_guaranteed;
+ }
+ int mlx4_init_resource_tracker(struct mlx4_dev *dev)
+@@ -476,7 +495,6 @@ int mlx4_init_resource_tracker(struct ml
+       struct mlx4_priv *priv = mlx4_priv(dev);
+       int i, j;
+       int t;
+-      int max_vfs_guarantee_counter = get_max_gauranteed_vfs_counter(dev);
+       priv->mfunc.master.res_tracker.slave_list =
+               kzalloc(dev->num_slaves * sizeof(struct slave_list),
+@@ -593,16 +611,8 @@ int mlx4_init_resource_tracker(struct ml
+                               break;
+                       case RES_COUNTER:
+                               res_alloc->quota[t] = dev->caps.max_counters;
+-                              if (t == mlx4_master_func_num(dev))
+-                                      res_alloc->guaranteed[t] =
+-                                              MLX4_PF_COUNTERS_PER_PORT *
+-                                              MLX4_MAX_PORTS;
+-                              else if (t <= max_vfs_guarantee_counter)
+-                                      res_alloc->guaranteed[t] =
+-                                              MLX4_VF_COUNTERS_PER_PORT *
+-                                              MLX4_MAX_PORTS;
+-                              else
+-                                      res_alloc->guaranteed[t] = 0;
++                              res_alloc->guaranteed[t] =
++                                      mlx4_calc_res_counter_guaranteed(dev, res_alloc, t);
+                               res_alloc->res_free -= res_alloc->guaranteed[t];
+                               break;
+                       default:
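
A worked example of the policy change, with all numbers assumed
purely for illustration: suppose MLX4_PF_COUNTERS_PER_PORT = 2,
MLX4_VF_COUNTERS_PER_PORT = 1, MLX4_MAX_PORTS = 2, max_counters = 8,
a dual-port PF, and five single-port VFs.  Under the old code every
VF was promised 1 * 2 = 2 counters, and the cap
(8 - 1 - 2*2) / 2 = 1 meant only the first VF passed the
't <= max_vfs_guarantee_counter' test, leaving the other four with
no guarantee.  Under the new code each single-port VF needs just one
counter; after the sink counter (1) and the PF's 2 * 2 = 4, three
counters remain, so three of the five VFs get their guarantee and
only the last two are declined.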
diff --git a/queue-4.4/net-zeroing-the-structure-ethtool_wolinfo-in-ethtool_get_wol.patch b/queue-4.4/net-zeroing-the-structure-ethtool_wolinfo-in-ethtool_get_wol.patch
new file mode 100644 (file)
index 0000000..6d758f5
--- /dev/null
@@ -0,0 +1,36 @@
+From foo@baz Wed 06 Nov 2019 05:02:50 PM CET
+From: zhanglin <zhang.lin16@zte.com.cn>
+Date: Sat, 26 Oct 2019 15:54:16 +0800
+Subject: net: Zeroing the structure ethtool_wolinfo in ethtool_get_wol()
+
+From: zhanglin <zhang.lin16@zte.com.cn>
+
+[ Upstream commit 5ff223e86f5addbfae26419cbb5d61d98f6fbf7d ]
+
+memset() the structure ethtool_wolinfo: it contains padding
+bytes, and those were not being zeroed out before the copy.
+
+Signed-off-by: zhanglin <zhang.lin16@zte.com.cn>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/ethtool.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/core/ethtool.c
++++ b/net/core/ethtool.c
+@@ -941,11 +941,13 @@ static int ethtool_reset(struct net_devi
+ static int ethtool_get_wol(struct net_device *dev, char __user *useraddr)
+ {
+-      struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL };
++      struct ethtool_wolinfo wol;
+       if (!dev->ethtool_ops->get_wol)
+               return -EOPNOTSUPP;
++      memset(&wol, 0, sizeof(struct ethtool_wolinfo));
++      wol.cmd = ETHTOOL_GWOL;
+       dev->ethtool_ops->get_wol(dev, &wol);
+       if (copy_to_user(useraddr, &wol, sizeof(wol)))
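
The underlying C subtlety, as a stand-alone sketch with a
hypothetical struct: a designated initializer zeroes every member,
but the language leaves inter-member padding unspecified, while
memset() clears every byte, padding included - and the whole struct
is later copied to user space byte for byte:

#include <string.h>

struct wol_like {	/* illustrative layout, not the real struct */
	char	flag;	/* 1 byte; 3 padding bytes usually follow */
	int	opts;	/* aligned to 4 bytes */
};

static void leaky(void *dst)
{
	struct wol_like w = { .flag = 1 };	/* members zeroed, padding
						 * may hold old stack bytes */
	memcpy(dst, &w, sizeof(w));		/* copies padding too */
}

static void safe(void *dst)
{
	struct wol_like w;

	memset(&w, 0, sizeof(w));		/* every byte zeroed */
	w.flag = 1;
	memcpy(dst, &w, sizeof(w));
}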
diff --git a/queue-4.4/series b/queue-4.4/series
index ecc822df83f103f5cb2509ccbc035e504769a551..4e2183262ca89dba3e4736ed8c1deb3935d46193 100644 (file)
--- a/queue-4.4/series
@@ -11,3 +11,11 @@ scsi-target-core-do-not-overwrite-cdb-byte-1.patch
 of-unittest-fix-memory-leak-in-unittest_data_add.patch
 mips-bmips-mark-exception-vectors-as-char-arrays.patch
 cifs-fix-cifsinodeinfo-lock_sem-deadlock-when-reconn.patch
+dccp-do-not-leak-jiffies-on-the-wire.patch
+net-fix-sk_page_frag-recursion-from-memory-reclaim.patch
+net-hisilicon-fix-ping-latency-when-deal-with-high-throughput.patch
+net-zeroing-the-structure-ethtool_wolinfo-in-ethtool_get_wol.patch
+net-add-read_once-annotation-in-__skb_wait_for_more_packets.patch
+vxlan-check-tun_info-options_len-properly.patch
+net-mlx4_core-dynamically-set-guaranteed-amount-of-counters-per-vf.patch
+inet-stop-leaking-jiffies-on-the-wire.patch
diff --git a/queue-4.4/vxlan-check-tun_info-options_len-properly.patch b/queue-4.4/vxlan-check-tun_info-options_len-properly.patch
new file mode 100644 (file)
index 0000000..e5c218e
--- /dev/null
@@ -0,0 +1,37 @@
+From foo@baz Wed 06 Nov 2019 04:08:37 PM CET
+From: Xin Long <lucien.xin@gmail.com>
+Date: Tue, 29 Oct 2019 01:24:32 +0800
+Subject: vxlan: check tun_info options_len properly
+
+From: Xin Long <lucien.xin@gmail.com>
+
+[ Upstream commit eadf52cf1852196a1363044dcda22fa5d7f296f7 ]
+
+This patch improves the tun_info options_len check by dropping
+the skb when TUNNEL_VXLAN_OPT is set but options_len is smaller
+than struct vxlan_metadata. This avoids a potential out-of-bounds
+access on ip_tun_info.
+
+Fixes: ee122c79d422 ("vxlan: Flow based tunneling")
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/vxlan.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/vxlan.c
++++ b/drivers/net/vxlan.c
+@@ -2006,8 +2006,11 @@ static void vxlan_xmit_one(struct sk_buf
+               ttl = info->key.ttl;
+               tos = info->key.tos;
+-              if (info->options_len)
++              if (info->options_len) {
++                      if (info->options_len < sizeof(*md))
++                              goto drop;
+                       md = ip_tunnel_info_opts(info);
++              }
+       } else {
+               md->gbp = skb->mark;
+       }
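
The shape of the hazard, sketched with abbreviated, hypothetical
types: ip_tunnel_info_opts() merely returns a pointer just past the
fixed tunnel-info header, so nothing else guarantees that enough
option bytes actually follow it.

struct tun_info_sketch {
	/* ... fixed key fields ... */
	unsigned char	options_len;	/* how many option bytes follow */
	unsigned char	options[];	/* caller-supplied region */
};

struct vxlan_md_sketch {
	unsigned int	gbp;		/* VXLAN group policy data, 4 bytes */
};

/* Treating 'options' as struct vxlan_md_sketch is only safe once
 * options_len >= sizeof(struct vxlan_md_sketch) has been verified;
 * with options_len == 1, say, the old code read three bytes past
 * the caller-supplied data -- exactly what the added check
 * 'if (info->options_len < sizeof(*md)) goto drop;' prevents.
 */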