From: Greg Kroah-Hartman
Date: Wed, 6 Nov 2019 16:36:03 +0000 (+0100)
Subject: 4.4-stable patches
X-Git-Tag: v4.4.200~23
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=c3dd6a0def508f5545eb1f22566a33f7c67f6e19;p=thirdparty%2Fkernel%2Fstable-queue.git

4.4-stable patches

added patches:
    dccp-do-not-leak-jiffies-on-the-wire.patch
    inet-stop-leaking-jiffies-on-the-wire.patch
    net-add-read_once-annotation-in-__skb_wait_for_more_packets.patch
    net-fix-sk_page_frag-recursion-from-memory-reclaim.patch
    net-hisilicon-fix-ping-latency-when-deal-with-high-throughput.patch
    net-mlx4_core-dynamically-set-guaranteed-amount-of-counters-per-vf.patch
    net-zeroing-the-structure-ethtool_wolinfo-in-ethtool_get_wol.patch
    vxlan-check-tun_info-options_len-properly.patch
---
diff --git a/queue-4.4/dccp-do-not-leak-jiffies-on-the-wire.patch b/queue-4.4/dccp-do-not-leak-jiffies-on-the-wire.patch
new file mode 100644
index 00000000000..4e0c8bf430a
--- /dev/null
+++ b/queue-4.4/dccp-do-not-leak-jiffies-on-the-wire.patch
@@ -0,0 +1,32 @@
+From foo@baz Wed 06 Nov 2019 05:02:50 PM CET
+From: Eric Dumazet
+Date: Mon, 4 Nov 2019 07:57:55 -0800
+Subject: dccp: do not leak jiffies on the wire
+
+From: Eric Dumazet
+
+[ Upstream commit 3d1e5039f5f87a8731202ceca08764ee7cb010d3 ]
+
+For some reason I missed the case of DCCP passive
+flows in my previous patch.
+
+Fixes: a904a0693c18 ("inet: stop leaking jiffies on the wire")
+Signed-off-by: Eric Dumazet
+Reported-by: Thiemo Nagel
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/dccp/ipv4.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/dccp/ipv4.c
++++ b/net/dccp/ipv4.c
+@@ -417,7 +417,7 @@ struct sock *dccp_v4_request_recv_sock(c
+ 	RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt));
+ 	newinet->mc_index = inet_iif(skb);
+ 	newinet->mc_ttl = ip_hdr(skb)->ttl;
+-	newinet->inet_id = jiffies;
++	newinet->inet_id = prandom_u32();
+
+ 	if (dst == NULL && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL)
+ 		goto put_and_exit;
diff --git a/queue-4.4/inet-stop-leaking-jiffies-on-the-wire.patch b/queue-4.4/inet-stop-leaking-jiffies-on-the-wire.patch
new file mode 100644
index 00000000000..bd56b08f01c
--- /dev/null
+++ b/queue-4.4/inet-stop-leaking-jiffies-on-the-wire.patch
@@ -0,0 +1,94 @@
+From foo@baz Wed 06 Nov 2019 04:08:37 PM CET
+From: Eric Dumazet
+Date: Fri, 1 Nov 2019 10:32:19 -0700
+Subject: inet: stop leaking jiffies on the wire
+
+From: Eric Dumazet
+
+[ Upstream commit a904a0693c189691eeee64f6c6b188bd7dc244e9 ]
+
+Historically linux tried to stick to RFC 791, 1122, 2003
+for IPv4 ID field generation.
+
+RFC 6864 made clear that no matter how hard we try,
+we cannot ensure unicity of the IP ID within the maximum
+lifetime for all datagrams with a given source
+address/destination address/protocol tuple.
+
+Linux uses a per-socket inet generator (inet_id), initialized
+at connection startup with an XOR of 'jiffies' and other
+fields that appear in the clear on the wire.
+
+Thiemo Nagel pointed out that this strategy is a privacy
+concern as it provides 16 bits of entropy to fingerprint
+devices.
+
+Let's switch to a random starting point; this is just as
+good as far as RFC 6864 is concerned and does not leak
+anything critical.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Eric Dumazet
+Reported-by: Thiemo Nagel
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/dccp/ipv4.c | 2 +-
+ net/ipv4/datagram.c | 2 +-
+ net/ipv4/tcp_ipv4.c | 4 ++--
+ net/sctp/socket.c | 2 +-
+ 4 files changed, 5 insertions(+), 5 deletions(-)
+
+--- a/net/dccp/ipv4.c
++++ b/net/dccp/ipv4.c
+@@ -121,7 +121,7 @@ int dccp_v4_connect(struct sock *sk, str
+ 					    inet->inet_daddr,
+ 					    inet->inet_sport,
+ 					    inet->inet_dport);
+-	inet->inet_id = dp->dccps_iss ^ jiffies;
++	inet->inet_id = prandom_u32();
+
+ 	err = dccp_connect(sk);
+ 	rt = NULL;
+--- a/net/ipv4/datagram.c
++++ b/net/ipv4/datagram.c
+@@ -75,7 +75,7 @@ int __ip4_datagram_connect(struct sock *
+ 	inet->inet_dport = usin->sin_port;
+ 	sk->sk_state = TCP_ESTABLISHED;
+ 	sk_set_txhash(sk);
+-	inet->inet_id = jiffies;
++	inet->inet_id = prandom_u32();
+
+ 	sk_dst_set(sk, &rt->dst);
+ 	err = 0;
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -241,7 +241,7 @@ int tcp_v4_connect(struct sock *sk, stru
+ 							   inet->inet_sport,
+ 							   usin->sin_port);
+
+-	inet->inet_id = tp->write_seq ^ jiffies;
++	inet->inet_id = prandom_u32();
+
+ 	err = tcp_connect(sk);
+
+@@ -1302,7 +1302,7 @@ struct sock *tcp_v4_syn_recv_sock(const
+ 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
+ 	if (inet_opt)
+ 		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
+-	newinet->inet_id = newtp->write_seq ^ jiffies;
++	newinet->inet_id = prandom_u32();
+
+ 	if (!dst) {
+ 		dst = inet_csk_route_child_sock(sk, newsk, req);
+--- a/net/sctp/socket.c
++++ b/net/sctp/socket.c
+@@ -7267,7 +7267,7 @@ void sctp_copy_sock(struct sock *newsk,
+ 	newinet->inet_rcv_saddr = inet->inet_rcv_saddr;
+ 	newinet->inet_dport = htons(asoc->peer.port);
+ 	newinet->pmtudisc = inet->pmtudisc;
+-	newinet->inet_id = asoc->next_tsn ^ jiffies;
++	newinet->inet_id = prandom_u32();
+
+ 	newinet->uc_ttl = inet->uc_ttl;
+ 	newinet->mc_loop = 1;
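
For illustration, a minimal user-space C sketch of the fingerprinting
concern this pair of patches addresses; old_style_id(), the HZ value and
the use of rand() are hypothetical stand-ins, not kernel code:

    #include <stdio.h>
    #include <stdint.h>
    #include <stdlib.h>
    #include <time.h>

    /* Old pattern: the per-socket ID generator starts at (secret ^ jiffies),
     * so observed IDs carry ~16 bits correlated with the host's tick count. */
    static uint16_t old_style_id(uint16_t secret, uint64_t jiffies)
    {
        return (uint16_t)(secret ^ jiffies);
    }

    int main(void)
    {
        uint64_t fake_jiffies = (uint64_t)time(NULL) * 250; /* pretend HZ=250 */
        uint16_t secret = (uint16_t)(rand() & 0xffff);

        printf("old seed: 0x%04x (correlates with uptime/clock)\n",
               old_style_id(secret, fake_jiffies));
        printf("new seed: 0x%04x (random, as with prandom_u32())\n",
               (uint16_t)(rand() & 0xffff));
        return 0;
    }

An observer who samples a few IDs can correlate them with the host's tick
counter; a random starting point is equally valid under RFC 6864 and
reveals nothing.
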
diff --git a/queue-4.4/net-add-read_once-annotation-in-__skb_wait_for_more_packets.patch b/queue-4.4/net-add-read_once-annotation-in-__skb_wait_for_more_packets.patch
new file mode 100644
index 00000000000..84e9b08ddeb
--- /dev/null
+++ b/queue-4.4/net-add-read_once-annotation-in-__skb_wait_for_more_packets.patch
@@ -0,0 +1,79 @@
+From foo@baz Wed 06 Nov 2019 05:02:50 PM CET
+From: Eric Dumazet
+Date: Wed, 23 Oct 2019 22:44:52 -0700
+Subject: net: add READ_ONCE() annotation in __skb_wait_for_more_packets()
+
+From: Eric Dumazet
+
+[ Upstream commit 7c422d0ce97552dde4a97e6290de70ec6efb0fc6 ]
+
+__skb_wait_for_more_packets() can be called while other cpus
+can feed packets to the socket receive queue.
+
+KCSAN reported:
+
+BUG: KCSAN: data-race in __skb_wait_for_more_packets / __udp_enqueue_schedule_skb
+
+write to 0xffff888102e40b58 of 8 bytes by interrupt on cpu 0:
+ __skb_insert include/linux/skbuff.h:1852 [inline]
+ __skb_queue_before include/linux/skbuff.h:1958 [inline]
+ __skb_queue_tail include/linux/skbuff.h:1991 [inline]
+ __udp_enqueue_schedule_skb+0x2d7/0x410 net/ipv4/udp.c:1470
+ __udp_queue_rcv_skb net/ipv4/udp.c:1940 [inline]
+ udp_queue_rcv_one_skb+0x7bd/0xc70 net/ipv4/udp.c:2057
+ udp_queue_rcv_skb+0xb5/0x400 net/ipv4/udp.c:2074
+ udp_unicast_rcv_skb.isra.0+0x7e/0x1c0 net/ipv4/udp.c:2233
+ __udp4_lib_rcv+0xa44/0x17c0 net/ipv4/udp.c:2300
+ udp_rcv+0x2b/0x40 net/ipv4/udp.c:2470
+ ip_protocol_deliver_rcu+0x4d/0x420 net/ipv4/ip_input.c:204
+ ip_local_deliver_finish+0x110/0x140 net/ipv4/ip_input.c:231
+ NF_HOOK include/linux/netfilter.h:305 [inline]
+ NF_HOOK include/linux/netfilter.h:299 [inline]
+ ip_local_deliver+0x133/0x210 net/ipv4/ip_input.c:252
+ dst_input include/net/dst.h:442 [inline]
+ ip_rcv_finish+0x121/0x160 net/ipv4/ip_input.c:413
+ NF_HOOK include/linux/netfilter.h:305 [inline]
+ NF_HOOK include/linux/netfilter.h:299 [inline]
+ ip_rcv+0x18f/0x1a0 net/ipv4/ip_input.c:523
+ __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5010
+ __netif_receive_skb+0x37/0xf0 net/core/dev.c:5124
+ process_backlog+0x1d3/0x420 net/core/dev.c:5955
+
+read to 0xffff888102e40b58 of 8 bytes by task 13035 on cpu 1:
+ __skb_wait_for_more_packets+0xfa/0x320 net/core/datagram.c:100
+ __skb_recv_udp+0x374/0x500 net/ipv4/udp.c:1683
+ udp_recvmsg+0xe1/0xb10 net/ipv4/udp.c:1712
+ inet_recvmsg+0xbb/0x250 net/ipv4/af_inet.c:838
+ sock_recvmsg_nosec+0x5c/0x70 net/socket.c:871
+ ___sys_recvmsg+0x1a0/0x3e0 net/socket.c:2480
+ do_recvmmsg+0x19a/0x5c0 net/socket.c:2601
+ __sys_recvmmsg+0x1ef/0x200 net/socket.c:2680
+ __do_sys_recvmmsg net/socket.c:2703 [inline]
+ __se_sys_recvmmsg net/socket.c:2696 [inline]
+ __x64_sys_recvmmsg+0x89/0xb0 net/socket.c:2696
+ do_syscall_64+0xcc/0x370 arch/x86/entry/common.c:290
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 1 PID: 13035 Comm: syz-executor.3 Not tainted 5.4.0-rc3+ #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+
+Signed-off-by: Eric Dumazet
+Reported-by: syzbot
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/core/datagram.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/core/datagram.c
++++ b/net/core/datagram.c
+@@ -96,7 +96,7 @@ static int wait_for_more_packets(struct
+ 	if (error)
+ 		goto out_err;
+
+-	if (sk->sk_receive_queue.prev != skb)
++	if (READ_ONCE(sk->sk_receive_queue.prev) != skb)
+ 		goto out;
+
+ 	/* Socket shut down? */
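
The one-line fix works because READ_ONCE() forces a fresh, non-torn load of
sk_receive_queue.prev on every check while another CPU may be appending to
the queue. A rough user-space approximation of the idea; the macro below
mimics, but is not, the kernel's READ_ONCE():

    #include <stdio.h>

    /* The volatile cast forces one real load from memory per evaluation,
     * so the compiler cannot cache the value across a wait loop. */
    #define READ_ONCE(x) (*(const volatile __typeof__(x) *)&(x))

    static void *queue_prev; /* imagine softirq context updating this */

    /* Without READ_ONCE(), the compiler could hoist the load of
     * queue_prev out of the enclosing wait loop and spin on a stale
     * register copy. */
    static int more_packets(void *last_seen)
    {
        return READ_ONCE(queue_prev) != last_seen;
    }

    int main(void)
    {
        printf("more packets: %d\n", more_packets(NULL));
        return 0;
    }
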
diff --git a/queue-4.4/net-fix-sk_page_frag-recursion-from-memory-reclaim.patch b/queue-4.4/net-fix-sk_page_frag-recursion-from-memory-reclaim.patch
new file mode 100644
index 00000000000..6538b6c796f
--- /dev/null
+++ b/queue-4.4/net-fix-sk_page_frag-recursion-from-memory-reclaim.patch
@@ -0,0 +1,160 @@
+From foo@baz Wed 06 Nov 2019 05:02:50 PM CET
+From: Tejun Heo
+Date: Thu, 24 Oct 2019 13:50:27 -0700
+Subject: net: fix sk_page_frag() recursion from memory reclaim
+
+From: Tejun Heo
+
+[ Upstream commit 20eb4f29b60286e0d6dc01d9c260b4bd383c58fb ]
+
+sk_page_frag() optimizes skb_frag allocations by using the per-task
+skb_frag cache when it knows it's the only user. The condition is
+determined by seeing whether the socket allocation mask allows
+blocking - if the allocation may block, it obviously owns the task's
+context and ergo exclusively owns current->task_frag.
+
+Unfortunately, this misses recursion through the memory reclaim path.
+Please take a look at the following backtrace.
+
+ [2] RIP: 0010:tcp_sendmsg_locked+0xccf/0xe10
+ ...
+ tcp_sendmsg+0x27/0x40
+ sock_sendmsg+0x30/0x40
+ sock_xmit.isra.24+0xa1/0x170 [nbd]
+ nbd_send_cmd+0x1d2/0x690 [nbd]
+ nbd_queue_rq+0x1b5/0x3b0 [nbd]
+ __blk_mq_try_issue_directly+0x108/0x1b0
+ blk_mq_request_issue_directly+0xbd/0xe0
+ blk_mq_try_issue_list_directly+0x41/0xb0
+ blk_mq_sched_insert_requests+0xa2/0xe0
+ blk_mq_flush_plug_list+0x205/0x2a0
+ blk_flush_plug_list+0xc3/0xf0
+ [1] blk_finish_plug+0x21/0x2e
+ _xfs_buf_ioapply+0x313/0x460
+ __xfs_buf_submit+0x67/0x220
+ xfs_buf_read_map+0x113/0x1a0
+ xfs_trans_read_buf_map+0xbf/0x330
+ xfs_btree_read_buf_block.constprop.42+0x95/0xd0
+ xfs_btree_lookup_get_block+0x95/0x170
+ xfs_btree_lookup+0xcc/0x470
+ xfs_bmap_del_extent_real+0x254/0x9a0
+ __xfs_bunmapi+0x45c/0xab0
+ xfs_bunmapi+0x15/0x30
+ xfs_itruncate_extents_flags+0xca/0x250
+ xfs_free_eofblocks+0x181/0x1e0
+ xfs_fs_destroy_inode+0xa8/0x1b0
+ destroy_inode+0x38/0x70
+ dispose_list+0x35/0x50
+ prune_icache_sb+0x52/0x70
+ super_cache_scan+0x120/0x1a0
+ do_shrink_slab+0x120/0x290
+ shrink_slab+0x216/0x2b0
+ shrink_node+0x1b6/0x4a0
+ do_try_to_free_pages+0xc6/0x370
+ try_to_free_mem_cgroup_pages+0xe3/0x1e0
+ try_charge+0x29e/0x790
+ mem_cgroup_charge_skmem+0x6a/0x100
+ __sk_mem_raise_allocated+0x18e/0x390
+ __sk_mem_schedule+0x2a/0x40
+ [0] tcp_sendmsg_locked+0x8eb/0xe10
+ tcp_sendmsg+0x27/0x40
+ sock_sendmsg+0x30/0x40
+ ___sys_sendmsg+0x26d/0x2b0
+ __sys_sendmsg+0x57/0xa0
+ do_syscall_64+0x42/0x100
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+In [0], tcp_sendmsg_locked() was using current->page_frag when it
+called sk_wmem_schedule(). It already calculated how many bytes can
+fit into current->page_frag. Due to memory pressure,
+sk_wmem_schedule() called into the memory reclaim path, which called
+into xfs and then the IO issue path. Because the filesystem in
+question is backed by nbd, the control goes back into the tcp layer
+- back into tcp_sendmsg_locked().
+
+nbd sets sk_allocation to (GFP_NOIO | __GFP_MEMALLOC) which makes
+sense - it's in the process of freeing memory and wants to be able to,
+e.g., drop clean pages to make forward progress. However, this
+confused sk_page_frag() called from [2]. Because it only tests
+whether the allocation allows blocking - which it does - it now thinks
+current->page_frag can be used again although it already was being
+used in [0].
+
+After [2] used current->page_frag, the offset would be increased by
+the used amount. When the control returns to [0],
+current->page_frag's offset is increased and the previously calculated
+number of bytes now may overrun the end of allocated memory, leading
+to silent memory corruptions.
+
+Fix it by adding gfpflags_normal_context() which tests sleepable &&
+!reclaim and use it to determine whether to use current->task_frag.
+
+v2: Eric didn't like gfp flags being tested twice. Introduce a new
+ helper gfpflags_normal_context() and combine the two tests.
+
+Signed-off-by: Tejun Heo
+Cc: Josef Bacik
+Cc: Eric Dumazet
+Cc: stable@vger.kernel.org
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ include/linux/gfp.h | 23 +++++++++++++++++++++++
+ include/net/sock.h | 11 ++++++++---
+ 2 files changed, 31 insertions(+), 3 deletions(-)
+
+--- a/include/linux/gfp.h
++++ b/include/linux/gfp.h
+@@ -274,6 +274,29 @@ static inline bool gfpflags_allow_blocki
+ 	return (bool __force)(gfp_flags & __GFP_DIRECT_RECLAIM);
+ }
+
++/**
++ * gfpflags_normal_context - is gfp_flags a normal sleepable context?
++ * @gfp_flags: gfp_flags to test
++ *
++ * Test whether @gfp_flags indicates that the allocation is from the
++ * %current context and allowed to sleep.
++ *
++ * An allocation being allowed to block doesn't mean it owns the %current
++ * context. When direct reclaim path tries to allocate memory, the
++ * allocation context is nested inside whatever %current was doing at the
++ * time of the original allocation. The nested allocation may be allowed
++ * to block but modifying anything %current owns can corrupt the outer
++ * context's expectations.
++ *
++ * %true result from this function indicates that the allocation context
++ * can sleep and use anything that's associated with %current.
++ */
++static inline bool gfpflags_normal_context(const gfp_t gfp_flags)
++{
++	return (gfp_flags & (__GFP_DIRECT_RECLAIM | __GFP_MEMALLOC)) ==
++		__GFP_DIRECT_RECLAIM;
++}
++
+ #ifdef CONFIG_HIGHMEM
+ #define OPT_ZONE_HIGHMEM ZONE_HIGHMEM
+ #else
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -2077,12 +2077,17 @@ struct sk_buff *sk_stream_alloc_skb(stru
+  * sk_page_frag - return an appropriate page_frag
+  * @sk: socket
+  *
+- * If socket allocation mode allows current thread to sleep, it means its
+- * safe to use the per task page_frag instead of the per socket one.
++ * Use the per task page_frag instead of the per socket one for
++ * optimization when we know that we're in the normal context and owns
++ * everything that's associated with %current.
++ *
++ * gfpflags_allow_blocking() isn't enough here as direct reclaim may nest
++ * inside other socket operations and end up recursing into sk_page_frag()
++ * while it's already in use.
+  */
+ static inline struct page_frag *sk_page_frag(struct sock *sk)
+ {
+-	if (gfpflags_allow_blocking(sk->sk_allocation))
++	if (gfpflags_normal_context(sk->sk_allocation))
+ 		return &current->task_frag;
+
+ 	return &sk->sk_frag;
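
The heart of the fix is a single bitmask test. A small stand-alone sketch
of the same logic; the flag values and names here are illustrative only,
the real masks live in include/linux/gfp.h:

    #include <stdbool.h>
    #include <stdio.h>

    #define FAKE_DIRECT_RECLAIM 0x1u
    #define FAKE_MEMALLOC       0x2u

    /* Same shape as gfpflags_normal_context(): "normal" means the
     * allocation may sleep AND is not itself part of memory reclaim. */
    static bool normal_context(unsigned int flags)
    {
        return (flags & (FAKE_DIRECT_RECLAIM | FAKE_MEMALLOC)) ==
               FAKE_DIRECT_RECLAIM;
    }

    int main(void)
    {
        /* GFP_KERNEL-like: sleepable, not reclaim -> task_frag is safe */
        printf("normal:  %d\n", normal_context(FAKE_DIRECT_RECLAIM));
        /* nbd-like (sleepable but __GFP_MEMALLOC set) -> must fall back
         * to the per-socket sk_frag */
        printf("reclaim: %d\n",
               normal_context(FAKE_DIRECT_RECLAIM | FAKE_MEMALLOC));
        return 0;
    }

Testing both bits in one comparison is what addresses Eric's v1 review
comment about checking the gfp flags twice.
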
diff --git a/queue-4.4/net-hisilicon-fix-ping-latency-when-deal-with-high-throughput.patch b/queue-4.4/net-hisilicon-fix-ping-latency-when-deal-with-high-throughput.patch
new file mode 100644
index 00000000000..e4325a647a1
--- /dev/null
+++ b/queue-4.4/net-hisilicon-fix-ping-latency-when-deal-with-high-throughput.patch
@@ -0,0 +1,76 @@
+From foo@baz Wed 06 Nov 2019 05:02:50 PM CET
+From: Jiangfeng Xiao
+Date: Mon, 28 Oct 2019 13:09:46 +0800
+Subject: net: hisilicon: Fix ping latency when deal with high throughput
+
+From: Jiangfeng Xiao
+
+[ Upstream commit e56bd641ca61beb92b135298d5046905f920b734 ]
+
+This is due to an error in the over-budget processing.
+When dealing with high throughput, the used buffers
+that exceed the budget are not cleaned up. In addition,
+it takes a lot of cycles to clean up the used buffers,
+and only then can the buffers holding valid data take effect.
+
+Signed-off-by: Jiangfeng Xiao
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/net/ethernet/hisilicon/hip04_eth.c | 15 +++++++++------
+ 1 file changed, 9 insertions(+), 6 deletions(-)
+
+--- a/drivers/net/ethernet/hisilicon/hip04_eth.c
++++ b/drivers/net/ethernet/hisilicon/hip04_eth.c
+@@ -174,6 +174,7 @@ struct hip04_priv {
+ 	dma_addr_t rx_phys[RX_DESC_NUM];
+ 	unsigned int rx_head;
+ 	unsigned int rx_buf_size;
++	unsigned int rx_cnt_remaining;
+
+ 	struct device_node *phy_node;
+ 	struct phy_device *phy;
+@@ -487,7 +488,6 @@ static int hip04_rx_poll(struct napi_str
+ 	struct hip04_priv *priv = container_of(napi, struct hip04_priv, napi);
+ 	struct net_device *ndev = priv->ndev;
+ 	struct net_device_stats *stats = &ndev->stats;
+-	unsigned int cnt = hip04_recv_cnt(priv);
+ 	struct rx_desc *desc;
+ 	struct sk_buff *skb;
+ 	unsigned char *buf;
+@@ -500,8 +500,8 @@ static int hip04_rx_poll(struct napi_str
+
+ 	/* clean up tx descriptors */
+ 	tx_remaining = hip04_tx_reclaim(ndev, false);
+-
+-	while (cnt && !last) {
++	priv->rx_cnt_remaining += hip04_recv_cnt(priv);
++	while (priv->rx_cnt_remaining && !last) {
+ 		buf = priv->rx_buf[priv->rx_head];
+ 		skb = build_skb(buf, priv->rx_buf_size);
+ 		if (unlikely(!skb))
+@@ -544,11 +544,13 @@ static int hip04_rx_poll(struct napi_str
+ 		hip04_set_recv_desc(priv, phys);
+
+ 		priv->rx_head = RX_NEXT(priv->rx_head);
+-		if (rx >= budget)
++		if (rx >= budget) {
++			--priv->rx_cnt_remaining;
+ 			goto done;
++		}
+
+-		if (--cnt == 0)
+-			cnt = hip04_recv_cnt(priv);
++		if (--priv->rx_cnt_remaining == 0)
++			priv->rx_cnt_remaining += hip04_recv_cnt(priv);
+ 	}
+
+ 	if (!(priv->reg_inten & RCV_INT)) {
+@@ -633,6 +635,7 @@ static int hip04_mac_open(struct net_dev
+ 	int i;
+
+ 	priv->rx_head = 0;
++	priv->rx_cnt_remaining = 0;
+ 	priv->tx_head = 0;
+ 	priv->tx_tail = 0;
+ 	hip04_reset_ppe(priv);
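
A toy model of the new accounting, with hw_recv_cnt() as a made-up
stand-in for hip04_recv_cnt(): packets beyond the NAPI budget stay
counted in rx_cnt_remaining, so the next poll resumes cleanup where it
left off instead of re-sampling the hardware count from scratch:

    #include <stdio.h>

    /* Stand-in for the hardware counter: report a burst of pending
     * packets the first few polls, then go idle. */
    static unsigned int hw_recv_cnt(void)
    {
        static unsigned int bursts[] = { 300, 150, 0 };
        static unsigned int i;
        return i < 3 ? bursts[i++] : 0;
    }

    static unsigned int rx_cnt_remaining;

    static int rx_poll(int budget)
    {
        int rx = 0;

        rx_cnt_remaining += hw_recv_cnt();
        while (rx_cnt_remaining) {
            rx++;                       /* "receive" one packet */
            if (rx >= budget) {
                --rx_cnt_remaining;     /* account the packet just handled */
                break;                  /* leftovers survive to next poll */
            }
            if (--rx_cnt_remaining == 0)
                rx_cnt_remaining += hw_recv_cnt();
        }
        return rx;
    }

    int main(void)
    {
        printf("poll 1: %d rx, %u left\n", rx_poll(64), rx_cnt_remaining);
        printf("poll 2: %d rx, %u left\n", rx_poll(64), rx_cnt_remaining);
        return 0;
    }
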
diff --git a/queue-4.4/net-mlx4_core-dynamically-set-guaranteed-amount-of-counters-per-vf.patch b/queue-4.4/net-mlx4_core-dynamically-set-guaranteed-amount-of-counters-per-vf.patch
new file mode 100644
index 00000000000..38916acd340
--- /dev/null
+++ b/queue-4.4/net-mlx4_core-dynamically-set-guaranteed-amount-of-counters-per-vf.patch
@@ -0,0 +1,94 @@
+From foo@baz Wed 06 Nov 2019 04:08:37 PM CET
+From: Eran Ben Elisha
+Date: Sun, 27 Oct 2019 16:39:15 +0200
+Subject: net/mlx4_core: Dynamically set guaranteed amount of counters per VF
+
+From: Eran Ben Elisha
+
+[ Upstream commit e19868efea0c103f23b4b7e986fd0a703822111f ]
+
+Prior to this patch, the number of counters guaranteed per VF in the
+resource tracker was MLX4_VF_COUNTERS_PER_PORT * MLX4_MAX_PORTS. It was
+set regardless of whether the VF was single or dual port.
+This caused several VFs to have no guaranteed counters although the
+system could satisfy their request.
+
+The fix is to guarantee counters dynamically, based on each VF's
+specification.
+
+Fixes: 9de92c60beaa ("net/mlx4_core: Adjust counter grant policy in the resource tracker")
+Signed-off-by: Eran Ben Elisha
+Signed-off-by: Jack Morgenstein
+Signed-off-by: Tariq Toukan
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 42 +++++++++++-------
+ 1 file changed, 26 insertions(+), 16 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
++++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+@@ -463,12 +463,31 @@ void mlx4_init_quotas(struct mlx4_dev *d
+ 		priv->mfunc.master.res_tracker.res_alloc[RES_MPT].quota[pf];
+ }
+
+-static int get_max_gauranteed_vfs_counter(struct mlx4_dev *dev)
++static int
++mlx4_calc_res_counter_guaranteed(struct mlx4_dev *dev,
++				 struct resource_allocator *res_alloc,
++				 int vf)
+ {
+-	/* reduce the sink counter */
+-	return (dev->caps.max_counters - 1 -
+-		(MLX4_PF_COUNTERS_PER_PORT * MLX4_MAX_PORTS))
+-		/ MLX4_MAX_PORTS;
++	struct mlx4_active_ports actv_ports;
++	int ports, counters_guaranteed;
++
++	/* For master, only allocate according to the number of phys ports */
++	if (vf == mlx4_master_func_num(dev))
++		return MLX4_PF_COUNTERS_PER_PORT * dev->caps.num_ports;
++
++	/* calculate real number of ports for the VF */
++	actv_ports = mlx4_get_active_ports(dev, vf);
++	ports = bitmap_weight(actv_ports.ports, dev->caps.num_ports);
++	counters_guaranteed = ports * MLX4_VF_COUNTERS_PER_PORT;
++
++	/* If we do not have enough counters for this VF, do not
++	 * allocate any for it. '-1' to reduce the sink counter.
++	 */
++	if ((res_alloc->res_reserved + counters_guaranteed) >
++	    (dev->caps.max_counters - 1))
++		return 0;
++
++	return counters_guaranteed;
+ }
+
+ int mlx4_init_resource_tracker(struct mlx4_dev *dev)
+@@ -476,7 +495,6 @@ int mlx4_init_resource_tracker(struct ml
+ 	struct mlx4_priv *priv = mlx4_priv(dev);
+ 	int i, j;
+ 	int t;
+-	int max_vfs_guarantee_counter = get_max_gauranteed_vfs_counter(dev);
+
+ 	priv->mfunc.master.res_tracker.slave_list =
+ 		kzalloc(dev->num_slaves * sizeof(struct slave_list),
+@@ -593,16 +611,8 @@ int mlx4_init_resource_tracker(struct ml
+ 				break;
+ 			case RES_COUNTER:
+ 				res_alloc->quota[t] = dev->caps.max_counters;
+-				if (t == mlx4_master_func_num(dev))
+-					res_alloc->guaranteed[t] =
+-						MLX4_PF_COUNTERS_PER_PORT *
+-						MLX4_MAX_PORTS;
+-				else if (t <= max_vfs_guarantee_counter)
+-					res_alloc->guaranteed[t] =
+-						MLX4_VF_COUNTERS_PER_PORT *
+-						MLX4_MAX_PORTS;
+-				else
+-					res_alloc->guaranteed[t] = 0;
++				res_alloc->guaranteed[t] =
++					mlx4_calc_res_counter_guaranteed(dev, res_alloc, t);
+ 				res_alloc->res_free -= res_alloc->guaranteed[t];
+ 				break;
+ 			default:
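
A simplified sketch of the new grant computation; calc_guaranteed() and
the constant are illustrative stand-ins for the driver's
mlx4_calc_res_counter_guaranteed() and MLX4_VF_COUNTERS_PER_PORT:

    #include <stdio.h>

    #define VF_COUNTERS_PER_PORT 2

    /* Grant counters according to the VF's actual port count, or nothing
     * when the pool cannot cover the request. The '-1' keeps the sink
     * counter out of the grantable pool. */
    static int calc_guaranteed(int vf_ports, int reserved, int max_counters)
    {
        int want = vf_ports * VF_COUNTERS_PER_PORT;

        if (reserved + want > max_counters - 1)
            return 0;
        return want;
    }

    int main(void)
    {
        /* a single-port VF now reserves 2 counters, not 2 * MLX4_MAX_PORTS */
        printf("single-port VF: %d\n", calc_guaranteed(1, 10, 64));
        printf("dual-port VF:   %d\n", calc_guaranteed(2, 10, 64));
        printf("exhausted pool: %d\n", calc_guaranteed(2, 62, 64));
        return 0;
    }

Because single-port VFs no longer reserve counters for ports they do not
have, the same pool can satisfy more VFs' guarantees.
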
diff --git a/queue-4.4/net-zeroing-the-structure-ethtool_wolinfo-in-ethtool_get_wol.patch b/queue-4.4/net-zeroing-the-structure-ethtool_wolinfo-in-ethtool_get_wol.patch
new file mode 100644
index 00000000000..6d758f52d23
--- /dev/null
+++ b/queue-4.4/net-zeroing-the-structure-ethtool_wolinfo-in-ethtool_get_wol.patch
@@ -0,0 +1,36 @@
+From foo@baz Wed 06 Nov 2019 05:02:50 PM CET
+From: zhanglin
+Date: Sat, 26 Oct 2019 15:54:16 +0800
+Subject: net: Zeroing the structure ethtool_wolinfo in ethtool_get_wol()
+
+From: zhanglin
+
+[ Upstream commit 5ff223e86f5addbfae26419cbb5d61d98f6fbf7d ]
+
+memset() the structure ethtool_wolinfo, whose padding bytes were
+not being zeroed out before the structure is copied to user space.
+
+Signed-off-by: zhanglin
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/core/ethtool.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/core/ethtool.c
++++ b/net/core/ethtool.c
+@@ -941,11 +941,13 @@ static int ethtool_reset(struct net_devi
+
+ static int ethtool_get_wol(struct net_device *dev, char __user *useraddr)
+ {
+-	struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL };
++	struct ethtool_wolinfo wol;
+
+ 	if (!dev->ethtool_ops->get_wol)
+ 		return -EOPNOTSUPP;
+
++	memset(&wol, 0, sizeof(struct ethtool_wolinfo));
++	wol.cmd = ETHTOOL_GWOL;
+ 	dev->ethtool_ops->get_wol(dev, &wol);
+
+ 	if (copy_to_user(useraddr, &wol, sizeof(wol)))
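
Why the designated initializer was not enough: C leaves the padding bytes
of an automatic struct unspecified unless the whole object is memset(),
so stale stack bytes could reach user space via copy_to_user(). A small
sketch with a hypothetical wol_like struct (the layout is illustrative,
not the real struct ethtool_wolinfo):

    #include <stdio.h>
    #include <string.h>

    /* On common ABIs two padding bytes follow 'sopass', and an
     * initializer such as { .cmd = ... } need not zero them. */
    struct wol_like {
        unsigned int  cmd;
        unsigned char sopass[6];
        unsigned int  supported;
    };

    int main(void)
    {
        struct wol_like wol;

        memset(&wol, 0, sizeof(wol)); /* zeroes the padding bytes too */
        wol.cmd = 0x5;                /* stand-in for ETHTOOL_GWOL */

        printf("sizeof struct: %zu, sum of members: %zu\n",
               sizeof(wol),
               sizeof(wol.cmd) + sizeof(wol.sopass) + sizeof(wol.supported));
        return 0;
    }

On a typical 64-bit build the struct is larger than the sum of its
members, which is exactly the gap the memset() closes.
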
diff --git a/queue-4.4/series b/queue-4.4/series
index ecc822df83f..4e2183262ca 100644
--- a/queue-4.4/series
+++ b/queue-4.4/series
@@ -11,3 +11,11 @@ scsi-target-core-do-not-overwrite-cdb-byte-1.patch
 of-unittest-fix-memory-leak-in-unittest_data_add.patch
 mips-bmips-mark-exception-vectors-as-char-arrays.patch
 cifs-fix-cifsinodeinfo-lock_sem-deadlock-when-reconn.patch
+dccp-do-not-leak-jiffies-on-the-wire.patch
+net-fix-sk_page_frag-recursion-from-memory-reclaim.patch
+net-hisilicon-fix-ping-latency-when-deal-with-high-throughput.patch
+net-zeroing-the-structure-ethtool_wolinfo-in-ethtool_get_wol.patch
+net-add-read_once-annotation-in-__skb_wait_for_more_packets.patch
+vxlan-check-tun_info-options_len-properly.patch
+net-mlx4_core-dynamically-set-guaranteed-amount-of-counters-per-vf.patch
+inet-stop-leaking-jiffies-on-the-wire.patch
diff --git a/queue-4.4/vxlan-check-tun_info-options_len-properly.patch b/queue-4.4/vxlan-check-tun_info-options_len-properly.patch
new file mode 100644
index 00000000000..e5c218ec9c9
--- /dev/null
+++ b/queue-4.4/vxlan-check-tun_info-options_len-properly.patch
@@ -0,0 +1,37 @@
+From foo@baz Wed 06 Nov 2019 04:08:37 PM CET
+From: Xin Long
+Date: Tue, 29 Oct 2019 01:24:32 +0800
+Subject: vxlan: check tun_info options_len properly
+
+From: Xin Long
+
+[ Upstream commit eadf52cf1852196a1363044dcda22fa5d7f296f7 ]
+
+This patch improves the tun_info options_len check by dropping
+the skb when TUNNEL_VXLAN_OPT is set but options_len is less
+than the size of struct vxlan_metadata. This avoids a potential
+out-of-bounds access on ip_tun_info.
+
+Fixes: ee122c79d422 ("vxlan: Flow based tunneling")
+Signed-off-by: Xin Long
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/net/vxlan.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/vxlan.c
++++ b/drivers/net/vxlan.c
+@@ -2006,8 +2006,11 @@ static void vxlan_xmit_one(struct sk_buf
+ 		ttl = info->key.ttl;
+ 		tos = info->key.tos;
+
+-		if (info->options_len)
++		if (info->options_len) {
++			if (info->options_len < sizeof(*md))
++				goto drop;
+ 			md = ip_tunnel_info_opts(info);
++		}
+ 	} else {
+ 		md->gbp = skb->mark;
+ 	}
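
A stand-alone sketch of the bounds check this last patch adds; struct
vxlan_md and get_opts() are hypothetical simplifications of the kernel's
struct vxlan_metadata and ip_tunnel_info_opts():

    #include <stdio.h>

    struct vxlan_md { unsigned int gbp; };

    /* Refuse to hand out a metadata pointer unless enough option bytes
     * were actually supplied; the caller drops the skb on NULL. */
    static const void *get_opts(const unsigned char *opts, size_t options_len)
    {
        if (options_len < sizeof(struct vxlan_md))
            return NULL;
        return opts;
    }

    int main(void)
    {
        unsigned char buf[8] = { 0 };

        printf("2 bytes of options: %s\n", get_opts(buf, 2) ? "use" : "drop");
        printf("8 bytes of options: %s\n", get_opts(buf, 8) ? "use" : "drop");
        return 0;
    }

Validating the length before the cast is what keeps a short, attacker- or
user-supplied option buffer from being read past its end.
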