From: Greg Kroah-Hartman Date: Fri, 18 Nov 2016 10:20:58 +0000 (+0100) Subject: 4.4-stable patches X-Git-Tag: v4.4.34~7 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=15c98251334652c19b4871618e3bbfaf016d5671;p=thirdparty%2Fkernel%2Fstable-queue.git 4.4-stable patches added patches: bgmac-stop-clearing-dma-receive-control-register-right-after-it-is-set.patch dccp-do-not-send-reset-to-already-closed-sockets.patch dccp-fix-out-of-bound-access-in-dccp_v4_err.patch dctcp-avoid-bogus-doubling-of-cwnd-after-loss.patch fib_trie-correct-proc-net-route-off-by-one-error.patch ip6_tunnel-clear-ip6cb-in-ip6tunnel_xmit.patch ipv4-use-new_gw-for-redirect-neigh-lookup.patch ipv6-dccp-add-missing-bind_conflict-to-dccp_ipv6_mapped.patch ipv6-dccp-fix-out-of-bound-access-in-dccp_v6_err.patch net-__skb_flow_dissect-must-cap-its-return-value.patch net-clear-sk_err_soft-in-sk_clone_lock.patch net-mangle-zero-checksum-in-skb_checksum_help.patch sctp-assign-assoc_id-earlier-in-__sctp_connect.patch sock-fix-sendmmsg-for-partial-sendmsg.patch tcp-fix-potential-memory-corruption.patch tcp-take-care-of-truncations-done-by-sk_filter.patch --- diff --git a/queue-4.4/bgmac-stop-clearing-dma-receive-control-register-right-after-it-is-set.patch b/queue-4.4/bgmac-stop-clearing-dma-receive-control-register-right-after-it-is-set.patch new file mode 100644 index 00000000000..9e05e700829 --- /dev/null +++ b/queue-4.4/bgmac-stop-clearing-dma-receive-control-register-right-after-it-is-set.patch @@ -0,0 +1,59 @@ +From foo@baz Fri Nov 18 11:09:43 CET 2016 +From: Andy Gospodarek +Date: Mon, 31 Oct 2016 13:32:03 -0400 +Subject: bgmac: stop clearing DMA receive control register right after it is set + +From: Andy Gospodarek + + +[ Upstream commit fcdefccac976ee51dd6071832b842d8fb41c479c ] + +Current bgmac code initializes some DMA settings in the receive control +register for some hardware and then immediately clears those settings. 
+Not clearing those settings results in ~420Mbps *improvement* in +throughput; this system can now receive frames at line-rate on Broadcom +5871x hardware compared to ~520Mbps today. I also tested a few other +values but found there to be no discernible difference in CPU +utilization even if burst size and prefetching values are different. + +On the hardware tested there was no need to keep the code that cleared +all but bits 16-17, but since there is a wide variety of hardware that +used this driver (I did not look at all hardware docs for hardware using +this IP block), I find it wise to move this call up and clear bits just +after reading the default value from the hardware rather than completely +removing it. + +This is a good candidate for -stable >=3.14 since that is when the code +that was supposed to improve performance (but did not) was introduced. + +Signed-off-by: Andy Gospodarek +Fixes: 56ceecde1f29 ("bgmac: initialize the DMA controller of core...") +Cc: Hauke Mehrtens +Acked-by: Hauke Mehrtens +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/bgmac.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/broadcom/bgmac.c ++++ b/drivers/net/ethernet/broadcom/bgmac.c +@@ -314,6 +314,10 @@ static void bgmac_dma_rx_enable(struct b + u32 ctl; + + ctl = bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_RX_CTL); ++ ++ /* preserve ONLY bits 16-17 from current hardware value */ ++ ctl &= BGMAC_DMA_RX_ADDREXT_MASK; ++ + if (bgmac->core->id.rev >= 4) { + ctl &= ~BGMAC_DMA_RX_BL_MASK; + ctl |= BGMAC_DMA_RX_BL_128 << BGMAC_DMA_RX_BL_SHIFT; +@@ -324,7 +328,6 @@ static void bgmac_dma_rx_enable(struct b + ctl &= ~BGMAC_DMA_RX_PT_MASK; + ctl |= BGMAC_DMA_RX_PT_1 << BGMAC_DMA_RX_PT_SHIFT; + } +- ctl &= BGMAC_DMA_RX_ADDREXT_MASK; + ctl |= BGMAC_DMA_RX_ENABLE; + ctl |= BGMAC_DMA_RX_PARITY_DISABLE; + ctl |= BGMAC_DMA_RX_OVERFLOW_CONT; diff --git a/queue-4.4/dccp-do-not-send-reset-to-already-closed-sockets.patch b/queue-4.4/dccp-do-not-send-reset-to-already-closed-sockets.patch new file mode 100644 index 00000000000..b434e49d743 --- /dev/null +++ b/queue-4.4/dccp-do-not-send-reset-to-already-closed-sockets.patch @@ -0,0 +1,74 @@ +From foo@baz Fri Nov 18 11:09:43 CET 2016 +From: Eric Dumazet +Date: Wed, 2 Nov 2016 18:04:24 -0700 +Subject: dccp: do not send reset to already closed sockets + +From: Eric Dumazet + + +[ Upstream commit 346da62cc186c4b4b1ac59f87f4482b47a047388 ] + +Andrey reported following warning while fuzzing with syzkaller + +WARNING: CPU: 1 PID: 21072 at net/dccp/proto.c:83 dccp_set_state+0x229/0x290 +Kernel panic - not syncing: panic_on_warn set ... 
+ +CPU: 1 PID: 21072 Comm: syz-executor Not tainted 4.9.0-rc1+ #293 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 + ffff88003d4c7738 ffffffff81b474f4 0000000000000003 dffffc0000000000 + ffffffff844f8b00 ffff88003d4c7804 ffff88003d4c7800 ffffffff8140c06a + 0000000041b58ab3 ffffffff8479ab7d ffffffff8140beae ffffffff8140cd00 +Call Trace: + [< inline >] __dump_stack lib/dump_stack.c:15 + [] dump_stack+0xb3/0x10f lib/dump_stack.c:51 + [] panic+0x1bc/0x39d kernel/panic.c:179 + [] __warn+0x1cc/0x1f0 kernel/panic.c:542 + [] warn_slowpath_null+0x2c/0x40 kernel/panic.c:585 + [] dccp_set_state+0x229/0x290 net/dccp/proto.c:83 + [] dccp_close+0x612/0xc10 net/dccp/proto.c:1016 + [] inet_release+0xef/0x1c0 net/ipv4/af_inet.c:415 + [] sock_release+0x8e/0x1d0 net/socket.c:570 + [] sock_close+0x16/0x20 net/socket.c:1017 + [] __fput+0x29d/0x720 fs/file_table.c:208 + [] ____fput+0x15/0x20 fs/file_table.c:244 + [] task_work_run+0xf8/0x170 kernel/task_work.c:116 + [< inline >] exit_task_work include/linux/task_work.h:21 + [] do_exit+0x883/0x2ac0 kernel/exit.c:828 + [] do_group_exit+0x10e/0x340 kernel/exit.c:931 + [] get_signal+0x634/0x15a0 kernel/signal.c:2307 + [] do_signal+0x8d/0x1a30 arch/x86/kernel/signal.c:807 + [] exit_to_usermode_loop+0xe5/0x130 +arch/x86/entry/common.c:156 + [< inline >] prepare_exit_to_usermode arch/x86/entry/common.c:190 + [] syscall_return_slowpath+0x1a8/0x1e0 +arch/x86/entry/common.c:259 + [] entry_SYSCALL_64_fastpath+0xc0/0xc2 +Dumping ftrace buffer: + (ftrace buffer empty) +Kernel Offset: disabled + +Fix this the same way we did for TCP in commit 565b7b2d2e63 +("tcp: do not send reset to already closed sockets") + +Signed-off-by: Eric Dumazet +Reported-by: Andrey Konovalov +Tested-by: Andrey Konovalov +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/dccp/proto.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/net/dccp/proto.c ++++ b/net/dccp/proto.c +@@ -1009,6 +1009,10 @@ void dccp_close(struct sock *sk, long ti + __kfree_skb(skb); + } + ++ /* If socket has been already reset kill it. */ ++ if (sk->sk_state == DCCP_CLOSED) ++ goto adjudge_to_death; ++ + if (data_was_unread) { + /* Unread data was tossed, send an appropriate Reset Code */ + DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread); diff --git a/queue-4.4/dccp-fix-out-of-bound-access-in-dccp_v4_err.patch b/queue-4.4/dccp-fix-out-of-bound-access-in-dccp_v4_err.patch new file mode 100644 index 00000000000..056dd2134de --- /dev/null +++ b/queue-4.4/dccp-fix-out-of-bound-access-in-dccp_v4_err.patch @@ -0,0 +1,56 @@ +From foo@baz Fri Nov 18 11:09:43 CET 2016 +From: Eric Dumazet +Date: Wed, 2 Nov 2016 19:00:40 -0700 +Subject: dccp: fix out of bound access in dccp_v4_err() + +From: Eric Dumazet + + +[ Upstream commit 6706a97fec963d6cb3f7fc2978ec1427b4651214 ] + +dccp_v4_err() does not use pskb_may_pull() and might access garbage. + +We only need 4 bytes at the beginning of the DCCP header, like TCP, +so the 8 bytes pulled in icmp_socket_deliver() are more than enough. + +This patch might allow to process more ICMP messages, as some routers +are still limiting the size of reflected bytes to 28 (RFC 792), instead +of extended lengths (RFC 1812 4.3.2.3) + +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/dccp/ipv4.c | 14 ++++++++------ + 1 file changed, 8 insertions(+), 6 deletions(-) + +--- a/net/dccp/ipv4.c ++++ b/net/dccp/ipv4.c +@@ -235,7 +235,7 @@ static void dccp_v4_err(struct sk_buff * + { + const struct iphdr *iph = (struct iphdr *)skb->data; + const u8 offset = iph->ihl << 2; +- const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); ++ const struct dccp_hdr *dh; + struct dccp_sock *dp; + struct inet_sock *inet; + const int type = icmp_hdr(skb)->type; +@@ -245,11 +245,13 @@ static void dccp_v4_err(struct sk_buff * + int err; + struct net *net = dev_net(skb->dev); + +- if (skb->len < offset + sizeof(*dh) || +- skb->len < offset + __dccp_basic_hdr_len(dh)) { +- ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); +- return; +- } ++ /* Only need dccph_dport & dccph_sport which are the first ++ * 4 bytes in dccp header. ++ * Our caller (icmp_socket_deliver()) already pulled 8 bytes for us. ++ */ ++ BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8); ++ BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8); ++ dh = (struct dccp_hdr *)(skb->data + offset); + + sk = __inet_lookup_established(net, &dccp_hashinfo, + iph->daddr, dh->dccph_dport, diff --git a/queue-4.4/dctcp-avoid-bogus-doubling-of-cwnd-after-loss.patch b/queue-4.4/dctcp-avoid-bogus-doubling-of-cwnd-after-loss.patch new file mode 100644 index 00000000000..22ed7257be9 --- /dev/null +++ b/queue-4.4/dctcp-avoid-bogus-doubling-of-cwnd-after-loss.patch @@ -0,0 +1,87 @@ +From foo@baz Fri Nov 18 11:09:43 CET 2016 +From: Florian Westphal +Date: Fri, 28 Oct 2016 18:43:11 +0200 +Subject: dctcp: avoid bogus doubling of cwnd after loss + +From: Florian Westphal + + +[ Upstream commit ce6dd23329b1ee6a794acf5f7e40f8e89b8317ee ] + +If a congestion control module doesn't provide .undo_cwnd function, +tcp_undo_cwnd_reduction() will set cwnd to + + tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1); + +... 
which makes sense for reno (it sets ssthresh to half the current cwnd), +but it makes no sense for dctcp, which sets ssthresh based on the current +congestion estimate. + +This can cause severe growth of cwnd (eventually overflowing u32). + +Fix this by saving last cwnd on loss and restore cwnd based on that, +similar to cubic and other algorithms. + +Fixes: e3118e8359bb7c ("net: tcp: add DCTCP congestion control algorithm") +Cc: Lawrence Brakmo +Cc: Andrew Shewmaker +Cc: Glenn Judd +Acked-by: Daniel Borkmann +Signed-off-by: Florian Westphal +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_dctcp.c | 13 ++++++++++++- + 1 file changed, 12 insertions(+), 1 deletion(-) + +--- a/net/ipv4/tcp_dctcp.c ++++ b/net/ipv4/tcp_dctcp.c +@@ -56,6 +56,7 @@ struct dctcp { + u32 next_seq; + u32 ce_state; + u32 delayed_ack_reserved; ++ u32 loss_cwnd; + }; + + static unsigned int dctcp_shift_g __read_mostly = 4; /* g = 1/2^4 */ +@@ -96,6 +97,7 @@ static void dctcp_init(struct sock *sk) + ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA); + + ca->delayed_ack_reserved = 0; ++ ca->loss_cwnd = 0; + ca->ce_state = 0; + + dctcp_reset(tp, ca); +@@ -111,9 +113,10 @@ static void dctcp_init(struct sock *sk) + + static u32 dctcp_ssthresh(struct sock *sk) + { +- const struct dctcp *ca = inet_csk_ca(sk); ++ struct dctcp *ca = inet_csk_ca(sk); + struct tcp_sock *tp = tcp_sk(sk); + ++ ca->loss_cwnd = tp->snd_cwnd; + return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U); + } + +@@ -308,12 +311,20 @@ static size_t dctcp_get_info(struct sock + return 0; + } + ++static u32 dctcp_cwnd_undo(struct sock *sk) ++{ ++ const struct dctcp *ca = inet_csk_ca(sk); ++ ++ return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd); ++} ++ + static struct tcp_congestion_ops dctcp __read_mostly = { + .init = dctcp_init, + .in_ack_event = dctcp_update_alpha, + .cwnd_event = dctcp_cwnd_event, + .ssthresh = dctcp_ssthresh, + .cong_avoid = tcp_reno_cong_avoid, ++ 
.undo_cwnd = dctcp_cwnd_undo, + .set_state = dctcp_state, + .get_info = dctcp_get_info, + .flags = TCP_CONG_NEEDS_ECN, diff --git a/queue-4.4/fib_trie-correct-proc-net-route-off-by-one-error.patch b/queue-4.4/fib_trie-correct-proc-net-route-off-by-one-error.patch new file mode 100644 index 00000000000..53fa29c6bac --- /dev/null +++ b/queue-4.4/fib_trie-correct-proc-net-route-off-by-one-error.patch @@ -0,0 +1,102 @@ +From foo@baz Fri Nov 18 11:09:43 CET 2016 +From: Alexander Duyck +Date: Fri, 4 Nov 2016 15:11:57 -0400 +Subject: fib_trie: Correct /proc/net/route off by one error + +From: Alexander Duyck + + +[ Upstream commit fd0285a39b1cb496f60210a9a00ad33a815603e7 ] + +The display of /proc/net/route has had a couple issues due to the fact that +when I originally rewrote most of fib_trie I made it so that the iterator +was tracking the next value to use instead of the current. + +In addition it had an off by 1 error where I was tracking the first piece +of data as position 0, even though in reality that belonged to the +SEQ_START_TOKEN. + +This patch updates the code so the iterator tracks the last reported +position and key instead of the next expected position and key. In +addition it shifts things so that all of the leaves start at 1 instead of +trying to report leaves starting with offset 0 as being valid. With these +two issues addressed this should resolve any off by one errors that were +present in the display of /proc/net/route. + +Fixes: 25b97c016b26 ("ipv4: off-by-one in continuation handling in /proc/net/route") +Cc: Andy Whitcroft +Reported-by: Jason Baron +Tested-by: Jason Baron +Signed-off-by: Alexander Duyck +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/fib_trie.c | 21 +++++++++------------ + 1 file changed, 9 insertions(+), 12 deletions(-) + +--- a/net/ipv4/fib_trie.c ++++ b/net/ipv4/fib_trie.c +@@ -2456,22 +2456,19 @@ static struct key_vector *fib_route_get_ + struct key_vector *l, **tp = &iter->tnode; + t_key key; + +- /* use cache location of next-to-find key */ ++ /* use cached location of previously found key */ + if (iter->pos > 0 && pos >= iter->pos) { +- pos -= iter->pos; + key = iter->key; + } else { +- iter->pos = 0; ++ iter->pos = 1; + key = 0; + } + +- while ((l = leaf_walk_rcu(tp, key)) != NULL) { ++ pos -= iter->pos; ++ ++ while ((l = leaf_walk_rcu(tp, key)) && (pos-- > 0)) { + key = l->key + 1; + iter->pos++; +- +- if (--pos <= 0) +- break; +- + l = NULL; + + /* handle unlikely case of a key wrap */ +@@ -2480,7 +2477,7 @@ static struct key_vector *fib_route_get_ + } + + if (l) +- iter->key = key; /* remember it */ ++ iter->key = l->key; /* remember it */ + else + iter->pos = 0; /* forget it */ + +@@ -2508,7 +2505,7 @@ static void *fib_route_seq_start(struct + return fib_route_get_idx(iter, *pos); + + iter->pos = 0; +- iter->key = 0; ++ iter->key = KEY_MAX; + + return SEQ_START_TOKEN; + } +@@ -2517,7 +2514,7 @@ static void *fib_route_seq_next(struct s + { + struct fib_route_iter *iter = seq->private; + struct key_vector *l = NULL; +- t_key key = iter->key; ++ t_key key = iter->key + 1; + + ++*pos; + +@@ -2526,7 +2523,7 @@ static void *fib_route_seq_next(struct s + l = leaf_walk_rcu(&iter->tnode, key); + + if (l) { +- iter->key = l->key + 1; ++ iter->key = l->key; + iter->pos++; + } else { + iter->pos = 0; diff --git a/queue-4.4/ip6_tunnel-clear-ip6cb-in-ip6tunnel_xmit.patch b/queue-4.4/ip6_tunnel-clear-ip6cb-in-ip6tunnel_xmit.patch new file mode 100644 index 00000000000..702fe79c5ac --- /dev/null +++ b/queue-4.4/ip6_tunnel-clear-ip6cb-in-ip6tunnel_xmit.patch @@ -0,0 +1,36 @@ +From foo@baz Fri Nov 18 11:09:43 CET 2016 +From: Eli Cooper +Date: 
Tue, 1 Nov 2016 23:45:12 +0800 +Subject: ip6_tunnel: Clear IP6CB in ip6tunnel_xmit() + +From: Eli Cooper + + +[ Upstream commit 23f4ffedb7d751c7e298732ba91ca75d224bc1a6 ] + +skb->cb may contain data from previous layers. In the observed scenario, +the garbage data were misinterpreted as IP6CB(skb)->frag_max_size, so +that small packets sent through the tunnel are mistakenly fragmented. + +This patch unconditionally clears the control buffer in ip6tunnel_xmit(), +which affects ip6_tunnel, ip6_udp_tunnel and ip6_gre. Currently none of +these tunnels set IP6CB(skb)->flags, otherwise it needs to be done earlier. + +Cc: stable@vger.kernel.org +Signed-off-by: Eli Cooper +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/ip6_tunnel.h | 1 + + 1 file changed, 1 insertion(+) + +--- a/include/net/ip6_tunnel.h ++++ b/include/net/ip6_tunnel.h +@@ -86,6 +86,7 @@ static inline void ip6tunnel_xmit(struct + struct net_device_stats *stats = &dev->stats; + int pkt_len, err; + ++ memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); + pkt_len = skb->len - skb_inner_network_offset(skb); + err = ip6_local_out(dev_net(skb_dst(skb)->dev), sk, skb); + diff --git a/queue-4.4/ipv4-use-new_gw-for-redirect-neigh-lookup.patch b/queue-4.4/ipv4-use-new_gw-for-redirect-neigh-lookup.patch new file mode 100644 index 00000000000..865530ea208 --- /dev/null +++ b/queue-4.4/ipv4-use-new_gw-for-redirect-neigh-lookup.patch @@ -0,0 +1,51 @@ +From foo@baz Fri Nov 18 11:09:43 CET 2016 +From: Stephen Suryaputra Lin +Date: Thu, 10 Nov 2016 11:16:15 -0500 +Subject: ipv4: use new_gw for redirect neigh lookup + +From: Stephen Suryaputra Lin + + +[ Upstream commit 969447f226b451c453ddc83cac6144eaeac6f2e3 ] + +In v2.6, ip_rt_redirect() calls arp_bind_neighbour() which returns 0 +and then the state of the neigh for the new_gw is checked. If the state +isn't valid then the redirected route is deleted. 
This behavior is +maintained up to v3.5.7 by check_peer_redirect() because rt->rt_gateway +is assigned to peer->redirect_learned.a4 before calling +ipv4_neigh_lookup(). + +After commit 5943634fc559 ("ipv4: Maintain redirect and PMTU info in +struct rtable again."), ipv4_neigh_lookup() is performed without the +rt_gateway assigned to the new_gw. In the case when rt_gateway (old_gw) +isn't zero, the function uses it as the key. The neigh is most likely +valid since the old_gw is the one that sends the ICMP redirect message. +Then the new_gw is assigned to fib_nh_exception. The problem is: the +new_gw ARP may never get resolved and the traffic is blackholed. + +So, use the new_gw for neigh lookup. + +Changes from v1: + - use __ipv4_neigh_lookup instead (per Eric Dumazet). + +Fixes: 5943634fc559 ("ipv4: Maintain redirect and PMTU info in struct rtable again.") +Signed-off-by: Stephen Suryaputra Lin +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/route.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -747,7 +747,9 @@ static void __ip_do_redirect(struct rtab + goto reject_redirect; + } + +- n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw); ++ n = __ipv4_neigh_lookup(rt->dst.dev, new_gw); ++ if (!n) ++ n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev); + if (!IS_ERR(n)) { + if (!(n->nud_state & NUD_VALID)) { + neigh_event_send(n, NULL); diff --git a/queue-4.4/ipv6-dccp-add-missing-bind_conflict-to-dccp_ipv6_mapped.patch b/queue-4.4/ipv6-dccp-add-missing-bind_conflict-to-dccp_ipv6_mapped.patch new file mode 100644 index 00000000000..b7c62a7ee6f --- /dev/null +++ b/queue-4.4/ipv6-dccp-add-missing-bind_conflict-to-dccp_ipv6_mapped.patch @@ -0,0 +1,35 @@ +From foo@baz Fri Nov 18 11:09:43 CET 2016 +From: Eric Dumazet +Date: Thu, 3 Nov 2016 08:59:46 -0700 +Subject: ipv6: dccp: add missing bind_conflict to dccp_ipv6_mapped + +From: Eric Dumazet + + +[ Upstream commit 
990ff4d84408fc55942ca6644f67e361737b3d8e ] + +While fuzzing kernel with syzkaller, Andrey reported a nasty crash +in inet6_bind() caused by DCCP lacking a required method. + +Fixes: ab1e0a13d7029 ("[SOCK] proto: Add hashinfo member to struct proto") +Signed-off-by: Eric Dumazet +Reported-by: Andrey Konovalov +Tested-by: Andrey Konovalov +Cc: Arnaldo Carvalho de Melo +Acked-by: Arnaldo Carvalho de Melo +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/dccp/ipv6.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/dccp/ipv6.c ++++ b/net/dccp/ipv6.c +@@ -948,6 +948,7 @@ static const struct inet_connection_sock + .getsockopt = ipv6_getsockopt, + .addr2sockaddr = inet6_csk_addr2sockaddr, + .sockaddr_len = sizeof(struct sockaddr_in6), ++ .bind_conflict = inet6_csk_bind_conflict, + #ifdef CONFIG_COMPAT + .compat_setsockopt = compat_ipv6_setsockopt, + .compat_getsockopt = compat_ipv6_getsockopt, diff --git a/queue-4.4/ipv6-dccp-fix-out-of-bound-access-in-dccp_v6_err.patch b/queue-4.4/ipv6-dccp-fix-out-of-bound-access-in-dccp_v6_err.patch new file mode 100644 index 00000000000..203d0e4320a --- /dev/null +++ b/queue-4.4/ipv6-dccp-fix-out-of-bound-access-in-dccp_v6_err.patch @@ -0,0 +1,53 @@ +From foo@baz Fri Nov 18 11:09:43 CET 2016 +From: Eric Dumazet +Date: Wed, 2 Nov 2016 20:30:48 -0700 +Subject: ipv6: dccp: fix out of bound access in dccp_v6_err() + +From: Eric Dumazet + + +[ Upstream commit 1aa9d1a0e7eefcc61696e147d123453fc0016005 ] + +dccp_v6_err() does not use pskb_may_pull() and might access garbage. + +We only need 4 bytes at the beginning of the DCCP header, like TCP, +so the 8 bytes pulled in icmpv6_notify() are more than enough. + +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/dccp/ipv6.c | 15 ++++++++------- + 1 file changed, 8 insertions(+), 7 deletions(-) + +--- a/net/dccp/ipv6.c ++++ b/net/dccp/ipv6.c +@@ -70,7 +70,7 @@ static void dccp_v6_err(struct sk_buff * + u8 type, u8 code, int offset, __be32 info) + { + const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; +- const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); ++ const struct dccp_hdr *dh; + struct dccp_sock *dp; + struct ipv6_pinfo *np; + struct sock *sk; +@@ -78,12 +78,13 @@ static void dccp_v6_err(struct sk_buff * + __u64 seq; + struct net *net = dev_net(skb->dev); + +- if (skb->len < offset + sizeof(*dh) || +- skb->len < offset + __dccp_basic_hdr_len(dh)) { +- ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), +- ICMP6_MIB_INERRORS); +- return; +- } ++ /* Only need dccph_dport & dccph_sport which are the first ++ * 4 bytes in dccp header. ++ * Our caller (icmpv6_notify()) already pulled 8 bytes for us. ++ */ ++ BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8); ++ BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8); ++ dh = (struct dccp_hdr *)(skb->data + offset); + + sk = __inet6_lookup_established(net, &dccp_hashinfo, + &hdr->daddr, dh->dccph_dport, diff --git a/queue-4.4/net-__skb_flow_dissect-must-cap-its-return-value.patch b/queue-4.4/net-__skb_flow_dissect-must-cap-its-return-value.patch new file mode 100644 index 00000000000..7208c718254 --- /dev/null +++ b/queue-4.4/net-__skb_flow_dissect-must-cap-its-return-value.patch @@ -0,0 +1,59 @@ +From foo@baz Fri Nov 18 11:09:43 CET 2016 +From: Eric Dumazet +Date: Wed, 9 Nov 2016 16:04:46 -0800 +Subject: net: __skb_flow_dissect() must cap its return value + +From: Eric Dumazet + + +[ Upstream commit 34fad54c2537f7c99d07375e50cb30aa3c23bd83 ] + +After Tom patch, thoff field could point past the end of the buffer, +this could fool some callers. + +If an skb was provided, skb->len should be the upper limit. 
+If not, hlen is supposed to be the upper limit. + +Fixes: a6e544b0a88b ("flow_dissector: Jump to exit code in __skb_flow_dissect") +Signed-off-by: Eric Dumazet +Reported-by: Yibin Yang +Acked-by: Willem de Bruijn +Acked-by: Alexei Starovoitov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/flow_dissector.c | 11 ++++++++--- + 1 file changed, 8 insertions(+), 3 deletions(-) + +--- a/net/core/flow_dissector.c ++++ b/net/core/flow_dissector.c +@@ -131,7 +131,7 @@ bool __skb_flow_dissect(const struct sk_ + struct flow_dissector_key_tags *key_tags; + struct flow_dissector_key_keyid *key_keyid; + u8 ip_proto = 0; +- bool ret = false; ++ bool ret; + + if (!data) { + data = skb->data; +@@ -492,12 +492,17 @@ ip_proto_again: + out_good: + ret = true; + +-out_bad: ++ key_control->thoff = (u16)nhoff; ++out: + key_basic->n_proto = proto; + key_basic->ip_proto = ip_proto; +- key_control->thoff = (u16)nhoff; + + return ret; ++ ++out_bad: ++ ret = false; ++ key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen); ++ goto out; + } + EXPORT_SYMBOL(__skb_flow_dissect); + diff --git a/queue-4.4/net-clear-sk_err_soft-in-sk_clone_lock.patch b/queue-4.4/net-clear-sk_err_soft-in-sk_clone_lock.patch new file mode 100644 index 00000000000..32c9f39f7e2 --- /dev/null +++ b/queue-4.4/net-clear-sk_err_soft-in-sk_clone_lock.patch @@ -0,0 +1,34 @@ +From foo@baz Fri Nov 18 11:09:43 CET 2016 +From: Eric Dumazet +Date: Fri, 28 Oct 2016 13:40:24 -0700 +Subject: net: clear sk_err_soft in sk_clone_lock() + +From: Eric Dumazet + + +[ Upstream commit e551c32d57c88923f99f8f010e89ca7ed0735e83 ] + +At accept() time, it is possible the parent has a non zero +sk_err_soft, leftover from a prior error. + +Make sure we do not leave this value in the child, as it +makes future getsockopt(SO_ERROR) calls quite unreliable. + +Signed-off-by: Eric Dumazet +Acked-by: Soheil Hassas Yeganeh +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/sock.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -1562,6 +1562,7 @@ struct sock *sk_clone_lock(const struct + } + + newsk->sk_err = 0; ++ newsk->sk_err_soft = 0; + newsk->sk_priority = 0; + newsk->sk_incoming_cpu = raw_smp_processor_id(); + atomic64_set(&newsk->sk_cookie, 0); diff --git a/queue-4.4/net-mangle-zero-checksum-in-skb_checksum_help.patch b/queue-4.4/net-mangle-zero-checksum-in-skb_checksum_help.patch new file mode 100644 index 00000000000..fea39ab3eb2 --- /dev/null +++ b/queue-4.4/net-mangle-zero-checksum-in-skb_checksum_help.patch @@ -0,0 +1,41 @@ +From foo@baz Fri Nov 18 11:09:43 CET 2016 +From: Eric Dumazet +Date: Sat, 29 Oct 2016 11:02:36 -0700 +Subject: net: mangle zero checksum in skb_checksum_help() + +From: Eric Dumazet + + +[ Upstream commit 4f2e4ad56a65f3b7d64c258e373cb71e8d2499f4 ] + +Sending zero checksum is ok for TCP, but not for UDP. + +UDPv6 receiver should by default drop a frame with a 0 checksum, +and UDPv4 would not verify the checksum and might accept a corrupted +packet. + +Simply replace such checksum by 0xffff, regardless of transport. + +This error was caught on SIT tunnels, but seems generic. + +Signed-off-by: Eric Dumazet +Cc: Maciej Żenczykowski +Cc: Willem de Bruijn +Acked-by: Maciej Żenczykowski +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/dev.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -2462,7 +2462,7 @@ int skb_checksum_help(struct sk_buff *sk + goto out; + } + +- *(__sum16 *)(skb->data + offset) = csum_fold(csum); ++ *(__sum16 *)(skb->data + offset) = csum_fold(csum) ?: CSUM_MANGLED_0; + out_set_summed: + skb->ip_summed = CHECKSUM_NONE; + out: diff --git a/queue-4.4/sctp-assign-assoc_id-earlier-in-__sctp_connect.patch b/queue-4.4/sctp-assign-assoc_id-earlier-in-__sctp_connect.patch new file mode 100644 index 00000000000..958936c68f9 --- /dev/null +++ b/queue-4.4/sctp-assign-assoc_id-earlier-in-__sctp_connect.patch @@ -0,0 +1,57 @@ +From foo@baz Fri Nov 18 11:09:43 CET 2016 +From: Marcelo Ricardo Leitner +Date: Thu, 3 Nov 2016 17:03:41 -0200 +Subject: sctp: assign assoc_id earlier in __sctp_connect + +From: Marcelo Ricardo Leitner + + +[ Upstream commit 7233bc84a3aeda835d334499dc00448373caf5c0 ] + +sctp_wait_for_connect() currently already holds the asoc to keep it +alive during the sleep, in case another thread releases it. But Andrey +Konovalov and Dmitry Vyukov reported a use-after-free in such a +situation. + +Problem is that __sctp_connect() doesn't get a ref on the asoc and will +do a read on the asoc after calling sctp_wait_for_connect(), but by then +another thread may have closed it and the _put on sctp_wait_for_connect +will actually release it, causing the use-after-free. + +Fix is, instead of doing the read after waiting for the connect, do it +before so, and avoid this issue as the socket is still locked by then. +There should be no issue on returning the asoc id in case of failure as +the application shouldn't trust on that number in such situations +anyway. + +This issue doesn't exist in sctp_sendmsg() path. 
+ +Reported-by: Dmitry Vyukov +Reported-by: Andrey Konovalov +Tested-by: Andrey Konovalov +Signed-off-by: Marcelo Ricardo Leitner +Reviewed-by: Xin Long +Acked-by: Neil Horman +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/socket.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/net/sctp/socket.c ++++ b/net/sctp/socket.c +@@ -1212,9 +1212,12 @@ static int __sctp_connect(struct sock *s + + timeo = sock_sndtimeo(sk, f_flags & O_NONBLOCK); + +- err = sctp_wait_for_connect(asoc, &timeo); +- if ((err == 0 || err == -EINPROGRESS) && assoc_id) ++ if (assoc_id) + *assoc_id = asoc->assoc_id; ++ err = sctp_wait_for_connect(asoc, &timeo); ++ /* Note: the asoc may be freed after the return of ++ * sctp_wait_for_connect. ++ */ + + /* Don't free association on exit. */ + asoc = NULL; diff --git a/queue-4.4/series b/queue-4.4/series new file mode 100644 index 00000000000..5fffc5ef28c --- /dev/null +++ b/queue-4.4/series @@ -0,0 +1,16 @@ +dctcp-avoid-bogus-doubling-of-cwnd-after-loss.patch +net-clear-sk_err_soft-in-sk_clone_lock.patch +net-mangle-zero-checksum-in-skb_checksum_help.patch +bgmac-stop-clearing-dma-receive-control-register-right-after-it-is-set.patch +ip6_tunnel-clear-ip6cb-in-ip6tunnel_xmit.patch +tcp-fix-potential-memory-corruption.patch +dccp-do-not-send-reset-to-already-closed-sockets.patch +dccp-fix-out-of-bound-access-in-dccp_v4_err.patch +ipv6-dccp-fix-out-of-bound-access-in-dccp_v6_err.patch +ipv6-dccp-add-missing-bind_conflict-to-dccp_ipv6_mapped.patch +sctp-assign-assoc_id-earlier-in-__sctp_connect.patch +fib_trie-correct-proc-net-route-off-by-one-error.patch +sock-fix-sendmmsg-for-partial-sendmsg.patch +net-__skb_flow_dissect-must-cap-its-return-value.patch +ipv4-use-new_gw-for-redirect-neigh-lookup.patch +tcp-take-care-of-truncations-done-by-sk_filter.patch diff --git a/queue-4.4/sock-fix-sendmmsg-for-partial-sendmsg.patch b/queue-4.4/sock-fix-sendmmsg-for-partial-sendmsg.patch new file mode 100644 
index 00000000000..f8a5799ef65 --- /dev/null +++ b/queue-4.4/sock-fix-sendmmsg-for-partial-sendmsg.patch @@ -0,0 +1,49 @@ +From foo@baz Fri Nov 18 11:09:43 CET 2016 +From: Soheil Hassas Yeganeh +Date: Fri, 4 Nov 2016 15:36:49 -0400 +Subject: sock: fix sendmmsg for partial sendmsg + +From: Soheil Hassas Yeganeh + + +[ Upstream commit 3023898b7d4aac65987bd2f485cc22390aae6f78 ] + +Do not send the next message in sendmmsg for partial sendmsg +invocations. + +sendmmsg assumes that it can continue sending the next message +when the return value of the individual sendmsg invocations +is positive. It results in corrupting the data for TCP, +SCTP, and UNIX streams. + +For example, sendmmsg([["abcd"], ["efgh"]]) can result in a stream +of "aefgh" if the first sendmsg invocation sends only the first +byte while the second sendmsg goes through. + +Datagram sockets either send the entire datagram or fail, so +this patch affects only sockets of type SOCK_STREAM and +SOCK_SEQPACKET. + +Fixes: 228e548e6020 ("net: Add sendmmsg socket system call") +Signed-off-by: Soheil Hassas Yeganeh +Signed-off-by: Eric Dumazet +Signed-off-by: Willem de Bruijn +Signed-off-by: Neal Cardwell +Acked-by: Maciej Żenczykowski +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/socket.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/socket.c ++++ b/net/socket.c +@@ -2041,6 +2041,8 @@ int __sys_sendmmsg(int fd, struct mmsghd + if (err) + break; + ++datagrams; ++ if (msg_data_left(&msg_sys)) ++ break; + } + + fput_light(sock->file, fput_needed); diff --git a/queue-4.4/tcp-fix-potential-memory-corruption.patch b/queue-4.4/tcp-fix-potential-memory-corruption.patch new file mode 100644 index 00000000000..a2d137351b5 --- /dev/null +++ b/queue-4.4/tcp-fix-potential-memory-corruption.patch @@ -0,0 +1,40 @@ +From foo@baz Fri Nov 18 11:09:43 CET 2016 +From: Eric Dumazet +Date: Wed, 2 Nov 2016 07:53:17 -0700 +Subject: tcp: fix potential memory corruption + +From: Eric Dumazet + + +[ Upstream commit ac9e70b17ecd7c6e933ff2eaf7ab37429e71bf4d ] + +Imagine initial value of max_skb_frags is 17, and last +skb in write queue has 15 frags. + +Then max_skb_frags is lowered to 14 or smaller value. + +tcp_sendmsg() will then be allowed to add additional page frags +and eventually go past MAX_SKB_FRAGS, overflowing struct +skb_shared_info. + +Fixes: 5f74f82ea34c ("net:Add sysctl_max_skb_frags") +Signed-off-by: Eric Dumazet +Cc: Hans Westgaard Ry +Cc: Håkon Bugge +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -1212,7 +1212,7 @@ new_segment: + + if (!skb_can_coalesce(skb, i, pfrag->page, + pfrag->offset)) { +- if (i == sysctl_max_skb_frags || !sg) { ++ if (i >= sysctl_max_skb_frags || !sg) { + tcp_mark_push(tp, skb); + goto new_segment; + } diff --git a/queue-4.4/tcp-take-care-of-truncations-done-by-sk_filter.patch b/queue-4.4/tcp-take-care-of-truncations-done-by-sk_filter.patch new file mode 100644 index 00000000000..f4b3aeea044 --- /dev/null +++ b/queue-4.4/tcp-take-care-of-truncations-done-by-sk_filter.patch @@ -0,0 +1,159 @@ +From foo@baz Fri Nov 18 11:09:43 CET 2016 +From: Eric Dumazet +Date: Thu, 10 Nov 2016 13:12:35 -0800 +Subject: tcp: take care of truncations done by sk_filter() + +From: Eric Dumazet + + +[ Upstream commit ac6e780070e30e4c35bd395acfe9191e6268bdd3 ] + +With syzkaller help, Marco Grassi found a bug in TCP stack, +crashing in tcp_collapse() + +Root cause is that sk_filter() can truncate the incoming skb, +but TCP stack was not really expecting this to happen. +It probably was expecting a simple DROP or ACCEPT behavior. + +We first need to make sure no part of TCP header could be removed. +Then we need to adjust TCP_SKB_CB(skb)->end_seq + +Many thanks to syzkaller team and Marco for giving us a reproducer. + +Signed-off-by: Eric Dumazet +Reported-by: Marco Grassi +Reported-by: Vladis Dronov +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/filter.h | 6 +++++- + include/net/tcp.h | 1 + + net/core/filter.c | 10 +++++----- + net/ipv4/tcp_ipv4.c | 19 ++++++++++++++++++- + net/ipv6/tcp_ipv6.c | 6 ++++-- + 5 files changed, 33 insertions(+), 9 deletions(-) + +--- a/include/linux/filter.h ++++ b/include/linux/filter.h +@@ -421,7 +421,11 @@ static inline void bpf_prog_unlock_ro(st + } + #endif /* CONFIG_DEBUG_SET_MODULE_RONX */ + +-int sk_filter(struct sock *sk, struct sk_buff *skb); ++int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap); ++static inline int sk_filter(struct sock *sk, struct sk_buff *skb) ++{ ++ return sk_filter_trim_cap(sk, skb, 1); ++} + + int bpf_prog_select_runtime(struct bpf_prog *fp); + void bpf_prog_free(struct bpf_prog *fp); +--- a/include/net/tcp.h ++++ b/include/net/tcp.h +@@ -1156,6 +1156,7 @@ static inline void tcp_prequeue_init(str + } + + bool tcp_prequeue(struct sock *sk, struct sk_buff *skb); ++int tcp_filter(struct sock *sk, struct sk_buff *skb); + + #undef STATE_TRACE + +--- a/net/core/filter.c ++++ b/net/core/filter.c +@@ -52,9 +52,10 @@ + #include + + /** +- * sk_filter - run a packet through a socket filter ++ * sk_filter_trim_cap - run a packet through a socket filter + * @sk: sock associated with &sk_buff + * @skb: buffer to filter ++ * @cap: limit on how short the eBPF program may trim the packet + * + * Run the eBPF program and then cut skb->data to correct size returned by + * the program. If pkt_len is 0 we toss packet. If skb->len is smaller +@@ -63,7 +64,7 @@ + * be accepted or -EPERM if the packet should be tossed. 
+ * + */ +-int sk_filter(struct sock *sk, struct sk_buff *skb) ++int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap) + { + int err; + struct sk_filter *filter; +@@ -84,14 +85,13 @@ int sk_filter(struct sock *sk, struct sk + filter = rcu_dereference(sk->sk_filter); + if (filter) { + unsigned int pkt_len = bpf_prog_run_save_cb(filter->prog, skb); +- +- err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM; ++ err = pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM; + } + rcu_read_unlock(); + + return err; + } +-EXPORT_SYMBOL(sk_filter); ++EXPORT_SYMBOL(sk_filter_trim_cap); + + static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) + { +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -1533,6 +1533,21 @@ bool tcp_prequeue(struct sock *sk, struc + } + EXPORT_SYMBOL(tcp_prequeue); + ++int tcp_filter(struct sock *sk, struct sk_buff *skb) ++{ ++ struct tcphdr *th = (struct tcphdr *)skb->data; ++ unsigned int eaten = skb->len; ++ int err; ++ ++ err = sk_filter_trim_cap(sk, skb, th->doff * 4); ++ if (!err) { ++ eaten -= skb->len; ++ TCP_SKB_CB(skb)->end_seq -= eaten; ++ } ++ return err; ++} ++EXPORT_SYMBOL(tcp_filter); ++ + /* + * From tcp_input.c + */ +@@ -1638,8 +1653,10 @@ process: + + nf_reset(skb); + +- if (sk_filter(sk, skb)) ++ if (tcp_filter(sk, skb)) + goto discard_and_relse; ++ th = (const struct tcphdr *)skb->data; ++ iph = ip_hdr(skb); + + skb->dev = NULL; + +--- a/net/ipv6/tcp_ipv6.c ++++ b/net/ipv6/tcp_ipv6.c +@@ -1214,7 +1214,7 @@ static int tcp_v6_do_rcv(struct sock *sk + if (skb->protocol == htons(ETH_P_IP)) + return tcp_v4_do_rcv(sk, skb); + +- if (sk_filter(sk, skb)) ++ if (tcp_filter(sk, skb)) + goto discard; + + /* +@@ -1438,8 +1438,10 @@ process: + if (tcp_v6_inbound_md5_hash(sk, skb)) + goto discard_and_relse; + +- if (sk_filter(sk, skb)) ++ if (tcp_filter(sk, skb)) + goto discard_and_relse; ++ th = (const struct tcphdr *)skb->data; ++ hdr = ipv6_hdr(skb); + + skb->dev = NULL; +