From: Greg Kroah-Hartman Date: Mon, 29 Jan 2018 10:57:01 +0000 (+0100) Subject: 3.18-stable patches X-Git-Tag: v4.4.114~9 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=8f379c9614f167af409d32de24e4ba5417bb6874;p=thirdparty%2Fkernel%2Fstable-queue.git 3.18-stable patches added patches: dccp-don-t-restart-ccid2_hc_tx_rto_expire-if-sk-in-closed-state.patch ipv4-make-neigh-lookup-keys-for-loopback-point-to-point-devices-be-inaddr_any.patch ipv6-fix-udpv6-sendmsg-crash-caused-by-too-small-mtu.patch net-allow-neigh-contructor-functions-ability-to-modify-the-primary_key.patch net-igmp-fix-source-address-check-for-igmpv3-reports.patch net-qdisc_pkt_len_init-should-be-more-robust.patch net-tcp-close-sock-if-net-namespace-is-exiting.patch pppoe-take-needed_headroom-of-lower-device-into-account-on-xmit.patch sctp-do-not-allow-the-v4-socket-to-bind-a-v4mapped-v6-address.patch sctp-return-error-if-the-asoc-has-been-peeled-off-in-sctp_wait_for_sndbuf.patch tcp-__tcp_hdrlen-helper.patch vmxnet3-repair-memory-leak.patch --- diff --git a/queue-3.18/dccp-don-t-restart-ccid2_hc_tx_rto_expire-if-sk-in-closed-state.patch b/queue-3.18/dccp-don-t-restart-ccid2_hc_tx_rto_expire-if-sk-in-closed-state.patch new file mode 100644 index 00000000000..66838d3332e --- /dev/null +++ b/queue-3.18/dccp-don-t-restart-ccid2_hc_tx_rto_expire-if-sk-in-closed-state.patch @@ -0,0 +1,44 @@ +From foo@baz Mon Jan 29 11:15:06 CET 2018 +From: Alexey Kodanev +Date: Fri, 26 Jan 2018 15:14:16 +0300 +Subject: dccp: don't restart ccid2_hc_tx_rto_expire() if sk in closed state + +From: Alexey Kodanev + + +[ Upstream commit dd5684ecae3bd8e44b644f50e2c12c7e57fdfef5 ] + +ccid2_hc_tx_rto_expire() timer callback always restarts the timer +again and can run indefinitely (unless it is stopped outside), and after +commit 120e9dabaf55 ("dccp: defer ccid_hc_tx_delete() at dismantle time"), +which moved ccid_hc_tx_delete() (also includes sk_stop_timer()) from +dccp_destroy_sock() to sk_destruct(), this started to 
happen quite often. +The timer prevents releasing the socket, as a result, sk_destruct() won't +be called. + +Found with LTP/dccp_ipsec tests running on the bonding device, +which later couldn't be unloaded after the tests were completed: + + unregister_netdevice: waiting for bond0 to become free. Usage count = 148 + +Fixes: 2a91aa396739 ("[DCCP] CCID2: Initial CCID2 (TCP-Like) implementation") +Signed-off-by: Alexey Kodanev +Reviewed-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/dccp/ccids/ccid2.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/net/dccp/ccids/ccid2.c ++++ b/net/dccp/ccids/ccid2.c +@@ -140,6 +140,9 @@ static void ccid2_hc_tx_rto_expire(unsig + + ccid2_pr_debug("RTO_EXPIRE\n"); + ++ if (sk->sk_state == DCCP_CLOSED) ++ goto out; ++ + /* back-off timer */ + hc->tx_rto <<= 1; + if (hc->tx_rto > DCCP_RTO_MAX) diff --git a/queue-3.18/ipv4-make-neigh-lookup-keys-for-loopback-point-to-point-devices-be-inaddr_any.patch b/queue-3.18/ipv4-make-neigh-lookup-keys-for-loopback-point-to-point-devices-be-inaddr_any.patch new file mode 100644 index 00000000000..f71bcda11e0 --- /dev/null +++ b/queue-3.18/ipv4-make-neigh-lookup-keys-for-loopback-point-to-point-devices-be-inaddr_any.patch @@ -0,0 +1,58 @@ +From foo@baz Mon Jan 29 11:15:06 CET 2018 +From: Jim Westfall +Date: Sun, 14 Jan 2018 04:18:51 -0800 +Subject: ipv4: Make neigh lookup keys for loopback/point-to-point devices be INADDR_ANY + +From: Jim Westfall + + +[ Upstream commit cd9ff4de0107c65d69d02253bb25d6db93c3dbc1 ] + +Map all lookup neigh keys to INADDR_ANY for loopback/point-to-point devices +to avoid making an entry for every remote ip the device needs to talk to. + +This used to be the old behavior but became broken in a263b3093641f +(ipv4: Make neigh lookups directly in output packet path) and later removed +in 0bb4087cbec0 (ipv4: Fix neigh lookup keying over loopback/point-to-point +devices) because it was broken. 
+ +Signed-off-by: Jim Westfall +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/arp.h | 3 +++ + net/ipv4/arp.c | 7 ++++++- + 2 files changed, 9 insertions(+), 1 deletion(-) + +--- a/include/net/arp.h ++++ b/include/net/arp.h +@@ -37,6 +37,9 @@ static inline struct neighbour *__ipv4_n + { + struct neighbour *n; + ++ if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) ++ key = INADDR_ANY; ++ + rcu_read_lock_bh(); + n = __ipv4_neigh_lookup_noref(dev, key); + if (n && !atomic_inc_not_zero(&n->refcnt)) +--- a/net/ipv4/arp.c ++++ b/net/ipv4/arp.c +@@ -221,11 +221,16 @@ static u32 arp_hash(const void *pkey, + + static int arp_constructor(struct neighbour *neigh) + { +- __be32 addr = *(__be32 *)neigh->primary_key; ++ __be32 addr; + struct net_device *dev = neigh->dev; + struct in_device *in_dev; + struct neigh_parms *parms; ++ u32 inaddr_any = INADDR_ANY; + ++ if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) ++ memcpy(neigh->primary_key, &inaddr_any, arp_tbl.key_len); ++ ++ addr = *(__be32 *)neigh->primary_key; + rcu_read_lock(); + in_dev = __in_dev_get_rcu(dev); + if (in_dev == NULL) { diff --git a/queue-3.18/ipv6-fix-udpv6-sendmsg-crash-caused-by-too-small-mtu.patch b/queue-3.18/ipv6-fix-udpv6-sendmsg-crash-caused-by-too-small-mtu.patch new file mode 100644 index 00000000000..ebd300a87b7 --- /dev/null +++ b/queue-3.18/ipv6-fix-udpv6-sendmsg-crash-caused-by-too-small-mtu.patch @@ -0,0 +1,94 @@ +From foo@baz Mon Jan 29 11:15:06 CET 2018 +From: Mike Maloney +Date: Wed, 10 Jan 2018 12:45:10 -0500 +Subject: ipv6: fix udpv6 sendmsg crash caused by too small MTU + +From: Mike Maloney + + +[ Upstream commit 749439bfac6e1a2932c582e2699f91d329658196 ] + +The logic in __ip6_append_data() assumes that the MTU is at least large +enough for the headers. A device's MTU may be adjusted after being +added while sendmsg() is processing data, resulting in +__ip6_append_data() seeing any MTU. 
For an mtu smaller than the size of +the fragmentation header, the math results in a negative 'maxfraglen', +which causes problems when refragmenting any previous skb in the +skb_write_queue, leaving it possibly malformed. + +Instead sendmsg returns EINVAL when the mtu is calculated to be less +than IPV6_MIN_MTU. + +Found by syzkaller: +kernel BUG at ./include/linux/skbuff.h:2064! +invalid opcode: 0000 [#1] SMP KASAN +Dumping ftrace buffer: + (ftrace buffer empty) +Modules linked in: +CPU: 1 PID: 14216 Comm: syz-executor5 Not tainted 4.13.0-rc4+ #2 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +task: ffff8801d0b68580 task.stack: ffff8801ac6b8000 +RIP: 0010:__skb_pull include/linux/skbuff.h:2064 [inline] +RIP: 0010:__ip6_make_skb+0x18cf/0x1f70 net/ipv6/ip6_output.c:1617 +RSP: 0018:ffff8801ac6bf570 EFLAGS: 00010216 +RAX: 0000000000010000 RBX: 0000000000000028 RCX: ffffc90003cce000 +RDX: 00000000000001b8 RSI: ffffffff839df06f RDI: ffff8801d9478ca0 +RBP: ffff8801ac6bf780 R08: ffff8801cc3f1dbc R09: 0000000000000000 +R10: ffff8801ac6bf7a0 R11: 43cb4b7b1948a9e7 R12: ffff8801cc3f1dc8 +R13: ffff8801cc3f1d40 R14: 0000000000001036 R15: dffffc0000000000 +FS: 00007f43d740c700(0000) GS:ffff8801dc100000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00007f7834984000 CR3: 00000001d79b9000 CR4: 00000000001406e0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + ip6_finish_skb include/net/ipv6.h:911 [inline] + udp_v6_push_pending_frames+0x255/0x390 net/ipv6/udp.c:1093 + udpv6_sendmsg+0x280d/0x31a0 net/ipv6/udp.c:1363 + inet_sendmsg+0x11f/0x5e0 net/ipv4/af_inet.c:762 + sock_sendmsg_nosec net/socket.c:633 [inline] + sock_sendmsg+0xca/0x110 net/socket.c:643 + SYSC_sendto+0x352/0x5a0 net/socket.c:1750 + SyS_sendto+0x40/0x50 net/socket.c:1718 + entry_SYSCALL_64_fastpath+0x1f/0xbe +RIP: 0033:0x4512e9 +RSP: 
002b:00007f43d740bc08 EFLAGS: 00000216 ORIG_RAX: 000000000000002c +RAX: ffffffffffffffda RBX: 00000000007180a8 RCX: 00000000004512e9 +RDX: 000000000000002e RSI: 0000000020d08000 RDI: 0000000000000005 +RBP: 0000000000000086 R08: 00000000209c1000 R09: 000000000000001c +R10: 0000000000040800 R11: 0000000000000216 R12: 00000000004b9c69 +R13: 00000000ffffffff R14: 0000000000000005 R15: 00000000202c2000 +Code: 9e 01 fe e9 c5 e8 ff ff e8 7f 9e 01 fe e9 4a ea ff ff 48 89 f7 e8 52 9e 01 fe e9 aa eb ff ff e8 a8 b6 cf fd 0f 0b e8 a1 b6 cf fd <0f> 0b 49 8d 45 78 4d 8d 45 7c 48 89 85 78 fe ff ff 49 8d 85 ba +RIP: __skb_pull include/linux/skbuff.h:2064 [inline] RSP: ffff8801ac6bf570 +RIP: __ip6_make_skb+0x18cf/0x1f70 net/ipv6/ip6_output.c:1617 RSP: ffff8801ac6bf570 + +Reported-by: syzbot +Signed-off-by: Mike Maloney +Reviewed-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_output.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -1214,14 +1214,16 @@ int ip6_append_data(struct sock *sk, int + np->cork.tclass = tclass; + if (rt->dst.flags & DST_XFRM_TUNNEL) + mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? +- rt->dst.dev->mtu : dst_mtu(&rt->dst); ++ READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst); + else + mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? 
+- rt->dst.dev->mtu : dst_mtu(rt->dst.path); ++ READ_ONCE(rt->dst.dev->mtu) : dst_mtu(rt->dst.path); + if (np->frag_size < mtu) { + if (np->frag_size) + mtu = np->frag_size; + } ++ if (mtu < IPV6_MIN_MTU) ++ return -EINVAL; + cork->fragsize = mtu; + if (dst_allfrag(rt->dst.path)) + cork->flags |= IPCORK_ALLFRAG; diff --git a/queue-3.18/net-allow-neigh-contructor-functions-ability-to-modify-the-primary_key.patch b/queue-3.18/net-allow-neigh-contructor-functions-ability-to-modify-the-primary_key.patch new file mode 100644 index 00000000000..6b461e166d9 --- /dev/null +++ b/queue-3.18/net-allow-neigh-contructor-functions-ability-to-modify-the-primary_key.patch @@ -0,0 +1,40 @@ +From foo@baz Mon Jan 29 11:15:06 CET 2018 +From: Jim Westfall +Date: Sun, 14 Jan 2018 04:18:50 -0800 +Subject: net: Allow neigh contructor functions ability to modify the primary_key + +From: Jim Westfall + + +[ Upstream commit 096b9854c04df86f03b38a97d40b6506e5730919 ] + +Use n->primary_key instead of pkey to account for the possibility that a neigh +constructor function may have modified the primary_key value. + +Signed-off-by: Jim Westfall +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/neighbour.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/core/neighbour.c ++++ b/net/core/neighbour.c +@@ -508,7 +508,7 @@ struct neighbour *__neigh_create(struct + if (atomic_read(&tbl->entries) > (1 << nht->hash_shift)) + nht = neigh_hash_grow(tbl, nht->hash_shift + 1); + +- hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift); ++ hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift); + + if (n->parms->dead) { + rc = ERR_PTR(-EINVAL); +@@ -520,7 +520,7 @@ struct neighbour *__neigh_create(struct + n1 != NULL; + n1 = rcu_dereference_protected(n1->next, + lockdep_is_held(&tbl->lock))) { +- if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) { ++ if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) { + if (want_ref) + neigh_hold(n1); + rc = n1; diff --git a/queue-3.18/net-igmp-fix-source-address-check-for-igmpv3-reports.patch b/queue-3.18/net-igmp-fix-source-address-check-for-igmpv3-reports.patch new file mode 100644 index 00000000000..1ee58b0b734 --- /dev/null +++ b/queue-3.18/net-igmp-fix-source-address-check-for-igmpv3-reports.patch @@ -0,0 +1,41 @@ +From foo@baz Mon Jan 29 11:15:06 CET 2018 +From: Felix Fietkau +Date: Fri, 19 Jan 2018 11:50:46 +0100 +Subject: net: igmp: fix source address check for IGMPv3 reports + +From: Felix Fietkau + + +[ Upstream commit ad23b750933ea7bf962678972a286c78a8fa36aa ] + +Commit "net: igmp: Use correct source address on IGMPv3 reports" +introduced a check to validate the source address of locally generated +IGMPv3 packets. +Instead of checking the local interface address directly, it uses +inet_ifa_match(fl4->saddr, ifa), which checks if the address is on the +local subnet (or equal to the point-to-point address if used). + +This breaks for point-to-point interfaces, so check against +ifa->ifa_local directly. 
+ +Cc: Kevin Cernekee +Fixes: a46182b00290 ("net: igmp: Use correct source address on IGMPv3 reports") +Reported-by: Sebastian Gottschall +Signed-off-by: Felix Fietkau +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/igmp.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/igmp.c ++++ b/net/ipv4/igmp.c +@@ -329,7 +329,7 @@ static __be32 igmpv3_get_srcaddr(struct + return htonl(INADDR_ANY); + + for_ifa(in_dev) { +- if (inet_ifa_match(fl4->saddr, ifa)) ++ if (fl4->saddr == ifa->ifa_local) + return fl4->saddr; + } endfor_ifa(in_dev); + diff --git a/queue-3.18/net-qdisc_pkt_len_init-should-be-more-robust.patch b/queue-3.18/net-qdisc_pkt_len_init-should-be-more-robust.patch new file mode 100644 index 00000000000..42369e7a981 --- /dev/null +++ b/queue-3.18/net-qdisc_pkt_len_init-should-be-more-robust.patch @@ -0,0 +1,61 @@ +From foo@baz Mon Jan 29 11:15:06 CET 2018 +From: Eric Dumazet +Date: Thu, 18 Jan 2018 19:59:19 -0800 +Subject: net: qdisc_pkt_len_init() should be more robust + +From: Eric Dumazet + + +[ Upstream commit 7c68d1a6b4db9012790af7ac0f0fdc0d2083422a ] + +Without proper validation of DODGY packets, we might very well +feed qdisc_pkt_len_init() with invalid GSO packets. + +tcp_hdrlen() might access out-of-bound data, so let's use +skb_header_pointer() and proper checks. + +Whole story is described in commit d0c081b49137 ("flow_dissector: +properly cap thoff field") + +We have the goal of validating DODGY packets earlier in the stack, +so we might very well revert this fix in the future. + +Signed-off-by: Eric Dumazet +Cc: Willem de Bruijn +Cc: Jason Wang +Reported-by: syzbot+9da69ebac7dddd804552@syzkaller.appspotmail.com +Acked-by: Jason Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/dev.c | 19 +++++++++++++++---- + 1 file changed, 15 insertions(+), 4 deletions(-) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -2772,10 +2772,21 @@ static void qdisc_pkt_len_init(struct sk + hdr_len = skb_transport_header(skb) - skb_mac_header(skb); + + /* + transport layer */ +- if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) +- hdr_len += tcp_hdrlen(skb); +- else +- hdr_len += sizeof(struct udphdr); ++ if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) { ++ const struct tcphdr *th; ++ struct tcphdr _tcphdr; ++ ++ th = skb_header_pointer(skb, skb_transport_offset(skb), ++ sizeof(_tcphdr), &_tcphdr); ++ if (likely(th)) ++ hdr_len += __tcp_hdrlen(th); ++ } else { ++ struct udphdr _udphdr; ++ ++ if (skb_header_pointer(skb, skb_transport_offset(skb), ++ sizeof(_udphdr), &_udphdr)) ++ hdr_len += sizeof(struct udphdr); ++ } + + if (shinfo->gso_type & SKB_GSO_DODGY) + gso_segs = DIV_ROUND_UP(skb->len - hdr_len, diff --git a/queue-3.18/net-tcp-close-sock-if-net-namespace-is-exiting.patch b/queue-3.18/net-tcp-close-sock-if-net-namespace-is-exiting.patch new file mode 100644 index 00000000000..08aa5b2e58e --- /dev/null +++ b/queue-3.18/net-tcp-close-sock-if-net-namespace-is-exiting.patch @@ -0,0 +1,120 @@ +From foo@baz Mon Jan 29 10:14:57 CET 2018 +From: Dan Streetman +Date: Thu, 18 Jan 2018 16:14:26 -0500 +Subject: net: tcp: close sock if net namespace is exiting + +From: Dan Streetman + + +[ Upstream commit 4ee806d51176ba7b8ff1efd81f271d7252e03a1d ] + +When a tcp socket is closed, if it detects that its net namespace is +exiting, close immediately and do not wait for FIN sequence. + +For normal sockets, a reference is taken to their net namespace, so it will +never exit while the socket is open. However, kernel sockets do not take a +reference to their net namespace, so it may begin exiting while the kernel +socket is still open. 
In this case if the kernel socket is a tcp socket, +it will stay open trying to complete its close sequence. The sock's dst(s) +hold a reference to their interface, which are all transferred to the +namespace's loopback interface when the real interfaces are taken down. +When the namespace tries to take down its loopback interface, it hangs +waiting for all references to the loopback interface to release, which +results in messages like: + +unregister_netdevice: waiting for lo to become free. Usage count = 1 + +These messages continue until the socket finally times out and closes. +Since the net namespace cleanup holds the net_mutex while calling its +registered pernet callbacks, any new net namespace initialization is +blocked until the current net namespace finishes exiting. + +After this change, the tcp socket notices the exiting net namespace, and +closes immediately, releasing its dst(s) and their reference to the +loopback interface, which lets the net namespace continue exiting. + +Link: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1711407 +Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=97811 +Signed-off-by: Dan Streetman +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/net_namespace.h | 10 ++++++++++ + net/ipv4/tcp.c | 3 +++ + net/ipv4/tcp_timer.c | 15 +++++++++++++++ + 3 files changed, 28 insertions(+) + +--- a/include/net/net_namespace.h ++++ b/include/net/net_namespace.h +@@ -200,6 +200,11 @@ int net_eq(const struct net *net1, const + return net1 == net2; + } + ++static inline int check_net(const struct net *net) ++{ ++ return atomic_read(&net->count) != 0; ++} ++ + void net_drop_ns(void *); + + #else +@@ -223,6 +228,11 @@ int net_eq(const struct net *net1, const + { + return 1; + } ++ ++static inline int check_net(const struct net *net) ++{ ++ return 1; ++} + + #define net_drop_ns NULL + #endif +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -2182,6 +2182,9 @@ adjudge_to_death: + tcp_send_active_reset(sk, GFP_ATOMIC); + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPABORTONMEMORY); ++ } else if (!check_net(sock_net(sk))) { ++ /* Not possible to send reset; just close */ ++ tcp_set_state(sk, TCP_CLOSE); + } + } + +--- a/net/ipv4/tcp_timer.c ++++ b/net/ipv4/tcp_timer.c +@@ -46,11 +46,19 @@ static void tcp_write_err(struct sock *s + * to prevent DoS attacks. It is called when a retransmission timeout + * or zero probe timeout occurs on orphaned socket. + * ++ * Also close if our net namespace is exiting; in that case there is no ++ * hope of ever communicating again since all netns interfaces are already ++ * down (or about to be down), and we need to release our dst references, ++ * which have been moved to the netns loopback interface, so the namespace ++ * can finish exiting. This condition is only possible if we are a kernel ++ * socket, as those do not hold references to the namespace. ++ * + * Criteria is still not confirmed experimentally and may change. + * We kill the socket, if: + * 1. If number of orphaned sockets exceeds an administratively configured + * limit. + * 2. If we have strong memory pressure. ++ * 3. If our net namespace is exiting. 
+ */ + static int tcp_out_of_resources(struct sock *sk, bool do_reset) + { +@@ -79,6 +87,13 @@ static int tcp_out_of_resources(struct s + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY); + return 1; + } ++ ++ if (!check_net(sock_net(sk))) { ++ /* Not possible to send reset; just close */ ++ tcp_done(sk); ++ return 1; ++ } ++ + return 0; + } + diff --git a/queue-3.18/pppoe-take-needed_headroom-of-lower-device-into-account-on-xmit.patch b/queue-3.18/pppoe-take-needed_headroom-of-lower-device-into-account-on-xmit.patch new file mode 100644 index 00000000000..5f2a71025e4 --- /dev/null +++ b/queue-3.18/pppoe-take-needed_headroom-of-lower-device-into-account-on-xmit.patch @@ -0,0 +1,126 @@ +From foo@baz Mon Jan 29 11:15:06 CET 2018 +From: Guillaume Nault +Date: Mon, 22 Jan 2018 18:06:37 +0100 +Subject: pppoe: take ->needed_headroom of lower device into account on xmit + +From: Guillaume Nault + + +[ Upstream commit 02612bb05e51df8489db5e94d0cf8d1c81f87b0c ] + +In pppoe_sendmsg(), reserving dev->hard_header_len bytes of headroom +was probably fine before the introduction of ->needed_headroom in +commit f5184d267c1a ("net: Allow netdevices to specify needed head/tailroom"). + +But now, virtual devices typically advertise the size of their overhead +in dev->needed_headroom, so we must also take it into account in +skb_reserve(). +Allocation size of skb is also updated to take dev->needed_tailroom +into account and replace the arbitrary 32 bytes with the real size of +a PPPoE header. + +This issue was discovered by syzbot, who connected a pppoe socket to a +gre device which had dev->header_ops->create == ipgre_header and +dev->hard_header_len == 0. Therefore, PPPoE didn't reserve any +headroom, and dev_hard_header() crashed when ipgre_header() tried to +prepend its header to skb->data. 
+ +skbuff: skb_under_panic: text:000000001d390b3a len:31 put:24 +head:00000000d8ed776f data:000000008150e823 tail:0x7 end:0xc0 dev:gre0 +------------[ cut here ]------------ +kernel BUG at net/core/skbuff.c:104! +invalid opcode: 0000 [#1] SMP KASAN +Dumping ftrace buffer: + (ftrace buffer empty) +Modules linked in: +CPU: 1 PID: 3670 Comm: syzkaller801466 Not tainted +4.15.0-rc7-next-20180115+ #97 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS +Google 01/01/2011 +RIP: 0010:skb_panic+0x162/0x1f0 net/core/skbuff.c:100 +RSP: 0018:ffff8801d9bd7840 EFLAGS: 00010282 +RAX: 0000000000000083 RBX: ffff8801d4f083c0 RCX: 0000000000000000 +RDX: 0000000000000083 RSI: 1ffff1003b37ae92 RDI: ffffed003b37aefc +RBP: ffff8801d9bd78a8 R08: 1ffff1003b37ae8a R09: 0000000000000000 +R10: 0000000000000001 R11: 0000000000000000 R12: ffffffff86200de0 +R13: ffffffff84a981ad R14: 0000000000000018 R15: ffff8801d2d34180 +FS: 00000000019c4880(0000) GS:ffff8801db300000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00000000208bc000 CR3: 00000001d9111001 CR4: 00000000001606e0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + skb_under_panic net/core/skbuff.c:114 [inline] + skb_push+0xce/0xf0 net/core/skbuff.c:1714 + ipgre_header+0x6d/0x4e0 net/ipv4/ip_gre.c:879 + dev_hard_header include/linux/netdevice.h:2723 [inline] + pppoe_sendmsg+0x58e/0x8b0 drivers/net/ppp/pppoe.c:890 + sock_sendmsg_nosec net/socket.c:630 [inline] + sock_sendmsg+0xca/0x110 net/socket.c:640 + sock_write_iter+0x31a/0x5d0 net/socket.c:909 + call_write_iter include/linux/fs.h:1775 [inline] + do_iter_readv_writev+0x525/0x7f0 fs/read_write.c:653 + do_iter_write+0x154/0x540 fs/read_write.c:932 + vfs_writev+0x18a/0x340 fs/read_write.c:977 + do_writev+0xfc/0x2a0 fs/read_write.c:1012 + SYSC_writev fs/read_write.c:1085 [inline] + SyS_writev+0x27/0x30 fs/read_write.c:1082 + 
entry_SYSCALL_64_fastpath+0x29/0xa0 + +Admittedly PPPoE shouldn't be allowed to run on non Ethernet-like +interfaces, but reserving space for ->needed_headroom is a more +fundamental issue that needs to be addressed first. + +Same problem exists for __pppoe_xmit(), which also needs to take +dev->needed_headroom into account in skb_cow_head(). + +Fixes: f5184d267c1a ("net: Allow netdevices to specify needed head/tailroom") +Reported-by: syzbot+ed0838d0fa4c4f2b528e20286e6dc63effc7c14d@syzkaller.appspotmail.com +Signed-off-by: Guillaume Nault +Reviewed-by: Xin Long +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ppp/pppoe.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +--- a/drivers/net/ppp/pppoe.c ++++ b/drivers/net/ppp/pppoe.c +@@ -830,6 +830,7 @@ static int pppoe_sendmsg(struct kiocb *i + struct pppoe_hdr *ph; + struct net_device *dev; + char *start; ++ int hlen; + + lock_sock(sk); + if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) { +@@ -848,16 +849,16 @@ static int pppoe_sendmsg(struct kiocb *i + if (total_len > (dev->mtu + dev->hard_header_len)) + goto end; + +- +- skb = sock_wmalloc(sk, total_len + dev->hard_header_len + 32, +- 0, GFP_KERNEL); ++ hlen = LL_RESERVED_SPACE(dev); ++ skb = sock_wmalloc(sk, hlen + sizeof(*ph) + total_len + ++ dev->needed_tailroom, 0, GFP_KERNEL); + if (!skb) { + error = -ENOMEM; + goto end; + } + + /* Reserve space for headers. */ +- skb_reserve(skb, dev->hard_header_len); ++ skb_reserve(skb, hlen); + skb_reset_network_header(skb); + + skb->dev = dev; +@@ -918,7 +919,7 @@ static int __pppoe_xmit(struct sock *sk, + /* Copy the data if there is no space for the header or if it's + * read-only. 
+ */ +- if (skb_cow_head(skb, sizeof(*ph) + dev->hard_header_len)) ++ if (skb_cow_head(skb, LL_RESERVED_SPACE(dev) + sizeof(*ph))) + goto abort; + + __skb_push(skb, sizeof(*ph)); diff --git a/queue-3.18/sctp-do-not-allow-the-v4-socket-to-bind-a-v4mapped-v6-address.patch b/queue-3.18/sctp-do-not-allow-the-v4-socket-to-bind-a-v4mapped-v6-address.patch new file mode 100644 index 00000000000..01128268e9f --- /dev/null +++ b/queue-3.18/sctp-do-not-allow-the-v4-socket-to-bind-a-v4mapped-v6-address.patch @@ -0,0 +1,56 @@ +From foo@baz Mon Jan 29 11:15:06 CET 2018 +From: Xin Long +Date: Mon, 15 Jan 2018 17:02:00 +0800 +Subject: sctp: do not allow the v4 socket to bind a v4mapped v6 address + +From: Xin Long + + +[ Upstream commit c5006b8aa74599ce19104b31d322d2ea9ff887cc ] + +The check in sctp_sockaddr_af is not robust enough to forbid binding a +v4mapped v6 addr on a v4 socket. + +The worse thing is that v4 socket's bind_verify would not convert this +v4mapped v6 addr to a v4 addr. syzbot even reported a crash as the v4 +socket bound a v6 addr. + +This patch is to fix it by doing the common sa.sa_family check first, +then AF_INET check for v4mapped v6 addrs. + +Fixes: 7dab83de50c7 ("sctp: Support ipv6only AF_INET6 sockets.") +Reported-by: syzbot+7b7b518b1228d2743963@syzkaller.appspotmail.com +Acked-by: Neil Horman +Signed-off-by: Xin Long +Acked-by: Marcelo Ricardo Leitner +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/socket.c | 14 ++++++-------- + 1 file changed, 6 insertions(+), 8 deletions(-) + +--- a/net/sctp/socket.c ++++ b/net/sctp/socket.c +@@ -333,16 +333,14 @@ static struct sctp_af *sctp_sockaddr_af( + if (len < sizeof (struct sockaddr)) + return NULL; + ++ if (!opt->pf->af_supported(addr->sa.sa_family, opt)) ++ return NULL; ++ + /* V4 mapped address are really of AF_INET family */ + if (addr->sa.sa_family == AF_INET6 && +- ipv6_addr_v4mapped(&addr->v6.sin6_addr)) { +- if (!opt->pf->af_supported(AF_INET, opt)) +- return NULL; +- } else { +- /* Does this PF support this AF? */ +- if (!opt->pf->af_supported(addr->sa.sa_family, opt)) +- return NULL; +- } ++ ipv6_addr_v4mapped(&addr->v6.sin6_addr) && ++ !opt->pf->af_supported(AF_INET, opt)) ++ return NULL; + + /* If we get this far, af is valid. */ + af = sctp_get_af_specific(addr->sa.sa_family); diff --git a/queue-3.18/sctp-return-error-if-the-asoc-has-been-peeled-off-in-sctp_wait_for_sndbuf.patch b/queue-3.18/sctp-return-error-if-the-asoc-has-been-peeled-off-in-sctp_wait_for_sndbuf.patch new file mode 100644 index 00000000000..9ca3c3ae2a9 --- /dev/null +++ b/queue-3.18/sctp-return-error-if-the-asoc-has-been-peeled-off-in-sctp_wait_for_sndbuf.patch @@ -0,0 +1,87 @@ +From foo@baz Mon Jan 29 11:15:06 CET 2018 +From: Xin Long +Date: Mon, 15 Jan 2018 17:01:36 +0800 +Subject: sctp: return error if the asoc has been peeled off in sctp_wait_for_sndbuf + +From: Xin Long + + +[ Upstream commit a0ff660058b88d12625a783ce9e5c1371c87951f ] + +After commit cea0cc80a677 ("sctp: use the right sk after waking up from +wait_buf sleep"), it may change to lock another sk if the asoc has been +peeled off in sctp_wait_for_sndbuf. + +However, the asoc's new sk could be already closed elsewhere, as it's in +the sendmsg context of the old sk that can't avoid the new sk's closing. 
+If the sk's last one refcnt is held by this asoc, later on after putting +this asoc, the new sk will be freed, while under its own lock. + +This patch is to revert that commit, but fix the old issue by returning +error under the old sk's lock. + +Fixes: cea0cc80a677 ("sctp: use the right sk after waking up from wait_buf sleep") +Reported-by: syzbot+ac6ea7baa4432811eb50@syzkaller.appspotmail.com +Signed-off-by: Xin Long +Acked-by: Neil Horman +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/socket.c | 16 ++++++---------- + 1 file changed, 6 insertions(+), 10 deletions(-) + +--- a/net/sctp/socket.c ++++ b/net/sctp/socket.c +@@ -83,7 +83,7 @@ + static int sctp_writeable(struct sock *sk); + static void sctp_wfree(struct sk_buff *skb); + static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, +- size_t msg_len, struct sock **orig_sk); ++ size_t msg_len); + static int sctp_wait_for_packet(struct sock *sk, int *err, long *timeo_p); + static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p); + static int sctp_wait_for_accept(struct sock *sk, long timeo); +@@ -1948,7 +1948,7 @@ static int sctp_sendmsg(struct kiocb *io + timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); + if (!sctp_wspace(asoc)) { + /* sk can be changed by peel off when waiting for buf. */ +- err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len, &sk); ++ err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len); + if (err) { + if (err == -ESRCH) { + /* asoc is already dead. */ +@@ -6981,12 +6981,12 @@ void sctp_sock_rfree(struct sk_buff *skb + + /* Helper function to wait for space in the sndbuf. 
*/ + static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, +- size_t msg_len, struct sock **orig_sk) ++ size_t msg_len) + { + struct sock *sk = asoc->base.sk; +- int err = 0; + long current_timeo = *timeo_p; + DEFINE_WAIT(wait); ++ int err = 0; + + pr_debug("%s: asoc:%p, timeo:%ld, msg_len:%zu\n", __func__, asoc, + *timeo_p, msg_len); +@@ -7015,17 +7015,13 @@ static int sctp_wait_for_sndbuf(struct s + release_sock(sk); + current_timeo = schedule_timeout(current_timeo); + lock_sock(sk); +- if (sk != asoc->base.sk) { +- release_sock(sk); +- sk = asoc->base.sk; +- lock_sock(sk); +- } ++ if (sk != asoc->base.sk) ++ goto do_error; + + *timeo_p = current_timeo; + } + + out: +- *orig_sk = sk; + finish_wait(&asoc->wait, &wait); + + /* Release the association's refcnt. */ diff --git a/queue-3.18/series b/queue-3.18/series index 8c9b3ec3b20..09f54f2c771 100644 --- a/queue-3.18/series +++ b/queue-3.18/series @@ -38,3 +38,15 @@ eventpoll.h-add-missing-epoll-event-masks.patch um-stop-abusing-__kernel__.patch um-remove-copy-paste-code-from-init.h.patch x86-microcode-intel-extend-bdw-late-loading-further-with-llc-size-check.patch +net-tcp-close-sock-if-net-namespace-is-exiting.patch +dccp-don-t-restart-ccid2_hc_tx_rto_expire-if-sk-in-closed-state.patch +net-igmp-fix-source-address-check-for-igmpv3-reports.patch +tcp-__tcp_hdrlen-helper.patch +net-qdisc_pkt_len_init-should-be-more-robust.patch +pppoe-take-needed_headroom-of-lower-device-into-account-on-xmit.patch +sctp-do-not-allow-the-v4-socket-to-bind-a-v4mapped-v6-address.patch +sctp-return-error-if-the-asoc-has-been-peeled-off-in-sctp_wait_for_sndbuf.patch +vmxnet3-repair-memory-leak.patch +net-allow-neigh-contructor-functions-ability-to-modify-the-primary_key.patch +ipv6-fix-udpv6-sendmsg-crash-caused-by-too-small-mtu.patch +ipv4-make-neigh-lookup-keys-for-loopback-point-to-point-devices-be-inaddr_any.patch diff --git a/queue-3.18/tcp-__tcp_hdrlen-helper.patch b/queue-3.18/tcp-__tcp_hdrlen-helper.patch 
new file mode 100644 index 00000000000..6283cbbcd36 --- /dev/null +++ b/queue-3.18/tcp-__tcp_hdrlen-helper.patch @@ -0,0 +1,39 @@ +From d9b3fca27385eafe61c3ca6feab6cb1e7dc77482 Mon Sep 17 00:00:00 2001 +From: Craig Gallek +Date: Wed, 10 Feb 2016 11:50:37 -0500 +Subject: tcp: __tcp_hdrlen() helper + +From: Craig Gallek + +commit d9b3fca27385eafe61c3ca6feab6cb1e7dc77482 upstream. + +tcp_hdrlen is wasteful if you already have a pointer to struct tcphdr. +This splits the size calculation into a helper function that can be +used if a struct tcphdr is already available. + +Signed-off-by: Craig Gallek +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/tcp.h | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/include/linux/tcp.h ++++ b/include/linux/tcp.h +@@ -29,9 +29,14 @@ static inline struct tcphdr *tcp_hdr(con + return (struct tcphdr *)skb_transport_header(skb); + } + ++static inline unsigned int __tcp_hdrlen(const struct tcphdr *th) ++{ ++ return th->doff * 4; ++} ++ + static inline unsigned int tcp_hdrlen(const struct sk_buff *skb) + { +- return tcp_hdr(skb)->doff * 4; ++ return __tcp_hdrlen(tcp_hdr(skb)); + } + + static inline struct tcphdr *inner_tcp_hdr(const struct sk_buff *skb) diff --git a/queue-3.18/vmxnet3-repair-memory-leak.patch b/queue-3.18/vmxnet3-repair-memory-leak.patch new file mode 100644 index 00000000000..8b3edb8e03b --- /dev/null +++ b/queue-3.18/vmxnet3-repair-memory-leak.patch @@ -0,0 +1,58 @@ +From foo@baz Mon Jan 29 11:15:06 CET 2018 +From: Neil Horman +Date: Mon, 22 Jan 2018 16:06:37 -0500 +Subject: vmxnet3: repair memory leak + +From: Neil Horman + + +[ Upstream commit 848b159835ddef99cc4193083f7e786c3992f580 ] + +with the introduction of commit +b0eb57cb97e7837ebb746404c2c58c6f536f23fa, it appears that rq->buf_info +is improperly handled. 
While it is heap allocated when an rx queue is +setup, and freed when torn down, an old line of code in +vmxnet3_rq_destroy was not properly removed, leading to rq->buf_info[0] +being set to NULL prior to its being freed, causing a memory leak, which +eventually exhausts the system on repeated create/destroy operations +(for example, when the mtu of a vmxnet3 interface is changed +frequently). + +Fix is pretty straightforward, just move the NULL set to after the +free. + +Tested by myself with successful results + +Applies to net, and should likely be queued for stable, please + +Signed-off-by: Neil Horman +Reported-By: boyang@redhat.com +CC: boyang@redhat.com +CC: Shrikrishna Khare +CC: "VMware, Inc." +CC: David S. Miller +Acked-by: Shrikrishna Khare +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/vmxnet3/vmxnet3_drv.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/vmxnet3/vmxnet3_drv.c ++++ b/drivers/net/vmxnet3/vmxnet3_drv.c +@@ -1420,7 +1420,6 @@ static void vmxnet3_rq_destroy(struct vm + rq->rx_ring[i].basePA); + rq->rx_ring[i].base = NULL; + } +- rq->buf_info[i] = NULL; + } + + if (rq->comp_ring.base) { +@@ -1435,6 +1434,7 @@ static void vmxnet3_rq_destroy(struct vm + (rq->rx_ring[0].size + rq->rx_ring[1].size); + dma_free_coherent(&adapter->pdev->dev, sz, rq->buf_info[0], + rq->buf_info_pa); ++ rq->buf_info[0] = rq->buf_info[1] = NULL; + } + } +