git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.18-stable patches
author: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 29 Jan 2018 10:57:01 +0000 (11:57 +0100)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 29 Jan 2018 10:57:01 +0000 (11:57 +0100)
added patches:
dccp-don-t-restart-ccid2_hc_tx_rto_expire-if-sk-in-closed-state.patch
ipv4-make-neigh-lookup-keys-for-loopback-point-to-point-devices-be-inaddr_any.patch
ipv6-fix-udpv6-sendmsg-crash-caused-by-too-small-mtu.patch
net-allow-neigh-contructor-functions-ability-to-modify-the-primary_key.patch
net-igmp-fix-source-address-check-for-igmpv3-reports.patch
net-qdisc_pkt_len_init-should-be-more-robust.patch
net-tcp-close-sock-if-net-namespace-is-exiting.patch
pppoe-take-needed_headroom-of-lower-device-into-account-on-xmit.patch
sctp-do-not-allow-the-v4-socket-to-bind-a-v4mapped-v6-address.patch
sctp-return-error-if-the-asoc-has-been-peeled-off-in-sctp_wait_for_sndbuf.patch
tcp-__tcp_hdrlen-helper.patch
vmxnet3-repair-memory-leak.patch

13 files changed:
queue-3.18/dccp-don-t-restart-ccid2_hc_tx_rto_expire-if-sk-in-closed-state.patch [new file with mode: 0644]
queue-3.18/ipv4-make-neigh-lookup-keys-for-loopback-point-to-point-devices-be-inaddr_any.patch [new file with mode: 0644]
queue-3.18/ipv6-fix-udpv6-sendmsg-crash-caused-by-too-small-mtu.patch [new file with mode: 0644]
queue-3.18/net-allow-neigh-contructor-functions-ability-to-modify-the-primary_key.patch [new file with mode: 0644]
queue-3.18/net-igmp-fix-source-address-check-for-igmpv3-reports.patch [new file with mode: 0644]
queue-3.18/net-qdisc_pkt_len_init-should-be-more-robust.patch [new file with mode: 0644]
queue-3.18/net-tcp-close-sock-if-net-namespace-is-exiting.patch [new file with mode: 0644]
queue-3.18/pppoe-take-needed_headroom-of-lower-device-into-account-on-xmit.patch [new file with mode: 0644]
queue-3.18/sctp-do-not-allow-the-v4-socket-to-bind-a-v4mapped-v6-address.patch [new file with mode: 0644]
queue-3.18/sctp-return-error-if-the-asoc-has-been-peeled-off-in-sctp_wait_for_sndbuf.patch [new file with mode: 0644]
queue-3.18/series
queue-3.18/tcp-__tcp_hdrlen-helper.patch [new file with mode: 0644]
queue-3.18/vmxnet3-repair-memory-leak.patch [new file with mode: 0644]

diff --git a/queue-3.18/dccp-don-t-restart-ccid2_hc_tx_rto_expire-if-sk-in-closed-state.patch b/queue-3.18/dccp-don-t-restart-ccid2_hc_tx_rto_expire-if-sk-in-closed-state.patch
new file mode 100644 (file)
index 0000000..66838d3
--- /dev/null
@@ -0,0 +1,44 @@
+From foo@baz Mon Jan 29 11:15:06 CET 2018
+From: Alexey Kodanev <alexey.kodanev@oracle.com>
+Date: Fri, 26 Jan 2018 15:14:16 +0300
+Subject: dccp: don't restart ccid2_hc_tx_rto_expire() if sk in closed state
+
+From: Alexey Kodanev <alexey.kodanev@oracle.com>
+
+
+[ Upstream commit dd5684ecae3bd8e44b644f50e2c12c7e57fdfef5 ]
+
+ccid2_hc_tx_rto_expire() timer callback always restarts the timer
+again and can run indefinitely (unless it is stopped outside), and after
+commit 120e9dabaf55 ("dccp: defer ccid_hc_tx_delete() at dismantle time"),
+which moved ccid_hc_tx_delete() (also includes sk_stop_timer()) from
+dccp_destroy_sock() to sk_destruct(), this started to happen quite often.
+The timer prevents releasing the socket, as a result, sk_destruct() won't
+be called.
+
+Found with LTP/dccp_ipsec tests running on the bonding device,
+which later couldn't be unloaded after the tests were completed:
+
+  unregister_netdevice: waiting for bond0 to become free. Usage count = 148
+
+Fixes: 2a91aa396739 ("[DCCP] CCID2: Initial CCID2 (TCP-Like) implementation")
+Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/dccp/ccids/ccid2.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/net/dccp/ccids/ccid2.c
++++ b/net/dccp/ccids/ccid2.c
+@@ -140,6 +140,9 @@ static void ccid2_hc_tx_rto_expire(unsig
+       ccid2_pr_debug("RTO_EXPIRE\n");
++      if (sk->sk_state == DCCP_CLOSED)
++              goto out;
++
+       /* back-off timer */
+       hc->tx_rto <<= 1;
+       if (hc->tx_rto > DCCP_RTO_MAX)
diff --git a/queue-3.18/ipv4-make-neigh-lookup-keys-for-loopback-point-to-point-devices-be-inaddr_any.patch b/queue-3.18/ipv4-make-neigh-lookup-keys-for-loopback-point-to-point-devices-be-inaddr_any.patch
new file mode 100644 (file)
index 0000000..f71bcda
--- /dev/null
@@ -0,0 +1,58 @@
+From foo@baz Mon Jan 29 11:15:06 CET 2018
+From: Jim Westfall <jwestfall@surrealistic.net>
+Date: Sun, 14 Jan 2018 04:18:51 -0800
+Subject: ipv4: Make neigh lookup keys for loopback/point-to-point devices be INADDR_ANY
+
+From: Jim Westfall <jwestfall@surrealistic.net>
+
+
+[ Upstream commit cd9ff4de0107c65d69d02253bb25d6db93c3dbc1 ]
+
+Map all lookup neigh keys to INADDR_ANY for loopback/point-to-point devices
+to avoid making an entry for every remote ip the device needs to talk to.
+
+This used to be the old behavior but became broken in a263b3093641f
+(ipv4: Make neigh lookups directly in output packet path) and later removed
+in 0bb4087cbec0 (ipv4: Fix neigh lookup keying over loopback/point-to-point
+devices) because it was broken.
+
+Signed-off-by: Jim Westfall <jwestfall@surrealistic.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/arp.h |    3 +++
+ net/ipv4/arp.c    |    7 ++++++-
+ 2 files changed, 9 insertions(+), 1 deletion(-)
+
+--- a/include/net/arp.h
++++ b/include/net/arp.h
+@@ -37,6 +37,9 @@ static inline struct neighbour *__ipv4_n
+ {
+       struct neighbour *n;
++      if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
++              key = INADDR_ANY;
++
+       rcu_read_lock_bh();
+       n = __ipv4_neigh_lookup_noref(dev, key);
+       if (n && !atomic_inc_not_zero(&n->refcnt))
+--- a/net/ipv4/arp.c
++++ b/net/ipv4/arp.c
+@@ -221,11 +221,16 @@ static u32 arp_hash(const void *pkey,
+ static int arp_constructor(struct neighbour *neigh)
+ {
+-      __be32 addr = *(__be32 *)neigh->primary_key;
++      __be32 addr;
+       struct net_device *dev = neigh->dev;
+       struct in_device *in_dev;
+       struct neigh_parms *parms;
++      u32 inaddr_any = INADDR_ANY;
++      if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
++              memcpy(neigh->primary_key, &inaddr_any, arp_tbl.key_len);
++
++      addr = *(__be32 *)neigh->primary_key;
+       rcu_read_lock();
+       in_dev = __in_dev_get_rcu(dev);
+       if (in_dev == NULL) {
diff --git a/queue-3.18/ipv6-fix-udpv6-sendmsg-crash-caused-by-too-small-mtu.patch b/queue-3.18/ipv6-fix-udpv6-sendmsg-crash-caused-by-too-small-mtu.patch
new file mode 100644 (file)
index 0000000..ebd300a
--- /dev/null
@@ -0,0 +1,94 @@
+From foo@baz Mon Jan 29 11:15:06 CET 2018
+From: Mike Maloney <maloney@google.com>
+Date: Wed, 10 Jan 2018 12:45:10 -0500
+Subject: ipv6: fix udpv6 sendmsg crash caused by too small MTU
+
+From: Mike Maloney <maloney@google.com>
+
+
+[ Upstream commit 749439bfac6e1a2932c582e2699f91d329658196 ]
+
+The logic in __ip6_append_data() assumes that the MTU is at least large
+enough for the headers.  A device's MTU may be adjusted after being
+added while sendmsg() is processing data, resulting in
+__ip6_append_data() seeing any MTU.  For an mtu smaller than the size of
+the fragmentation header, the math results in a negative 'maxfraglen',
+which causes problems when refragmenting any previous skb in the
+skb_write_queue, leaving it possibly malformed.
+
+Instead sendmsg returns EINVAL when the mtu is calculated to be less
+than IPV6_MIN_MTU.
+
+Found by syzkaller:
+kernel BUG at ./include/linux/skbuff.h:2064!
+invalid opcode: 0000 [#1] SMP KASAN
+Dumping ftrace buffer:
+   (ftrace buffer empty)
+Modules linked in:
+CPU: 1 PID: 14216 Comm: syz-executor5 Not tainted 4.13.0-rc4+ #2
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+task: ffff8801d0b68580 task.stack: ffff8801ac6b8000
+RIP: 0010:__skb_pull include/linux/skbuff.h:2064 [inline]
+RIP: 0010:__ip6_make_skb+0x18cf/0x1f70 net/ipv6/ip6_output.c:1617
+RSP: 0018:ffff8801ac6bf570 EFLAGS: 00010216
+RAX: 0000000000010000 RBX: 0000000000000028 RCX: ffffc90003cce000
+RDX: 00000000000001b8 RSI: ffffffff839df06f RDI: ffff8801d9478ca0
+RBP: ffff8801ac6bf780 R08: ffff8801cc3f1dbc R09: 0000000000000000
+R10: ffff8801ac6bf7a0 R11: 43cb4b7b1948a9e7 R12: ffff8801cc3f1dc8
+R13: ffff8801cc3f1d40 R14: 0000000000001036 R15: dffffc0000000000
+FS:  00007f43d740c700(0000) GS:ffff8801dc100000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00007f7834984000 CR3: 00000001d79b9000 CR4: 00000000001406e0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+ ip6_finish_skb include/net/ipv6.h:911 [inline]
+ udp_v6_push_pending_frames+0x255/0x390 net/ipv6/udp.c:1093
+ udpv6_sendmsg+0x280d/0x31a0 net/ipv6/udp.c:1363
+ inet_sendmsg+0x11f/0x5e0 net/ipv4/af_inet.c:762
+ sock_sendmsg_nosec net/socket.c:633 [inline]
+ sock_sendmsg+0xca/0x110 net/socket.c:643
+ SYSC_sendto+0x352/0x5a0 net/socket.c:1750
+ SyS_sendto+0x40/0x50 net/socket.c:1718
+ entry_SYSCALL_64_fastpath+0x1f/0xbe
+RIP: 0033:0x4512e9
+RSP: 002b:00007f43d740bc08 EFLAGS: 00000216 ORIG_RAX: 000000000000002c
+RAX: ffffffffffffffda RBX: 00000000007180a8 RCX: 00000000004512e9
+RDX: 000000000000002e RSI: 0000000020d08000 RDI: 0000000000000005
+RBP: 0000000000000086 R08: 00000000209c1000 R09: 000000000000001c
+R10: 0000000000040800 R11: 0000000000000216 R12: 00000000004b9c69
+R13: 00000000ffffffff R14: 0000000000000005 R15: 00000000202c2000
+Code: 9e 01 fe e9 c5 e8 ff ff e8 7f 9e 01 fe e9 4a ea ff ff 48 89 f7 e8 52 9e 01 fe e9 aa eb ff ff e8 a8 b6 cf fd 0f 0b e8 a1 b6 cf fd <0f> 0b 49 8d 45 78 4d 8d 45 7c 48 89 85 78 fe ff ff 49 8d 85 ba
+RIP: __skb_pull include/linux/skbuff.h:2064 [inline] RSP: ffff8801ac6bf570
+RIP: __ip6_make_skb+0x18cf/0x1f70 net/ipv6/ip6_output.c:1617 RSP: ffff8801ac6bf570
+
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: Mike Maloney <maloney@google.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_output.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/net/ipv6/ip6_output.c
++++ b/net/ipv6/ip6_output.c
+@@ -1214,14 +1214,16 @@ int ip6_append_data(struct sock *sk, int
+               np->cork.tclass = tclass;
+               if (rt->dst.flags & DST_XFRM_TUNNEL)
+                       mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
+-                            rt->dst.dev->mtu : dst_mtu(&rt->dst);
++                            READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
+               else
+                       mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
+-                            rt->dst.dev->mtu : dst_mtu(rt->dst.path);
++                            READ_ONCE(rt->dst.dev->mtu) : dst_mtu(rt->dst.path);
+               if (np->frag_size < mtu) {
+                       if (np->frag_size)
+                               mtu = np->frag_size;
+               }
++              if (mtu < IPV6_MIN_MTU)
++                      return -EINVAL;
+               cork->fragsize = mtu;
+               if (dst_allfrag(rt->dst.path))
+                       cork->flags |= IPCORK_ALLFRAG;
diff --git a/queue-3.18/net-allow-neigh-contructor-functions-ability-to-modify-the-primary_key.patch b/queue-3.18/net-allow-neigh-contructor-functions-ability-to-modify-the-primary_key.patch
new file mode 100644 (file)
index 0000000..6b461e1
--- /dev/null
@@ -0,0 +1,40 @@
+From foo@baz Mon Jan 29 11:15:06 CET 2018
+From: Jim Westfall <jwestfall@surrealistic.net>
+Date: Sun, 14 Jan 2018 04:18:50 -0800
+Subject: net: Allow neigh contructor functions ability to modify the primary_key
+
+From: Jim Westfall <jwestfall@surrealistic.net>
+
+
+[ Upstream commit 096b9854c04df86f03b38a97d40b6506e5730919 ]
+
+Use n->primary_key instead of pkey to account for the possibility that a neigh
+constructor function may have modified the primary_key value.
+
+Signed-off-by: Jim Westfall <jwestfall@surrealistic.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/neighbour.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/core/neighbour.c
++++ b/net/core/neighbour.c
+@@ -508,7 +508,7 @@ struct neighbour *__neigh_create(struct
+       if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
+               nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
+-      hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
++      hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
+       if (n->parms->dead) {
+               rc = ERR_PTR(-EINVAL);
+@@ -520,7 +520,7 @@ struct neighbour *__neigh_create(struct
+            n1 != NULL;
+            n1 = rcu_dereference_protected(n1->next,
+                       lockdep_is_held(&tbl->lock))) {
+-              if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
++              if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
+                       if (want_ref)
+                               neigh_hold(n1);
+                       rc = n1;
diff --git a/queue-3.18/net-igmp-fix-source-address-check-for-igmpv3-reports.patch b/queue-3.18/net-igmp-fix-source-address-check-for-igmpv3-reports.patch
new file mode 100644 (file)
index 0000000..1ee58b0
--- /dev/null
@@ -0,0 +1,41 @@
+From foo@baz Mon Jan 29 11:15:06 CET 2018
+From: Felix Fietkau <nbd@nbd.name>
+Date: Fri, 19 Jan 2018 11:50:46 +0100
+Subject: net: igmp: fix source address check for IGMPv3 reports
+
+From: Felix Fietkau <nbd@nbd.name>
+
+
+[ Upstream commit ad23b750933ea7bf962678972a286c78a8fa36aa ]
+
+Commit "net: igmp: Use correct source address on IGMPv3 reports"
+introduced a check to validate the source address of locally generated
+IGMPv3 packets.
+Instead of checking the local interface address directly, it uses
+inet_ifa_match(fl4->saddr, ifa), which checks if the address is on the
+local subnet (or equal to the point-to-point address if used).
+
+This breaks for point-to-point interfaces, so check against
+ifa->ifa_local directly.
+
+Cc: Kevin Cernekee <cernekee@chromium.org>
+Fixes: a46182b00290 ("net: igmp: Use correct source address on IGMPv3 reports")
+Reported-by: Sebastian Gottschall <s.gottschall@dd-wrt.com>
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/igmp.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/igmp.c
++++ b/net/ipv4/igmp.c
+@@ -329,7 +329,7 @@ static __be32 igmpv3_get_srcaddr(struct
+               return htonl(INADDR_ANY);
+       for_ifa(in_dev) {
+-              if (inet_ifa_match(fl4->saddr, ifa))
++              if (fl4->saddr == ifa->ifa_local)
+                       return fl4->saddr;
+       } endfor_ifa(in_dev);
diff --git a/queue-3.18/net-qdisc_pkt_len_init-should-be-more-robust.patch b/queue-3.18/net-qdisc_pkt_len_init-should-be-more-robust.patch
new file mode 100644 (file)
index 0000000..42369e7
--- /dev/null
@@ -0,0 +1,61 @@
+From foo@baz Mon Jan 29 11:15:06 CET 2018
+From: Eric Dumazet <edumazet@google.com>
+Date: Thu, 18 Jan 2018 19:59:19 -0800
+Subject: net: qdisc_pkt_len_init() should be more robust
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 7c68d1a6b4db9012790af7ac0f0fdc0d2083422a ]
+
+Without proper validation of DODGY packets, we might very well
+feed qdisc_pkt_len_init() with invalid GSO packets.
+
+tcp_hdrlen() might access out-of-bound data, so let's use
+skb_header_pointer() and proper checks.
+
+Whole story is described in commit d0c081b49137 ("flow_dissector:
+properly cap thoff field")
+
+We have the goal of validating DODGY packets earlier in the stack,
+so we might very well revert this fix in the future.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Willem de Bruijn <willemb@google.com>
+Cc: Jason Wang <jasowang@redhat.com>
+Reported-by: syzbot+9da69ebac7dddd804552@syzkaller.appspotmail.com
+Acked-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev.c |   19 +++++++++++++++----
+ 1 file changed, 15 insertions(+), 4 deletions(-)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -2772,10 +2772,21 @@ static void qdisc_pkt_len_init(struct sk
+               hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
+               /* + transport layer */
+-              if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
+-                      hdr_len += tcp_hdrlen(skb);
+-              else
+-                      hdr_len += sizeof(struct udphdr);
++              if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
++                      const struct tcphdr *th;
++                      struct tcphdr _tcphdr;
++
++                      th = skb_header_pointer(skb, skb_transport_offset(skb),
++                                              sizeof(_tcphdr), &_tcphdr);
++                      if (likely(th))
++                              hdr_len += __tcp_hdrlen(th);
++              } else {
++                      struct udphdr _udphdr;
++
++                      if (skb_header_pointer(skb, skb_transport_offset(skb),
++                                             sizeof(_udphdr), &_udphdr))
++                              hdr_len += sizeof(struct udphdr);
++              }
+               if (shinfo->gso_type & SKB_GSO_DODGY)
+                       gso_segs = DIV_ROUND_UP(skb->len - hdr_len,
diff --git a/queue-3.18/net-tcp-close-sock-if-net-namespace-is-exiting.patch b/queue-3.18/net-tcp-close-sock-if-net-namespace-is-exiting.patch
new file mode 100644 (file)
index 0000000..08aa5b2
--- /dev/null
@@ -0,0 +1,120 @@
+From foo@baz Mon Jan 29 10:14:57 CET 2018
+From: Dan Streetman <ddstreet@ieee.org>
+Date: Thu, 18 Jan 2018 16:14:26 -0500
+Subject: net: tcp: close sock if net namespace is exiting
+
+From: Dan Streetman <ddstreet@ieee.org>
+
+
+[ Upstream commit 4ee806d51176ba7b8ff1efd81f271d7252e03a1d ]
+
+When a tcp socket is closed, if it detects that its net namespace is
+exiting, close immediately and do not wait for FIN sequence.
+
+For normal sockets, a reference is taken to their net namespace, so it will
+never exit while the socket is open.  However, kernel sockets do not take a
+reference to their net namespace, so it may begin exiting while the kernel
+socket is still open.  In this case if the kernel socket is a tcp socket,
+it will stay open trying to complete its close sequence.  The sock's dst(s)
+hold a reference to their interface, which are all transferred to the
+namespace's loopback interface when the real interfaces are taken down.
+When the namespace tries to take down its loopback interface, it hangs
+waiting for all references to the loopback interface to release, which
+results in messages like:
+
+unregister_netdevice: waiting for lo to become free. Usage count = 1
+
+These messages continue until the socket finally times out and closes.
+Since the net namespace cleanup holds the net_mutex while calling its
+registered pernet callbacks, any new net namespace initialization is
+blocked until the current net namespace finishes exiting.
+
+After this change, the tcp socket notices the exiting net namespace, and
+closes immediately, releasing its dst(s) and their reference to the
+loopback interface, which lets the net namespace continue exiting.
+
+Link: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1711407
+Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=97811
+Signed-off-by: Dan Streetman <ddstreet@canonical.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/net_namespace.h |   10 ++++++++++
+ net/ipv4/tcp.c              |    3 +++
+ net/ipv4/tcp_timer.c        |   15 +++++++++++++++
+ 3 files changed, 28 insertions(+)
+
+--- a/include/net/net_namespace.h
++++ b/include/net/net_namespace.h
+@@ -200,6 +200,11 @@ int net_eq(const struct net *net1, const
+       return net1 == net2;
+ }
++static inline int check_net(const struct net *net)
++{
++      return atomic_read(&net->count) != 0;
++}
++
+ void net_drop_ns(void *);
+ #else
+@@ -223,6 +228,11 @@ int net_eq(const struct net *net1, const
+ {
+       return 1;
+ }
++
++static inline int check_net(const struct net *net)
++{
++      return 1;
++}
+ #define net_drop_ns NULL
+ #endif
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -2182,6 +2182,9 @@ adjudge_to_death:
+                       tcp_send_active_reset(sk, GFP_ATOMIC);
+                       NET_INC_STATS_BH(sock_net(sk),
+                                       LINUX_MIB_TCPABORTONMEMORY);
++              } else if (!check_net(sock_net(sk))) {
++                      /* Not possible to send reset; just close */
++                      tcp_set_state(sk, TCP_CLOSE);
+               }
+       }
+--- a/net/ipv4/tcp_timer.c
++++ b/net/ipv4/tcp_timer.c
+@@ -46,11 +46,19 @@ static void tcp_write_err(struct sock *s
+  * to prevent DoS attacks. It is called when a retransmission timeout
+  * or zero probe timeout occurs on orphaned socket.
+  *
++ * Also close if our net namespace is exiting; in that case there is no
++ * hope of ever communicating again since all netns interfaces are already
++ * down (or about to be down), and we need to release our dst references,
++ * which have been moved to the netns loopback interface, so the namespace
++ * can finish exiting.  This condition is only possible if we are a kernel
++ * socket, as those do not hold references to the namespace.
++ *
+  * Criteria is still not confirmed experimentally and may change.
+  * We kill the socket, if:
+  * 1. If number of orphaned sockets exceeds an administratively configured
+  *    limit.
+  * 2. If we have strong memory pressure.
++ * 3. If our net namespace is exiting.
+  */
+ static int tcp_out_of_resources(struct sock *sk, bool do_reset)
+ {
+@@ -79,6 +87,13 @@ static int tcp_out_of_resources(struct s
+               NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY);
+               return 1;
+       }
++
++      if (!check_net(sock_net(sk))) {
++              /* Not possible to send reset; just close */
++              tcp_done(sk);
++              return 1;
++      }
++
+       return 0;
+ }
diff --git a/queue-3.18/pppoe-take-needed_headroom-of-lower-device-into-account-on-xmit.patch b/queue-3.18/pppoe-take-needed_headroom-of-lower-device-into-account-on-xmit.patch
new file mode 100644 (file)
index 0000000..5f2a710
--- /dev/null
@@ -0,0 +1,126 @@
+From foo@baz Mon Jan 29 11:15:06 CET 2018
+From: Guillaume Nault <g.nault@alphalink.fr>
+Date: Mon, 22 Jan 2018 18:06:37 +0100
+Subject: pppoe: take ->needed_headroom of lower device into account on xmit
+
+From: Guillaume Nault <g.nault@alphalink.fr>
+
+
+[ Upstream commit 02612bb05e51df8489db5e94d0cf8d1c81f87b0c ]
+
+In pppoe_sendmsg(), reserving dev->hard_header_len bytes of headroom
+was probably fine before the introduction of ->needed_headroom in
+commit f5184d267c1a ("net: Allow netdevices to specify needed head/tailroom").
+
+But now, virtual devices typically advertise the size of their overhead
+in dev->needed_headroom, so we must also take it into account in
+skb_reserve().
+Allocation size of skb is also updated to take dev->needed_tailroom
+into account and replace the arbitrary 32 bytes with the real size of
+a PPPoE header.
+
+This issue was discovered by syzbot, who connected a pppoe socket to a
+gre device which had dev->header_ops->create == ipgre_header and
+dev->hard_header_len == 0. Therefore, PPPoE didn't reserve any
+headroom, and dev_hard_header() crashed when ipgre_header() tried to
+prepend its header to skb->data.
+
+skbuff: skb_under_panic: text:000000001d390b3a len:31 put:24
+head:00000000d8ed776f data:000000008150e823 tail:0x7 end:0xc0 dev:gre0
+------------[ cut here ]------------
+kernel BUG at net/core/skbuff.c:104!
+invalid opcode: 0000 [#1] SMP KASAN
+Dumping ftrace buffer:
+    (ftrace buffer empty)
+Modules linked in:
+CPU: 1 PID: 3670 Comm: syzkaller801466 Not tainted
+4.15.0-rc7-next-20180115+ #97
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
+Google 01/01/2011
+RIP: 0010:skb_panic+0x162/0x1f0 net/core/skbuff.c:100
+RSP: 0018:ffff8801d9bd7840 EFLAGS: 00010282
+RAX: 0000000000000083 RBX: ffff8801d4f083c0 RCX: 0000000000000000
+RDX: 0000000000000083 RSI: 1ffff1003b37ae92 RDI: ffffed003b37aefc
+RBP: ffff8801d9bd78a8 R08: 1ffff1003b37ae8a R09: 0000000000000000
+R10: 0000000000000001 R11: 0000000000000000 R12: ffffffff86200de0
+R13: ffffffff84a981ad R14: 0000000000000018 R15: ffff8801d2d34180
+FS:  00000000019c4880(0000) GS:ffff8801db300000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00000000208bc000 CR3: 00000001d9111001 CR4: 00000000001606e0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+  skb_under_panic net/core/skbuff.c:114 [inline]
+  skb_push+0xce/0xf0 net/core/skbuff.c:1714
+  ipgre_header+0x6d/0x4e0 net/ipv4/ip_gre.c:879
+  dev_hard_header include/linux/netdevice.h:2723 [inline]
+  pppoe_sendmsg+0x58e/0x8b0 drivers/net/ppp/pppoe.c:890
+  sock_sendmsg_nosec net/socket.c:630 [inline]
+  sock_sendmsg+0xca/0x110 net/socket.c:640
+  sock_write_iter+0x31a/0x5d0 net/socket.c:909
+  call_write_iter include/linux/fs.h:1775 [inline]
+  do_iter_readv_writev+0x525/0x7f0 fs/read_write.c:653
+  do_iter_write+0x154/0x540 fs/read_write.c:932
+  vfs_writev+0x18a/0x340 fs/read_write.c:977
+  do_writev+0xfc/0x2a0 fs/read_write.c:1012
+  SYSC_writev fs/read_write.c:1085 [inline]
+  SyS_writev+0x27/0x30 fs/read_write.c:1082
+  entry_SYSCALL_64_fastpath+0x29/0xa0
+
+Admittedly PPPoE shouldn't be allowed to run on non Ethernet-like
+interfaces, but reserving space for ->needed_headroom is a more
+fundamental issue that needs to be addressed first.
+
+Same problem exists for __pppoe_xmit(), which also needs to take
+dev->needed_headroom into account in skb_cow_head().
+
+Fixes: f5184d267c1a ("net: Allow netdevices to specify needed head/tailroom")
+Reported-by: syzbot+ed0838d0fa4c4f2b528e20286e6dc63effc7c14d@syzkaller.appspotmail.com
+Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
+Reviewed-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ppp/pppoe.c |   11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+--- a/drivers/net/ppp/pppoe.c
++++ b/drivers/net/ppp/pppoe.c
+@@ -830,6 +830,7 @@ static int pppoe_sendmsg(struct kiocb *i
+       struct pppoe_hdr *ph;
+       struct net_device *dev;
+       char *start;
++      int hlen;
+       lock_sock(sk);
+       if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) {
+@@ -848,16 +849,16 @@ static int pppoe_sendmsg(struct kiocb *i
+       if (total_len > (dev->mtu + dev->hard_header_len))
+               goto end;
+-
+-      skb = sock_wmalloc(sk, total_len + dev->hard_header_len + 32,
+-                         0, GFP_KERNEL);
++      hlen = LL_RESERVED_SPACE(dev);
++      skb = sock_wmalloc(sk, hlen + sizeof(*ph) + total_len +
++                         dev->needed_tailroom, 0, GFP_KERNEL);
+       if (!skb) {
+               error = -ENOMEM;
+               goto end;
+       }
+       /* Reserve space for headers. */
+-      skb_reserve(skb, dev->hard_header_len);
++      skb_reserve(skb, hlen);
+       skb_reset_network_header(skb);
+       skb->dev = dev;
+@@ -918,7 +919,7 @@ static int __pppoe_xmit(struct sock *sk,
+       /* Copy the data if there is no space for the header or if it's
+        * read-only.
+        */
+-      if (skb_cow_head(skb, sizeof(*ph) + dev->hard_header_len))
++      if (skb_cow_head(skb, LL_RESERVED_SPACE(dev) + sizeof(*ph)))
+               goto abort;
+       __skb_push(skb, sizeof(*ph));
diff --git a/queue-3.18/sctp-do-not-allow-the-v4-socket-to-bind-a-v4mapped-v6-address.patch b/queue-3.18/sctp-do-not-allow-the-v4-socket-to-bind-a-v4mapped-v6-address.patch
new file mode 100644 (file)
index 0000000..0112826
--- /dev/null
@@ -0,0 +1,56 @@
+From foo@baz Mon Jan 29 11:15:06 CET 2018
+From: Xin Long <lucien.xin@gmail.com>
+Date: Mon, 15 Jan 2018 17:02:00 +0800
+Subject: sctp: do not allow the v4 socket to bind a v4mapped v6 address
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit c5006b8aa74599ce19104b31d322d2ea9ff887cc ]
+
+The check in sctp_sockaddr_af is not robust enough to forbid binding a
+v4mapped v6 addr on a v4 socket.
+
+The worse thing is that v4 socket's bind_verify would not convert this
+v4mapped v6 addr to a v4 addr. syzbot even reported a crash as the v4
+socket bound a v6 addr.
+
+This patch is to fix it by doing the common sa.sa_family check first,
+then AF_INET check for v4mapped v6 addrs.
+
+Fixes: 7dab83de50c7 ("sctp: Support ipv6only AF_INET6 sockets.")
+Reported-by: syzbot+7b7b518b1228d2743963@syzkaller.appspotmail.com
+Acked-by: Neil Horman <nhorman@tuxdriver.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/socket.c |   14 ++++++--------
+ 1 file changed, 6 insertions(+), 8 deletions(-)
+
+--- a/net/sctp/socket.c
++++ b/net/sctp/socket.c
+@@ -333,16 +333,14 @@ static struct sctp_af *sctp_sockaddr_af(
+       if (len < sizeof (struct sockaddr))
+               return NULL;
++      if (!opt->pf->af_supported(addr->sa.sa_family, opt))
++              return NULL;
++
+       /* V4 mapped address are really of AF_INET family */
+       if (addr->sa.sa_family == AF_INET6 &&
+-          ipv6_addr_v4mapped(&addr->v6.sin6_addr)) {
+-              if (!opt->pf->af_supported(AF_INET, opt))
+-                      return NULL;
+-      } else {
+-              /* Does this PF support this AF? */
+-              if (!opt->pf->af_supported(addr->sa.sa_family, opt))
+-                      return NULL;
+-      }
++          ipv6_addr_v4mapped(&addr->v6.sin6_addr) &&
++          !opt->pf->af_supported(AF_INET, opt))
++              return NULL;
+       /* If we get this far, af is valid. */
+       af = sctp_get_af_specific(addr->sa.sa_family);
diff --git a/queue-3.18/sctp-return-error-if-the-asoc-has-been-peeled-off-in-sctp_wait_for_sndbuf.patch b/queue-3.18/sctp-return-error-if-the-asoc-has-been-peeled-off-in-sctp_wait_for_sndbuf.patch
new file mode 100644 (file)
index 0000000..9ca3c3a
--- /dev/null
@@ -0,0 +1,87 @@
+From foo@baz Mon Jan 29 11:15:06 CET 2018
+From: Xin Long <lucien.xin@gmail.com>
+Date: Mon, 15 Jan 2018 17:01:36 +0800
+Subject: sctp: return error if the asoc has been peeled off in sctp_wait_for_sndbuf
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit a0ff660058b88d12625a783ce9e5c1371c87951f ]
+
+After commit cea0cc80a677 ("sctp: use the right sk after waking up from
+wait_buf sleep"), it may change to lock another sk if the asoc has been
+peeled off in sctp_wait_for_sndbuf.
+
+However, the asoc's new sk could already be closed elsewhere, because the
+sendmsg context of the old sk cannot prevent the new sk from being closed.
+If the sk's last refcnt is held by this asoc, then later on, after putting
+this asoc, the new sk will be freed while still under its own lock.
+
+This patch is to revert that commit, but fix the old issue by returning
+error under the old sk's lock.
+
+Fixes: cea0cc80a677 ("sctp: use the right sk after waking up from wait_buf sleep")
+Reported-by: syzbot+ac6ea7baa4432811eb50@syzkaller.appspotmail.com
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Acked-by: Neil Horman <nhorman@tuxdriver.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/socket.c |   16 ++++++----------
+ 1 file changed, 6 insertions(+), 10 deletions(-)
+
+--- a/net/sctp/socket.c
++++ b/net/sctp/socket.c
+@@ -83,7 +83,7 @@
+ static int sctp_writeable(struct sock *sk);
+ static void sctp_wfree(struct sk_buff *skb);
+ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
+-                              size_t msg_len, struct sock **orig_sk);
++                              size_t msg_len);
+ static int sctp_wait_for_packet(struct sock *sk, int *err, long *timeo_p);
+ static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p);
+ static int sctp_wait_for_accept(struct sock *sk, long timeo);
+@@ -1948,7 +1948,7 @@ static int sctp_sendmsg(struct kiocb *io
+       timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
+       if (!sctp_wspace(asoc)) {
+               /* sk can be changed by peel off when waiting for buf. */
+-              err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len, &sk);
++              err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
+               if (err) {
+                       if (err == -ESRCH) {
+                               /* asoc is already dead. */
+@@ -6981,12 +6981,12 @@ void sctp_sock_rfree(struct sk_buff *skb
+ /* Helper function to wait for space in the sndbuf.  */
+ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
+-                              size_t msg_len, struct sock **orig_sk)
++                              size_t msg_len)
+ {
+       struct sock *sk = asoc->base.sk;
+-      int err = 0;
+       long current_timeo = *timeo_p;
+       DEFINE_WAIT(wait);
++      int err = 0;
+       pr_debug("%s: asoc:%p, timeo:%ld, msg_len:%zu\n", __func__, asoc,
+                *timeo_p, msg_len);
+@@ -7015,17 +7015,13 @@ static int sctp_wait_for_sndbuf(struct s
+               release_sock(sk);
+               current_timeo = schedule_timeout(current_timeo);
+               lock_sock(sk);
+-              if (sk != asoc->base.sk) {
+-                      release_sock(sk);
+-                      sk = asoc->base.sk;
+-                      lock_sock(sk);
+-              }
++              if (sk != asoc->base.sk)
++                      goto do_error;
+               *timeo_p = current_timeo;
+       }
+ out:
+-      *orig_sk = sk;
+       finish_wait(&asoc->wait, &wait);
+       /* Release the association's refcnt.  */
index 8c9b3ec3b20ea7b650f7a52a3890f465bbf585eb..09f54f2c7713c5cb2d7737375464c07ce5f03497 100644 (file)
@@ -38,3 +38,15 @@ eventpoll.h-add-missing-epoll-event-masks.patch
 um-stop-abusing-__kernel__.patch
 um-remove-copy-paste-code-from-init.h.patch
 x86-microcode-intel-extend-bdw-late-loading-further-with-llc-size-check.patch
+net-tcp-close-sock-if-net-namespace-is-exiting.patch
+dccp-don-t-restart-ccid2_hc_tx_rto_expire-if-sk-in-closed-state.patch
+net-igmp-fix-source-address-check-for-igmpv3-reports.patch
+tcp-__tcp_hdrlen-helper.patch
+net-qdisc_pkt_len_init-should-be-more-robust.patch
+pppoe-take-needed_headroom-of-lower-device-into-account-on-xmit.patch
+sctp-do-not-allow-the-v4-socket-to-bind-a-v4mapped-v6-address.patch
+sctp-return-error-if-the-asoc-has-been-peeled-off-in-sctp_wait_for_sndbuf.patch
+vmxnet3-repair-memory-leak.patch
+net-allow-neigh-contructor-functions-ability-to-modify-the-primary_key.patch
+ipv6-fix-udpv6-sendmsg-crash-caused-by-too-small-mtu.patch
+ipv4-make-neigh-lookup-keys-for-loopback-point-to-point-devices-be-inaddr_any.patch
diff --git a/queue-3.18/tcp-__tcp_hdrlen-helper.patch b/queue-3.18/tcp-__tcp_hdrlen-helper.patch
new file mode 100644 (file)
index 0000000..6283cbb
--- /dev/null
@@ -0,0 +1,39 @@
+From d9b3fca27385eafe61c3ca6feab6cb1e7dc77482 Mon Sep 17 00:00:00 2001
+From: Craig Gallek <kraig@google.com>
+Date: Wed, 10 Feb 2016 11:50:37 -0500
+Subject: tcp: __tcp_hdrlen() helper
+
+From: Craig Gallek <kraig@google.com>
+
+commit d9b3fca27385eafe61c3ca6feab6cb1e7dc77482 upstream.
+
+tcp_hdrlen is wasteful if you already have a pointer to struct tcphdr.
+This splits the size calculation into a helper function that can be
+used if a struct tcphdr is already available.
+
+Signed-off-by: Craig Gallek <kraig@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/tcp.h |    7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/include/linux/tcp.h
++++ b/include/linux/tcp.h
+@@ -29,9 +29,14 @@ static inline struct tcphdr *tcp_hdr(con
+       return (struct tcphdr *)skb_transport_header(skb);
+ }
++static inline unsigned int __tcp_hdrlen(const struct tcphdr *th)
++{
++      return th->doff * 4;
++}
++
+ static inline unsigned int tcp_hdrlen(const struct sk_buff *skb)
+ {
+-      return tcp_hdr(skb)->doff * 4;
++      return __tcp_hdrlen(tcp_hdr(skb));
+ }
+ static inline struct tcphdr *inner_tcp_hdr(const struct sk_buff *skb)
diff --git a/queue-3.18/vmxnet3-repair-memory-leak.patch b/queue-3.18/vmxnet3-repair-memory-leak.patch
new file mode 100644 (file)
index 0000000..8b3edb8
--- /dev/null
@@ -0,0 +1,58 @@
+From foo@baz Mon Jan 29 11:15:06 CET 2018
+From: Neil Horman <nhorman@tuxdriver.com>
+Date: Mon, 22 Jan 2018 16:06:37 -0500
+Subject: vmxnet3: repair memory leak
+
+From: Neil Horman <nhorman@tuxdriver.com>
+
+
+[ Upstream commit 848b159835ddef99cc4193083f7e786c3992f580 ]
+
+with the introduction of commit
+b0eb57cb97e7837ebb746404c2c58c6f536f23fa, it appears that rq->buf_info
+is improperly handled.  While it is heap allocated when an rx queue is
+setup, and freed when torn down, an old line of code in
+vmxnet3_rq_destroy was not properly removed, leading to rq->buf_info[0]
+being set to NULL prior to its being freed, causing a memory leak, which
+eventually exhausts the system on repeated create/destroy operations
+(for example, when the mtu of a vmxnet3 interface is changed
+frequently).
+
+The fix is pretty straightforward: just move the NULL set to after the
+free.
+
+Tested by myself with successful results
+
+Applies to net, and should likely be queued for stable, please
+
+Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
+Reported-By: boyang@redhat.com
+CC: boyang@redhat.com
+CC: Shrikrishna Khare <skhare@vmware.com>
+CC: "VMware, Inc." <pv-drivers@vmware.com>
+CC: David S. Miller <davem@davemloft.net>
+Acked-by: Shrikrishna Khare <skhare@vmware.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/vmxnet3/vmxnet3_drv.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/vmxnet3/vmxnet3_drv.c
++++ b/drivers/net/vmxnet3/vmxnet3_drv.c
+@@ -1420,7 +1420,6 @@ static void vmxnet3_rq_destroy(struct vm
+                                         rq->rx_ring[i].basePA);
+                       rq->rx_ring[i].base = NULL;
+               }
+-              rq->buf_info[i] = NULL;
+       }
+       if (rq->comp_ring.base) {
+@@ -1435,6 +1434,7 @@ static void vmxnet3_rq_destroy(struct vm
+                       (rq->rx_ring[0].size + rq->rx_ring[1].size);
+               dma_free_coherent(&adapter->pdev->dev, sz, rq->buf_info[0],
+                                 rq->buf_info_pa);
++              rq->buf_info[0] = rq->buf_info[1] = NULL;
+       }
+ }