git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.9-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 29 Apr 2017 06:23:57 +0000 (08:23 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 29 Apr 2017 06:23:57 +0000 (08:23 +0200)
added patches:
bpf-improve-verifier-packet-range-checks.patch
dp83640-don-t-recieve-time-stamps-twice.patch
gso-validate-assumption-of-frag_list-segementation.patch
ip6mr-fix-notification-device-destruction.patch
ipv6-check-raw-payload-size-correctly-in-ioctl.patch
ipv6-check-skb-protocol-before-lookup-for-nexthop.patch
ipv6-fix-idev-addr_list-corruption.patch
kcm-return-immediately-after-copy_from_user-failure.patch
l2tp-fix-ppp-pseudo-wire-auto-loading.patch
l2tp-hold-tunnel-socket-when-handling-control-frames-in-l2tp_ip-and-l2tp_ip6.patch
l2tp-purge-socket-queues-in-the-.destruct-callback.patch
l2tp-take-reference-on-sessions-being-dumped.patch
macvlan-fix-device-ref-leak-when-purging-bc_queue.patch
net-ipv4-fix-multipath-rtm_getroute-behavior-when-iif-is-given.patch
net-ipv6-regenerate-host-route-if-moved-to-gc-list.patch
net-ipv6-rtf_pcpu-should-not-be-settable-from-userspace.patch
net-mlx5-avoid-dereferencing-uninitialized-pointer.patch
net-mlx5-fix-driver-load-bad-flow-when-having-fw-initializing-timeout.patch
net-mlx5e-fix-ethtool_grxclsrlall-handling.patch
net-mlx5e-fix-small-packet-threshold.patch
net-neigh-guard-against-null-solicit-method.patch
net-packet-fix-overflow-in-check-for-tp_frame_nr.patch
net-packet-fix-overflow-in-check-for-tp_reserve.patch
net-phy-fix-auto-negotiation-stall-due-to-unavailable-interrupt.patch
net-phy-handle-state-correctly-in-phy_stop_machine.patch
net-timestamp-avoid-use-after-free-in-ip_recv_error.patch
net-vrf-fix-setting-nlm_f_excl-flag-when-adding-l3mdev-rule.patch
netpoll-check-for-skb-queue_mapping.patch
sctp-listen-on-the-sock-only-when-it-s-state-is-listening-or-closed.patch
sh_eth-unmap-dma-buffers-when-freeing-rings.patch
sparc64-fix-kernel-panic-due-to-erroneous-ifdef-surrounding-pmd_write.patch
sparc64-kern_addr_valid-regression.patch
tcp-clear-saved_syn-in-tcp_disconnect.patch
tcp-memset-ca_priv-data-to-0-properly.patch

35 files changed:
queue-4.9/bpf-improve-verifier-packet-range-checks.patch [new file with mode: 0644]
queue-4.9/dp83640-don-t-recieve-time-stamps-twice.patch [new file with mode: 0644]
queue-4.9/gso-validate-assumption-of-frag_list-segementation.patch [new file with mode: 0644]
queue-4.9/ip6mr-fix-notification-device-destruction.patch [new file with mode: 0644]
queue-4.9/ipv6-check-raw-payload-size-correctly-in-ioctl.patch [new file with mode: 0644]
queue-4.9/ipv6-check-skb-protocol-before-lookup-for-nexthop.patch [new file with mode: 0644]
queue-4.9/ipv6-fix-idev-addr_list-corruption.patch [new file with mode: 0644]
queue-4.9/kcm-return-immediately-after-copy_from_user-failure.patch [new file with mode: 0644]
queue-4.9/l2tp-fix-ppp-pseudo-wire-auto-loading.patch [new file with mode: 0644]
queue-4.9/l2tp-hold-tunnel-socket-when-handling-control-frames-in-l2tp_ip-and-l2tp_ip6.patch [new file with mode: 0644]
queue-4.9/l2tp-purge-socket-queues-in-the-.destruct-callback.patch [new file with mode: 0644]
queue-4.9/l2tp-take-reference-on-sessions-being-dumped.patch [new file with mode: 0644]
queue-4.9/macvlan-fix-device-ref-leak-when-purging-bc_queue.patch [new file with mode: 0644]
queue-4.9/net-ipv4-fix-multipath-rtm_getroute-behavior-when-iif-is-given.patch [new file with mode: 0644]
queue-4.9/net-ipv6-regenerate-host-route-if-moved-to-gc-list.patch [new file with mode: 0644]
queue-4.9/net-ipv6-rtf_pcpu-should-not-be-settable-from-userspace.patch [new file with mode: 0644]
queue-4.9/net-mlx5-avoid-dereferencing-uninitialized-pointer.patch [new file with mode: 0644]
queue-4.9/net-mlx5-fix-driver-load-bad-flow-when-having-fw-initializing-timeout.patch [new file with mode: 0644]
queue-4.9/net-mlx5e-fix-ethtool_grxclsrlall-handling.patch [new file with mode: 0644]
queue-4.9/net-mlx5e-fix-small-packet-threshold.patch [new file with mode: 0644]
queue-4.9/net-neigh-guard-against-null-solicit-method.patch [new file with mode: 0644]
queue-4.9/net-packet-fix-overflow-in-check-for-tp_frame_nr.patch [new file with mode: 0644]
queue-4.9/net-packet-fix-overflow-in-check-for-tp_reserve.patch [new file with mode: 0644]
queue-4.9/net-phy-fix-auto-negotiation-stall-due-to-unavailable-interrupt.patch [new file with mode: 0644]
queue-4.9/net-phy-handle-state-correctly-in-phy_stop_machine.patch [new file with mode: 0644]
queue-4.9/net-timestamp-avoid-use-after-free-in-ip_recv_error.patch [new file with mode: 0644]
queue-4.9/net-vrf-fix-setting-nlm_f_excl-flag-when-adding-l3mdev-rule.patch [new file with mode: 0644]
queue-4.9/netpoll-check-for-skb-queue_mapping.patch [new file with mode: 0644]
queue-4.9/sctp-listen-on-the-sock-only-when-it-s-state-is-listening-or-closed.patch [new file with mode: 0644]
queue-4.9/series
queue-4.9/sh_eth-unmap-dma-buffers-when-freeing-rings.patch [new file with mode: 0644]
queue-4.9/sparc64-fix-kernel-panic-due-to-erroneous-ifdef-surrounding-pmd_write.patch [new file with mode: 0644]
queue-4.9/sparc64-kern_addr_valid-regression.patch [new file with mode: 0644]
queue-4.9/tcp-clear-saved_syn-in-tcp_disconnect.patch [new file with mode: 0644]
queue-4.9/tcp-memset-ca_priv-data-to-0-properly.patch [new file with mode: 0644]

diff --git a/queue-4.9/bpf-improve-verifier-packet-range-checks.patch b/queue-4.9/bpf-improve-verifier-packet-range-checks.patch
new file mode 100644 (file)
index 0000000..df20a08
--- /dev/null
@@ -0,0 +1,53 @@
+From foo@baz Sat Apr 29 08:22:13 CEST 2017
+From: Alexei Starovoitov <ast@fb.com>
+Date: Fri, 24 Mar 2017 15:57:33 -0700
+Subject: bpf: improve verifier packet range checks
+
+From: Alexei Starovoitov <ast@fb.com>
+
+
+[ Upstream commit b1977682a3858b5584ffea7cfb7bd863f68db18d ]
+
+llvm can optimize the 'if (ptr > data_end)' checks to be in an order
+slightly different from the original C code, which will confuse the verifier.
+Like:
+if (ptr + 16 > data_end)
+  return TC_ACT_SHOT;
+// may be followed by
+if (ptr + 14 > data_end)
+  return TC_ACT_SHOT;
+while llvm can see that 'ptr' is valid for all 16 bytes,
+the verifier cannot.
+Fix the verifier logic to account for such a case and add a test.
+
+Reported-by: Huapeng Zhou <hzhou@fb.com>
+Fixes: 969bf05eb3ce ("bpf: direct packet access")
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Acked-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Martin KaFai Lau <kafai@fb.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/verifier.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -1829,14 +1829,15 @@ static void find_good_pkt_pointers(struc
+       for (i = 0; i < MAX_BPF_REG; i++)
+               if (regs[i].type == PTR_TO_PACKET && regs[i].id == dst_reg->id)
+-                      regs[i].range = dst_reg->off;
++                      /* keep the maximum range already checked */
++                      regs[i].range = max(regs[i].range, dst_reg->off);
+       for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
+               if (state->stack_slot_type[i] != STACK_SPILL)
+                       continue;
+               reg = &state->spilled_regs[i / BPF_REG_SIZE];
+               if (reg->type == PTR_TO_PACKET && reg->id == dst_reg->id)
+-                      reg->range = dst_reg->off;
++                      reg->range = max(reg->range, dst_reg->off);
+       }
+ }
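
The pattern the commit message describes can be shown with a small tc
classifier program. The sketch below is illustrative only and is not part of
the patch above; the function and section names are made up, and it assumes a
program compiled with clang for the bpf target:

#include <linux/bpf.h>
#include <linux/pkt_cls.h>

__attribute__((section("classifier"), used))
int range_check_example(struct __sk_buff *skb)
{
        void *data     = (void *)(long)skb->data;
        void *data_end = (void *)(long)skb->data_end;
        unsigned char *ptr = data;

        if (ptr + 16 > data_end)        /* larger bound checked first */
                return TC_ACT_SHOT;
        if (ptr + 14 > data_end)        /* smaller, already-implied bound */
                return TC_ACT_SHOT;

        /* Access within the 16 validated bytes.  Before the fix, the verifier
         * reset the tracked range to 14 after the second check and rejected
         * loads beyond offset 13; with the fix it keeps the maximum range. */
        return ptr[15] ? TC_ACT_OK : TC_ACT_SHOT;
}
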
diff --git a/queue-4.9/dp83640-don-t-recieve-time-stamps-twice.patch b/queue-4.9/dp83640-don-t-recieve-time-stamps-twice.patch
new file mode 100644 (file)
index 0000000..1472515
--- /dev/null
@@ -0,0 +1,41 @@
+From foo@baz Sat Apr 29 08:22:13 CEST 2017
+From: Dan Carpenter <dan.carpenter@oracle.com>
+Date: Tue, 18 Apr 2017 22:14:26 +0300
+Subject: dp83640: don't recieve time stamps twice
+
+From: Dan Carpenter <dan.carpenter@oracle.com>
+
+
+[ Upstream commit 9d386cd9a755c8293e8916264d4d053878a7c9c7 ]
+
+This patch is prompted by a static checker warning about a potential
+use after free.  The concern is that netif_rx_ni() can free "skb" and we
+call it twice.
+
+When I look at the commit that added this, it looks like some stray
+lines were added accidentally.  It doesn't make sense to me that we
+would receive the same data two times.  I asked the author but never
+received a response.
+
+I can't test this code, but I'm pretty sure my patch is correct.
+
+Fixes: 4b063258ab93 ("dp83640: Delay scheduled work.")
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Acked-by: Stefan Sørensen <stefan.sorensen@spectralink.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/phy/dp83640.c |    2 --
+ 1 file changed, 2 deletions(-)
+
+--- a/drivers/net/phy/dp83640.c
++++ b/drivers/net/phy/dp83640.c
+@@ -1438,8 +1438,6 @@ static bool dp83640_rxtstamp(struct phy_
+               skb_info->tmo = jiffies + SKB_TIMESTAMP_TIMEOUT;
+               skb_queue_tail(&dp83640->rx_queue, skb);
+               schedule_delayed_work(&dp83640->ts_work, SKB_TIMESTAMP_TIMEOUT);
+-      } else {
+-              netif_rx_ni(skb);
+       }
+       return true;
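
For context, the invariant the fix restores is that an skb has exactly one
consumer: once it has been queued for deferred timestamp handling, it must not
also be handed to netif_rx_ni(). A hedged, stripped-down sketch of that shape
(the function and variable names are illustrative, not the driver's):

#include <linux/skbuff.h>
#include <linux/netdevice.h>

/* Sketch only: either defer the skb (the queue owns it from here on) or tell
 * the caller to deliver it itself -- never both, since a second consumer may
 * touch an skb that has already been freed. */
static bool rxtstamp_sketch(struct sk_buff_head *rxq, struct sk_buff *skb,
                            bool want_timestamp)
{
        if (want_timestamp) {
                skb_queue_tail(rxq, skb);       /* deferred path owns skb now */
                return true;                    /* caller must not touch skb again */
        }
        return false;                           /* caller delivers it, e.g. via netif_rx() */
}
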
diff --git a/queue-4.9/gso-validate-assumption-of-frag_list-segementation.patch b/queue-4.9/gso-validate-assumption-of-frag_list-segementation.patch
new file mode 100644 (file)
index 0000000..1cb42d0
--- /dev/null
@@ -0,0 +1,75 @@
+From foo@baz Sat Apr 29 08:22:13 CEST 2017
+From: Ilan Tayari <ilant@mellanox.com>
+Date: Wed, 19 Apr 2017 21:26:07 +0300
+Subject: gso: Validate assumption of frag_list segementation
+
+From: Ilan Tayari <ilant@mellanox.com>
+
+
+[ Upstream commit 43170c4e0ba709c79130c3fe5a41e66279950cd0 ]
+
+Commit 07b26c9454a2 ("gso: Support partial splitting at the frag_list
+pointer") assumes that all SKBs in a frag_list (except maybe the last
+one) contain the same amount of GSO payload.
+
+This assumption is not always correct, resulting in the following
+warning message in the log:
+    skb_segment: too many frags
+
+For example, mlx5 driver in Striding RQ mode creates some RX SKBs with
+one frag, and some with 2 frags.
+After GRO, the frag_list SKBs end up having different amounts of payload.
+If this frag_list SKB is then forwarded, the aforementioned assumption
+is violated.
+
+Validate the assumption, and fall back to software GSO if it is not true.
+
+Change-Id: Ia03983f4a47b6534dd987d7a2aad96d54d46d212
+Fixes: 07b26c9454a2 ("gso: Support partial splitting at the frag_list pointer")
+Signed-off-by: Ilan Tayari <ilant@mellanox.com>
+Signed-off-by: Ilya Lesokhin <ilyal@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/skbuff.c |   18 ++++++++++++++----
+ 1 file changed, 14 insertions(+), 4 deletions(-)
+
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -3076,22 +3076,32 @@ struct sk_buff *skb_segment(struct sk_bu
+       if (sg && csum && (mss != GSO_BY_FRAGS))  {
+               if (!(features & NETIF_F_GSO_PARTIAL)) {
+                       struct sk_buff *iter;
++                      unsigned int frag_len;
+                       if (!list_skb ||
+                           !net_gso_ok(features, skb_shinfo(head_skb)->gso_type))
+                               goto normal;
+-                      /* Split the buffer at the frag_list pointer.
+-                       * This is based on the assumption that all
+-                       * buffers in the chain excluding the last
+-                       * containing the same amount of data.
++                      /* If we get here then all the required
++                       * GSO features except frag_list are supported.
++                       * Try to split the SKB to multiple GSO SKBs
++                       * with no frag_list.
++                       * Currently we can do that only when the buffers don't
++                       * have a linear part and all the buffers except
++                       * the last are of the same length.
+                        */
++                      frag_len = list_skb->len;
+                       skb_walk_frags(head_skb, iter) {
++                              if (frag_len != iter->len && iter->next)
++                                      goto normal;
+                               if (skb_headlen(iter))
+                                       goto normal;
+                               len -= iter->len;
+                       }
++
++                      if (len != frag_len)
++                              goto normal;
+               }
+               /* GSO partial only requires that we trim off any excess that
diff --git a/queue-4.9/ip6mr-fix-notification-device-destruction.patch b/queue-4.9/ip6mr-fix-notification-device-destruction.patch
new file mode 100644 (file)
index 0000000..e6b5af3
--- /dev/null
@@ -0,0 +1,131 @@
+From foo@baz Sat Apr 29 08:22:13 CEST 2017
+From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Date: Fri, 21 Apr 2017 20:42:16 +0300
+Subject: ip6mr: fix notification device destruction
+
+From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+
+
+[ Upstream commit 723b929ca0f79c0796f160c2eeda4597ee98d2b8 ]
+
+Andrey Konovalov reported a BUG in the ip6mr code, triggered because we
+call unregister_netdevice_many for a device that is already being
+destroyed. In IPv4's ipmr this was resolved a long time ago by two
+commits that introduced a "notify" parameter to the delete function and
+avoided the unregister when called from a notifier, so let's do the
+same for ip6mr.
+
+The trace from Andrey:
+------------[ cut here ]------------
+kernel BUG at net/core/dev.c:6813!
+invalid opcode: 0000 [#1] SMP KASAN
+Modules linked in:
+CPU: 1 PID: 1165 Comm: kworker/u4:3 Not tainted 4.11.0-rc7+ #251
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs
+01/01/2011
+Workqueue: netns cleanup_net
+task: ffff880069208000 task.stack: ffff8800692d8000
+RIP: 0010:rollback_registered_many+0x348/0xeb0 net/core/dev.c:6813
+RSP: 0018:ffff8800692de7f0 EFLAGS: 00010297
+RAX: ffff880069208000 RBX: 0000000000000002 RCX: 0000000000000001
+RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff88006af90569
+RBP: ffff8800692de9f0 R08: ffff8800692dec60 R09: 0000000000000000
+R10: 0000000000000006 R11: 0000000000000000 R12: ffff88006af90070
+R13: ffff8800692debf0 R14: dffffc0000000000 R15: ffff88006af90000
+FS:  0000000000000000(0000) GS:ffff88006cb00000(0000)
+knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00007fe7e897d870 CR3: 00000000657e7000 CR4: 00000000000006e0
+Call Trace:
+ unregister_netdevice_many.part.105+0x87/0x440 net/core/dev.c:7881
+ unregister_netdevice_many+0xc8/0x120 net/core/dev.c:7880
+ ip6mr_device_event+0x362/0x3f0 net/ipv6/ip6mr.c:1346
+ notifier_call_chain+0x145/0x2f0 kernel/notifier.c:93
+ __raw_notifier_call_chain kernel/notifier.c:394
+ raw_notifier_call_chain+0x2d/0x40 kernel/notifier.c:401
+ call_netdevice_notifiers_info+0x51/0x90 net/core/dev.c:1647
+ call_netdevice_notifiers net/core/dev.c:1663
+ rollback_registered_many+0x919/0xeb0 net/core/dev.c:6841
+ unregister_netdevice_many.part.105+0x87/0x440 net/core/dev.c:7881
+ unregister_netdevice_many net/core/dev.c:7880
+ default_device_exit_batch+0x4fa/0x640 net/core/dev.c:8333
+ ops_exit_list.isra.4+0x100/0x150 net/core/net_namespace.c:144
+ cleanup_net+0x5a8/0xb40 net/core/net_namespace.c:463
+ process_one_work+0xc04/0x1c10 kernel/workqueue.c:2097
+ worker_thread+0x223/0x19c0 kernel/workqueue.c:2231
+ kthread+0x35e/0x430 kernel/kthread.c:231
+ ret_from_fork+0x31/0x40 arch/x86/entry/entry_64.S:430
+Code: 3c 32 00 0f 85 70 0b 00 00 48 b8 00 02 00 00 00 00 ad de 49 89
+47 78 e9 93 fe ff ff 49 8d 57 70 49 8d 5f 78 eb 9e e8 88 7a 14 fe <0f>
+0b 48 8b 9d 28 fe ff ff e8 7a 7a 14 fe 48 b8 00 00 00 00 00
+RIP: rollback_registered_many+0x348/0xeb0 RSP: ffff8800692de7f0
+---[ end trace e0b29c57e9b3292c ]---
+
+Reported-by: Andrey Konovalov <andreyknvl@google.com>
+Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Tested-by: Andrey Konovalov <andreyknvl@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6mr.c |   13 ++++++-------
+ 1 file changed, 6 insertions(+), 7 deletions(-)
+
+--- a/net/ipv6/ip6mr.c
++++ b/net/ipv6/ip6mr.c
+@@ -774,7 +774,8 @@ failure:
+  *    Delete a VIF entry
+  */
+-static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
++static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
++                     struct list_head *head)
+ {
+       struct mif_device *v;
+       struct net_device *dev;
+@@ -820,7 +821,7 @@ static int mif6_delete(struct mr6_table
+                                            dev->ifindex, &in6_dev->cnf);
+       }
+-      if (v->flags & MIFF_REGISTER)
++      if ((v->flags & MIFF_REGISTER) && !notify)
+               unregister_netdevice_queue(dev, head);
+       dev_put(dev);
+@@ -1331,7 +1332,6 @@ static int ip6mr_device_event(struct not
+       struct mr6_table *mrt;
+       struct mif_device *v;
+       int ct;
+-      LIST_HEAD(list);
+       if (event != NETDEV_UNREGISTER)
+               return NOTIFY_DONE;
+@@ -1340,10 +1340,9 @@ static int ip6mr_device_event(struct not
+               v = &mrt->vif6_table[0];
+               for (ct = 0; ct < mrt->maxvif; ct++, v++) {
+                       if (v->dev == dev)
+-                              mif6_delete(mrt, ct, &list);
++                              mif6_delete(mrt, ct, 1, NULL);
+               }
+       }
+-      unregister_netdevice_many(&list);
+       return NOTIFY_DONE;
+ }
+@@ -1552,7 +1551,7 @@ static void mroute_clean_tables(struct m
+       for (i = 0; i < mrt->maxvif; i++) {
+               if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC))
+                       continue;
+-              mif6_delete(mrt, i, &list);
++              mif6_delete(mrt, i, 0, &list);
+       }
+       unregister_netdevice_many(&list);
+@@ -1706,7 +1705,7 @@ int ip6_mroute_setsockopt(struct sock *s
+               if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
+                       return -EFAULT;
+               rtnl_lock();
+-              ret = mif6_delete(mrt, mifi, NULL);
++              ret = mif6_delete(mrt, mifi, 0, NULL);
+               rtnl_unlock();
+               return ret;
diff --git a/queue-4.9/ipv6-check-raw-payload-size-correctly-in-ioctl.patch b/queue-4.9/ipv6-check-raw-payload-size-correctly-in-ioctl.patch
new file mode 100644 (file)
index 0000000..91babbc
--- /dev/null
@@ -0,0 +1,39 @@
+From foo@baz Sat Apr 29 08:22:13 CEST 2017
+From: Jamie Bainbridge <jbainbri@redhat.com>
+Date: Wed, 26 Apr 2017 10:43:27 +1000
+Subject: ipv6: check raw payload size correctly in ioctl
+
+From: Jamie Bainbridge <jbainbri@redhat.com>
+
+
+[ Upstream commit 105f5528b9bbaa08b526d3405a5bcd2ff0c953c8 ]
+
+In situations where an skb is paged, the transport header pointer and
+tail pointer can be the same because the skb contents are in frags.
+
+This results in ioctl(SIOCINQ/FIONREAD) incorrectly returning a
+length of 0 when the length to receive is actually greater than zero.
+
+skb->len is already correctly set in ip6_input_finish() with
+pskb_pull(), so use skb->len as it always returns the correct result
+for both linear and paged data.
+
+Signed-off-by: Jamie Bainbridge <jbainbri@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/raw.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/net/ipv6/raw.c
++++ b/net/ipv6/raw.c
+@@ -1171,8 +1171,7 @@ static int rawv6_ioctl(struct sock *sk,
+               spin_lock_bh(&sk->sk_receive_queue.lock);
+               skb = skb_peek(&sk->sk_receive_queue);
+               if (skb)
+-                      amount = skb_tail_pointer(skb) -
+-                              skb_transport_header(skb);
++                      amount = skb->len;
+               spin_unlock_bh(&sk->sk_receive_queue.lock);
+               return put_user(amount, (int __user *)arg);
+       }
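
The ioctl fixed here is the one userspace reaches as SIOCINQ/FIONREAD on a raw
IPv6 socket. A minimal, hedged userspace sketch of querying it (the ICMPv6
protocol choice and the lack of an actual queued datagram are illustrative;
opening a raw socket requires CAP_NET_RAW):

#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <netinet/in.h>

int main(void)
{
        int pending = 0;
        int fd = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);

        if (fd < 0) {
                perror("socket");
                return 1;
        }
        /* Before the fix, this could report 0 for a queued datagram whose
         * payload sat entirely in page frags; it now reflects skb->len. */
        if (ioctl(fd, FIONREAD, &pending) < 0) {
                perror("ioctl(FIONREAD)");
                return 1;
        }
        printf("bytes in next pending datagram: %d\n", pending);
        return 0;
}
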
diff --git a/queue-4.9/ipv6-check-skb-protocol-before-lookup-for-nexthop.patch b/queue-4.9/ipv6-check-skb-protocol-before-lookup-for-nexthop.patch
new file mode 100644 (file)
index 0000000..5ae4230
--- /dev/null
@@ -0,0 +1,108 @@
+From foo@baz Sat Apr 29 08:22:13 CEST 2017
+From: WANG Cong <xiyou.wangcong@gmail.com>
+Date: Tue, 25 Apr 2017 14:37:15 -0700
+Subject: ipv6: check skb->protocol before lookup for nexthop
+
+From: WANG Cong <xiyou.wangcong@gmail.com>
+
+
+[ Upstream commit 199ab00f3cdb6f154ea93fa76fd80192861a821d ]
+
+Andrey reported an out-of-bounds access in ip6_tnl_xmit(). This is
+because we use an IPv4 dst in ip6_tnl_xmit() and cast an IPv4 neigh
+key as an IPv6 address:
+
+        neigh = dst_neigh_lookup(skb_dst(skb),
+                                 &ipv6_hdr(skb)->daddr);
+        if (!neigh)
+                goto tx_err_link_failure;
+
+        addr6 = (struct in6_addr *)&neigh->primary_key; // <=== HERE
+        addr_type = ipv6_addr_type(addr6);
+
+        if (addr_type == IPV6_ADDR_ANY)
+                addr6 = &ipv6_hdr(skb)->daddr;
+
+        memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
+
+Also, the network header of the skb at this point should still be IPv4
+for 4in6 tunnels; we should not just use it as an IPv6 header.
+
+This patch fixes it by checking if skb->protocol is ETH_P_IPV6: if it
+is, we are safe to do the nexthop lookup using skb_dst() and
+ipv6_hdr(skb)->daddr; if not (i.e. IPv4), we have no clue about which
+dest address we can pick here, so we have to rely on callers to fill it
+in from the tunnel config and just fall back to ip6_route_output() to
+make the decision.
+
+Fixes: ea3dc9601bda ("ip6_tunnel: Add support for wildcard tunnel endpoints.")
+Reported-by: Andrey Konovalov <andreyknvl@google.com>
+Tested-by: Andrey Konovalov <andreyknvl@google.com>
+Cc: Steffen Klassert <steffen.klassert@secunet.com>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_tunnel.c |   44 +++++++++++++++++++++++---------------------
+ 1 file changed, 23 insertions(+), 21 deletions(-)
+
+--- a/net/ipv6/ip6_tunnel.c
++++ b/net/ipv6/ip6_tunnel.c
+@@ -1037,7 +1037,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, st
+       struct ip6_tnl *t = netdev_priv(dev);
+       struct net *net = t->net;
+       struct net_device_stats *stats = &t->dev->stats;
+-      struct ipv6hdr *ipv6h = ipv6_hdr(skb);
++      struct ipv6hdr *ipv6h;
+       struct ipv6_tel_txoption opt;
+       struct dst_entry *dst = NULL, *ndst = NULL;
+       struct net_device *tdev;
+@@ -1057,26 +1057,28 @@ int ip6_tnl_xmit(struct sk_buff *skb, st
+       /* NBMA tunnel */
+       if (ipv6_addr_any(&t->parms.raddr)) {
+-              struct in6_addr *addr6;
+-              struct neighbour *neigh;
+-              int addr_type;
+-
+-              if (!skb_dst(skb))
+-                      goto tx_err_link_failure;
+-
+-              neigh = dst_neigh_lookup(skb_dst(skb),
+-                                       &ipv6_hdr(skb)->daddr);
+-              if (!neigh)
+-                      goto tx_err_link_failure;
+-
+-              addr6 = (struct in6_addr *)&neigh->primary_key;
+-              addr_type = ipv6_addr_type(addr6);
+-
+-              if (addr_type == IPV6_ADDR_ANY)
+-                      addr6 = &ipv6_hdr(skb)->daddr;
+-
+-              memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
+-              neigh_release(neigh);
++              if (skb->protocol == htons(ETH_P_IPV6)) {
++                      struct in6_addr *addr6;
++                      struct neighbour *neigh;
++                      int addr_type;
++
++                      if (!skb_dst(skb))
++                              goto tx_err_link_failure;
++
++                      neigh = dst_neigh_lookup(skb_dst(skb),
++                                               &ipv6_hdr(skb)->daddr);
++                      if (!neigh)
++                              goto tx_err_link_failure;
++
++                      addr6 = (struct in6_addr *)&neigh->primary_key;
++                      addr_type = ipv6_addr_type(addr6);
++
++                      if (addr_type == IPV6_ADDR_ANY)
++                              addr6 = &ipv6_hdr(skb)->daddr;
++
++                      memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
++                      neigh_release(neigh);
++              }
+       } else if (!(t->parms.flags &
+                    (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) {
+               /* enable the cache only only if the routing decision does
diff --git a/queue-4.9/ipv6-fix-idev-addr_list-corruption.patch b/queue-4.9/ipv6-fix-idev-addr_list-corruption.patch
new file mode 100644 (file)
index 0000000..f7a4b39
--- /dev/null
@@ -0,0 +1,70 @@
+From foo@baz Sat Apr 29 08:22:13 CEST 2017
+From: Rabin Vincent <rabinv@axis.com>
+Date: Mon, 10 Apr 2017 08:36:39 +0200
+Subject: ipv6: Fix idev->addr_list corruption
+
+From: Rabin Vincent <rabinv@axis.com>
+
+
+[ Upstream commit a2d6cbb0670d54806f18192cb0db266b4a6d285a ]
+
+addrconf_ifdown() removes elements from the idev->addr_list without
+holding the idev->lock.
+
+If this happens while the loop in __ipv6_dev_get_saddr() is handling the
+same element, that function ends up in an infinite loop:
+
+  NMI watchdog: BUG: soft lockup - CPU#1 stuck for 23s! [test:1719]
+  Call Trace:
+   ipv6_get_saddr_eval+0x13c/0x3a0
+   __ipv6_dev_get_saddr+0xe4/0x1f0
+   ipv6_dev_get_saddr+0x1b4/0x204
+   ip6_dst_lookup_tail+0xcc/0x27c
+   ip6_dst_lookup_flow+0x38/0x80
+   udpv6_sendmsg+0x708/0xba8
+   sock_sendmsg+0x18/0x30
+   SyS_sendto+0xb8/0xf8
+   syscall_common+0x34/0x58
+
+Fixes: 6a923934c33 (Revert "ipv6: Revert optional address flusing on ifdown.")
+Signed-off-by: Rabin Vincent <rabinv@axis.com>
+Acked-by: David Ahern <dsa@cumulusnetworks.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/addrconf.c |   11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+--- a/net/ipv6/addrconf.c
++++ b/net/ipv6/addrconf.c
+@@ -3602,14 +3602,19 @@ restart:
+       INIT_LIST_HEAD(&del_list);
+       list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) {
+               struct rt6_info *rt = NULL;
++              bool keep;
+               addrconf_del_dad_work(ifa);
++              keep = keep_addr && (ifa->flags & IFA_F_PERMANENT) &&
++                      !addr_is_local(&ifa->addr);
++              if (!keep)
++                      list_move(&ifa->if_list, &del_list);
++
+               write_unlock_bh(&idev->lock);
+               spin_lock_bh(&ifa->lock);
+-              if (keep_addr && (ifa->flags & IFA_F_PERMANENT) &&
+-                  !addr_is_local(&ifa->addr)) {
++              if (keep) {
+                       /* set state to skip the notifier below */
+                       state = INET6_IFADDR_STATE_DEAD;
+                       ifa->state = 0;
+@@ -3621,8 +3626,6 @@ restart:
+               } else {
+                       state = ifa->state;
+                       ifa->state = INET6_IFADDR_STATE_DEAD;
+-
+-                      list_move(&ifa->if_list, &del_list);
+               }
+               spin_unlock_bh(&ifa->lock);
diff --git a/queue-4.9/kcm-return-immediately-after-copy_from_user-failure.patch b/queue-4.9/kcm-return-immediately-after-copy_from_user-failure.patch
new file mode 100644 (file)
index 0000000..9b5dfe7
--- /dev/null
@@ -0,0 +1,51 @@
+From foo@baz Sat Apr 29 08:22:13 CEST 2017
+From: WANG Cong <xiyou.wangcong@gmail.com>
+Date: Thu, 23 Mar 2017 11:03:31 -0700
+Subject: kcm: return immediately after copy_from_user() failure
+
+From: WANG Cong <xiyou.wangcong@gmail.com>
+
+
+[ Upstream commit a80db69e47d764bbcaf2fec54b1f308925e7c490 ]
+
+There is no reason to continue after a copy_from_user()
+failure.
+
+Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module")
+Cc: Tom Herbert <tom@herbertland.com>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/kcm/kcmsock.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/net/kcm/kcmsock.c
++++ b/net/kcm/kcmsock.c
+@@ -1685,7 +1685,7 @@ static int kcm_ioctl(struct socket *sock
+               struct kcm_attach info;
+               if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
+-                      err = -EFAULT;
++                      return -EFAULT;
+               err = kcm_attach_ioctl(sock, &info);
+@@ -1695,7 +1695,7 @@ static int kcm_ioctl(struct socket *sock
+               struct kcm_unattach info;
+               if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
+-                      err = -EFAULT;
++                      return -EFAULT;
+               err = kcm_unattach_ioctl(sock, &info);
+@@ -1706,7 +1706,7 @@ static int kcm_ioctl(struct socket *sock
+               struct socket *newsock = NULL;
+               if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
+-                      err = -EFAULT;
++                      return -EFAULT;
+               err = kcm_clone(sock, &info, &newsock);
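
The change applies the usual early-return idiom for copy_from_user(). A
generic, hedged sketch of that pattern follows; the struct and function names
are invented for illustration and are not KCM's:

#include <linux/errno.h>
#include <linux/net.h>
#include <linux/uaccess.h>

struct example_req {                            /* illustrative only */
        int index;
};

static long example_do_request(struct socket *sock, struct example_req *req)
{
        return 0;                               /* stand-in for the real work */
}

static long example_ioctl(struct socket *sock, unsigned long arg)
{
        struct example_req req;

        /* copy_from_user() returns the number of bytes it could NOT copy, so
         * any non-zero result means 'req' is unusable: stop immediately. */
        if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
                return -EFAULT;

        return example_do_request(sock, &req);
}
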
diff --git a/queue-4.9/l2tp-fix-ppp-pseudo-wire-auto-loading.patch b/queue-4.9/l2tp-fix-ppp-pseudo-wire-auto-loading.patch
new file mode 100644 (file)
index 0000000..cce0924
--- /dev/null
@@ -0,0 +1,28 @@
+From foo@baz Sat Apr 29 08:22:13 CEST 2017
+From: Guillaume Nault <g.nault@alphalink.fr>
+Date: Mon, 3 Apr 2017 13:23:15 +0200
+Subject: l2tp: fix PPP pseudo-wire auto-loading
+
+From: Guillaume Nault <g.nault@alphalink.fr>
+
+
+[ Upstream commit 249ee819e24c180909f43c1173c8ef6724d21faf ]
+
+PPP pseudo-wire type is 7 (11 is L2TP_PWTYPE_IP).
+
+Fixes: f1f39f911027 ("l2tp: auto load type modules")
+Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/l2tp/l2tp_ppp.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/l2tp/l2tp_ppp.c
++++ b/net/l2tp/l2tp_ppp.c
+@@ -1848,4 +1848,4 @@ MODULE_DESCRIPTION("PPP over L2TP over U
+ MODULE_LICENSE("GPL");
+ MODULE_VERSION(PPPOL2TP_DRV_VERSION);
+ MODULE_ALIAS_NET_PF_PROTO(PF_PPPOX, PX_PROTO_OL2TP);
+-MODULE_ALIAS_L2TP_PWTYPE(11);
++MODULE_ALIAS_L2TP_PWTYPE(7);
diff --git a/queue-4.9/l2tp-hold-tunnel-socket-when-handling-control-frames-in-l2tp_ip-and-l2tp_ip6.patch b/queue-4.9/l2tp-hold-tunnel-socket-when-handling-control-frames-in-l2tp_ip-and-l2tp_ip6.patch
new file mode 100644 (file)
index 0000000..66d9978
--- /dev/null
@@ -0,0 +1,55 @@
+From foo@baz Sat Apr 29 08:22:13 CEST 2017
+From: Guillaume Nault <g.nault@alphalink.fr>
+Date: Wed, 29 Mar 2017 08:44:59 +0200
+Subject: l2tp: hold tunnel socket when handling control frames in l2tp_ip and l2tp_ip6
+
+From: Guillaume Nault <g.nault@alphalink.fr>
+
+
+[ Upstream commit 94d7ee0baa8b764cf64ad91ed69464c1a6a0066b ]
+
+The code following l2tp_tunnel_find() expects that a new reference is
+held on sk. Either sk_receive_skb() or the discard_put error path will
+drop a reference from the tunnel's socket.
+
+This issue exists in both l2tp_ip and l2tp_ip6.
+
+Fixes: a3c18422a4b4 ("l2tp: hold socket before dropping lock in l2tp_ip{, 6}_recv()")
+Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/l2tp/l2tp_ip.c  |    5 +++--
+ net/l2tp/l2tp_ip6.c |    5 +++--
+ 2 files changed, 6 insertions(+), 4 deletions(-)
+
+--- a/net/l2tp/l2tp_ip.c
++++ b/net/l2tp/l2tp_ip.c
+@@ -178,9 +178,10 @@ pass_up:
+       tunnel_id = ntohl(*(__be32 *) &skb->data[4]);
+       tunnel = l2tp_tunnel_find(net, tunnel_id);
+-      if (tunnel != NULL)
++      if (tunnel) {
+               sk = tunnel->sock;
+-      else {
++              sock_hold(sk);
++      } else {
+               struct iphdr *iph = (struct iphdr *) skb_network_header(skb);
+               read_lock_bh(&l2tp_ip_lock);
+--- a/net/l2tp/l2tp_ip6.c
++++ b/net/l2tp/l2tp_ip6.c
+@@ -191,9 +191,10 @@ pass_up:
+       tunnel_id = ntohl(*(__be32 *) &skb->data[4]);
+       tunnel = l2tp_tunnel_find(net, tunnel_id);
+-      if (tunnel != NULL)
++      if (tunnel) {
+               sk = tunnel->sock;
+-      else {
++              sock_hold(sk);
++      } else {
+               struct ipv6hdr *iph = ipv6_hdr(skb);
+               read_lock_bh(&l2tp_ip6_lock);
diff --git a/queue-4.9/l2tp-purge-socket-queues-in-the-.destruct-callback.patch b/queue-4.9/l2tp-purge-socket-queues-in-the-.destruct-callback.patch
new file mode 100644 (file)
index 0000000..7de2943
--- /dev/null
@@ -0,0 +1,49 @@
+From foo@baz Sat Apr 29 08:22:13 CEST 2017
+From: Guillaume Nault <g.nault@alphalink.fr>
+Date: Wed, 29 Mar 2017 08:45:29 +0200
+Subject: l2tp: purge socket queues in the .destruct() callback
+
+From: Guillaume Nault <g.nault@alphalink.fr>
+
+
+[ Upstream commit e91793bb615cf6cdd59c0b6749fe173687bb0947 ]
+
+The Rx path may grab the socket right before pppol2tp_release(), but
+nothing guarantees that it will enqueue packets before
+skb_queue_purge(). Therefore, the socket can be destroyed without its
+queues fully purged.
+
+Fix this by purging queues in pppol2tp_session_destruct() where we're
+guaranteed nothing is still referencing the socket.
+
+Fixes: 9e9cb6221aa7 ("l2tp: fix userspace reception on plain L2TP sockets")
+Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/l2tp/l2tp_ppp.c |    7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/net/l2tp/l2tp_ppp.c
++++ b/net/l2tp/l2tp_ppp.c
+@@ -450,6 +450,10 @@ static void pppol2tp_session_close(struc
+ static void pppol2tp_session_destruct(struct sock *sk)
+ {
+       struct l2tp_session *session = sk->sk_user_data;
++
++      skb_queue_purge(&sk->sk_receive_queue);
++      skb_queue_purge(&sk->sk_write_queue);
++
+       if (session) {
+               sk->sk_user_data = NULL;
+               BUG_ON(session->magic != L2TP_SESSION_MAGIC);
+@@ -488,9 +492,6 @@ static int pppol2tp_release(struct socke
+               l2tp_session_queue_purge(session);
+               sock_put(sk);
+       }
+-      skb_queue_purge(&sk->sk_receive_queue);
+-      skb_queue_purge(&sk->sk_write_queue);
+-
+       release_sock(sk);
+       /* This will delete the session context via
diff --git a/queue-4.9/l2tp-take-reference-on-sessions-being-dumped.patch b/queue-4.9/l2tp-take-reference-on-sessions-being-dumped.patch
new file mode 100644 (file)
index 0000000..f236b15
--- /dev/null
@@ -0,0 +1,159 @@
+From foo@baz Sat Apr 29 08:22:13 CEST 2017
+From: Guillaume Nault <g.nault@alphalink.fr>
+Date: Mon, 3 Apr 2017 12:03:13 +0200
+Subject: l2tp: take reference on sessions being dumped
+
+From: Guillaume Nault <g.nault@alphalink.fr>
+
+
+[ Upstream commit e08293a4ccbcc993ded0fdc46f1e57926b833d63 ]
+
+Take a reference on the sessions returned by l2tp_session_find_nth()
+(and rename it l2tp_session_get_nth() to reflect this change), so that
+caller is assured that the session isn't going to disappear while
+processing it.
+
+For procfs and debugfs handlers, the session is held in the .start()
+callback and dropped in .show(). Given that pppol2tp_seq_session_show()
+dereferences the associated PPPoL2TP socket and that
+l2tp_dfs_seq_session_show() might call pppol2tp_show(), we also need to
+call the session's .ref() callback to prevent the socket from going
+away from under us.
+
+Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts")
+Fixes: 0ad6614048cf ("l2tp: Add debugfs files for dumping l2tp debug info")
+Fixes: 309795f4bec2 ("l2tp: Add netlink control API for L2TP")
+Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/l2tp/l2tp_core.c    |    8 ++++++--
+ net/l2tp/l2tp_core.h    |    3 ++-
+ net/l2tp/l2tp_debugfs.c |   10 +++++++---
+ net/l2tp/l2tp_netlink.c |    7 +++++--
+ net/l2tp/l2tp_ppp.c     |   10 +++++++---
+ 5 files changed, 27 insertions(+), 11 deletions(-)
+
+--- a/net/l2tp/l2tp_core.c
++++ b/net/l2tp/l2tp_core.c
+@@ -278,7 +278,8 @@ struct l2tp_session *l2tp_session_find(s
+ }
+ EXPORT_SYMBOL_GPL(l2tp_session_find);
+-struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth)
++struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth,
++                                        bool do_ref)
+ {
+       int hash;
+       struct l2tp_session *session;
+@@ -288,6 +289,9 @@ struct l2tp_session *l2tp_session_find_n
+       for (hash = 0; hash < L2TP_HASH_SIZE; hash++) {
+               hlist_for_each_entry(session, &tunnel->session_hlist[hash], hlist) {
+                       if (++count > nth) {
++                              l2tp_session_inc_refcount(session);
++                              if (do_ref && session->ref)
++                                      session->ref(session);
+                               read_unlock_bh(&tunnel->hlist_lock);
+                               return session;
+                       }
+@@ -298,7 +302,7 @@ struct l2tp_session *l2tp_session_find_n
+       return NULL;
+ }
+-EXPORT_SYMBOL_GPL(l2tp_session_find_nth);
++EXPORT_SYMBOL_GPL(l2tp_session_get_nth);
+ /* Lookup a session by interface name.
+  * This is very inefficient but is only used by management interfaces.
+--- a/net/l2tp/l2tp_core.h
++++ b/net/l2tp/l2tp_core.h
+@@ -243,7 +243,8 @@ out:
+ struct l2tp_session *l2tp_session_find(struct net *net,
+                                      struct l2tp_tunnel *tunnel,
+                                      u32 session_id);
+-struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth);
++struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth,
++                                        bool do_ref);
+ struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname);
+ struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id);
+ struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth);
+--- a/net/l2tp/l2tp_debugfs.c
++++ b/net/l2tp/l2tp_debugfs.c
+@@ -53,7 +53,7 @@ static void l2tp_dfs_next_tunnel(struct
+ static void l2tp_dfs_next_session(struct l2tp_dfs_seq_data *pd)
+ {
+-      pd->session = l2tp_session_find_nth(pd->tunnel, pd->session_idx);
++      pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx, true);
+       pd->session_idx++;
+       if (pd->session == NULL) {
+@@ -238,10 +238,14 @@ static int l2tp_dfs_seq_show(struct seq_
+       }
+       /* Show the tunnel or session context */
+-      if (pd->session == NULL)
++      if (!pd->session) {
+               l2tp_dfs_seq_tunnel_show(m, pd->tunnel);
+-      else
++      } else {
+               l2tp_dfs_seq_session_show(m, pd->session);
++              if (pd->session->deref)
++                      pd->session->deref(pd->session);
++              l2tp_session_dec_refcount(pd->session);
++      }
+ out:
+       return 0;
+--- a/net/l2tp/l2tp_netlink.c
++++ b/net/l2tp/l2tp_netlink.c
+@@ -844,7 +844,7 @@ static int l2tp_nl_cmd_session_dump(stru
+                               goto out;
+               }
+-              session = l2tp_session_find_nth(tunnel, si);
++              session = l2tp_session_get_nth(tunnel, si, false);
+               if (session == NULL) {
+                       ti++;
+                       tunnel = NULL;
+@@ -854,8 +854,11 @@ static int l2tp_nl_cmd_session_dump(stru
+               if (l2tp_nl_session_send(skb, NETLINK_CB(cb->skb).portid,
+                                        cb->nlh->nlmsg_seq, NLM_F_MULTI,
+-                                       session, L2TP_CMD_SESSION_GET) < 0)
++                                       session, L2TP_CMD_SESSION_GET) < 0) {
++                      l2tp_session_dec_refcount(session);
+                       break;
++              }
++              l2tp_session_dec_refcount(session);
+               si++;
+       }
+--- a/net/l2tp/l2tp_ppp.c
++++ b/net/l2tp/l2tp_ppp.c
+@@ -1555,7 +1555,7 @@ static void pppol2tp_next_tunnel(struct
+ static void pppol2tp_next_session(struct net *net, struct pppol2tp_seq_data *pd)
+ {
+-      pd->session = l2tp_session_find_nth(pd->tunnel, pd->session_idx);
++      pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx, true);
+       pd->session_idx++;
+       if (pd->session == NULL) {
+@@ -1682,10 +1682,14 @@ static int pppol2tp_seq_show(struct seq_
+       /* Show the tunnel or session context.
+        */
+-      if (pd->session == NULL)
++      if (!pd->session) {
+               pppol2tp_seq_tunnel_show(m, pd->tunnel);
+-      else
++      } else {
+               pppol2tp_seq_session_show(m, pd->session);
++              if (pd->session->deref)
++                      pd->session->deref(pd->session);
++              l2tp_session_dec_refcount(pd->session);
++      }
+ out:
+       return 0;
diff --git a/queue-4.9/macvlan-fix-device-ref-leak-when-purging-bc_queue.patch b/queue-4.9/macvlan-fix-device-ref-leak-when-purging-bc_queue.patch
new file mode 100644 (file)
index 0000000..a6f5f40
--- /dev/null
@@ -0,0 +1,52 @@
+From foo@baz Sat Apr 29 08:22:13 CEST 2017
+From: Herbert Xu <herbert@gondor.apana.org.au>
+Date: Thu, 20 Apr 2017 20:55:12 +0800
+Subject: macvlan: Fix device ref leak when purging bc_queue
+
+From: Herbert Xu <herbert@gondor.apana.org.au>
+
+
+[ Upstream commit f6478218e6edc2a587b8f132f66373baa7b2497c ]
+
+When a parent macvlan device is destroyed we end up purging its
+broadcast queue without dropping the device reference count on
+the packet source device.  This causes the source device to linger.
+
+This patch drops that reference count.
+
+Fixes: 260916dfb48c ("macvlan: Fix potential use-after free for...")
+Reported-by: Joe Ghalam <Joe.Ghalam@dell.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/macvlan.c |   11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/macvlan.c
++++ b/drivers/net/macvlan.c
+@@ -1140,6 +1140,7 @@ static int macvlan_port_create(struct ne
+ static void macvlan_port_destroy(struct net_device *dev)
+ {
+       struct macvlan_port *port = macvlan_port_get_rtnl(dev);
++      struct sk_buff *skb;
+       dev->priv_flags &= ~IFF_MACVLAN_PORT;
+       netdev_rx_handler_unregister(dev);
+@@ -1148,7 +1149,15 @@ static void macvlan_port_destroy(struct
+        * but we need to cancel it and purge left skbs if any.
+        */
+       cancel_work_sync(&port->bc_work);
+-      __skb_queue_purge(&port->bc_queue);
++
++      while ((skb = __skb_dequeue(&port->bc_queue))) {
++              const struct macvlan_dev *src = MACVLAN_SKB_CB(skb)->src;
++
++              if (src)
++                      dev_put(src->dev);
++
++              kfree_skb(skb);
++      }
+       kfree_rcu(port, rcu);
+ }
diff --git a/queue-4.9/net-ipv4-fix-multipath-rtm_getroute-behavior-when-iif-is-given.patch b/queue-4.9/net-ipv4-fix-multipath-rtm_getroute-behavior-when-iif-is-given.patch
new file mode 100644 (file)
index 0000000..151e2dd
--- /dev/null
@@ -0,0 +1,37 @@
+From foo@baz Sat Apr 29 08:22:13 CEST 2017
+From: Florian Larysch <fl@n621.de>
+Date: Mon, 3 Apr 2017 16:46:09 +0200
+Subject: net: ipv4: fix multipath RTM_GETROUTE behavior when iif is given
+
+From: Florian Larysch <fl@n621.de>
+
+
+[ Upstream commit a8801799c6975601fd58ae62f48964caec2eb83f ]
+
+inet_rtm_getroute synthesizes a skeletal ICMP skb, which is passed to
+ip_route_input when iif is given. If a multipath route is present for
+the designated destination, ip_multipath_icmp_hash ends up being called,
+which uses the source/destination addresses within the skb to calculate
+a hash. However, those are not set in the synthetic skb, causing it to
+return an arbitrary and incorrect result.
+
+Instead, use UDP, which gets no such special treatment.
+
+Signed-off-by: Florian Larysch <fl@n621.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/route.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -2569,7 +2569,7 @@ static int inet_rtm_getroute(struct sk_b
+       skb_reset_network_header(skb);
+       /* Bugfix: need to give ip_route_input enough of an IP header to not gag. */
+-      ip_hdr(skb)->protocol = IPPROTO_ICMP;
++      ip_hdr(skb)->protocol = IPPROTO_UDP;
+       skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
+       src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
diff --git a/queue-4.9/net-ipv6-regenerate-host-route-if-moved-to-gc-list.patch b/queue-4.9/net-ipv6-regenerate-host-route-if-moved-to-gc-list.patch
new file mode 100644 (file)
index 0000000..7bdc906
--- /dev/null
@@ -0,0 +1,81 @@
+From foo@baz Sat Apr 29 08:22:13 CEST 2017
+From: David Ahern <dsa@cumulusnetworks.com>
+Date: Tue, 25 Apr 2017 09:17:29 -0700
+Subject: net: ipv6: regenerate host route if moved to gc list
+
+From: David Ahern <dsa@cumulusnetworks.com>
+
+
+[ Upstream commit 8048ced9beb21a52e3305f3332ae82020619f24e ]
+
+Taking down the loopback device wreaks havoc on IPv6 routing. By
+extension, taking down a VRF device wreaks havoc on its table.
+
+Dmitry and Andrey both reported heap out-of-bounds reports in the IPv6
+FIB code while running syzkaller fuzzer. The root cause is a dead dst
+that is on the garbage list gets reinserted into the IPv6 FIB. While on
+the gc list (or perhaps when it gets added to it), the dst->next is
+set to an IPv4 dst. A subsequent walk of the ipv6 tables causes the
+out-of-bounds access.
+
+Andrey's reproducer was the key to getting to the bottom of this.
+
+With IPv6, host routes for an address have the dst->dev set to the
+loopback device. When the 'lo' device is taken down, rt6_ifdown initiates
+a walk of the fib evicting routes with the 'lo' device which means all
+host routes are removed. That process moves the dst which is attached to
+an inet6_ifaddr to the gc list and marks it as dead.
+
+The recent change to keep global IPv6 addresses added a new function,
+fixup_permanent_addr, that is called on admin up. That function restarts
+dad for an inet6_ifaddr and when it completes the host route attached
+to it is inserted into the fib. Since the route was marked dead and
+moved to the gc list, re-inserting the route causes the reported
+out-of-bounds accesses. If the device with the address is taken down
+or the address is removed, the WARN_ON in fib6_del is triggered.
+
+All of those faults are fixed by regenerating the host route if the
+existing one has been moved to the gc list, something that can be
+determined by checking if the rt6i_ref counter is 0.
+
+Fixes: f1705ec197e7 ("net: ipv6: Make address flushing on ifdown optional")
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Reported-by: Andrey Konovalov <andreyknvl@google.com>
+Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
+Acked-by: Martin KaFai Lau <kafai@fb.com>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/addrconf.c |   14 ++++++++++++--
+ 1 file changed, 12 insertions(+), 2 deletions(-)
+
+--- a/net/ipv6/addrconf.c
++++ b/net/ipv6/addrconf.c
+@@ -3253,14 +3253,24 @@ static void addrconf_gre_config(struct n
+ static int fixup_permanent_addr(struct inet6_dev *idev,
+                               struct inet6_ifaddr *ifp)
+ {
+-      if (!ifp->rt) {
+-              struct rt6_info *rt;
++      /* rt6i_ref == 0 means the host route was removed from the
++       * FIB, for example, if 'lo' device is taken down. In that
++       * case regenerate the host route.
++       */
++      if (!ifp->rt || !atomic_read(&ifp->rt->rt6i_ref)) {
++              struct rt6_info *rt, *prev;
+               rt = addrconf_dst_alloc(idev, &ifp->addr, false);
+               if (unlikely(IS_ERR(rt)))
+                       return PTR_ERR(rt);
++              /* ifp->rt can be accessed outside of rtnl */
++              spin_lock(&ifp->lock);
++              prev = ifp->rt;
+               ifp->rt = rt;
++              spin_unlock(&ifp->lock);
++
++              ip6_rt_put(prev);
+       }
+       if (!(ifp->flags & IFA_F_NOPREFIXROUTE)) {
diff --git a/queue-4.9/net-ipv6-rtf_pcpu-should-not-be-settable-from-userspace.patch b/queue-4.9/net-ipv6-rtf_pcpu-should-not-be-settable-from-userspace.patch
new file mode 100644 (file)
index 0000000..c67fb8e
--- /dev/null
@@ -0,0 +1,79 @@
+From foo@baz Sat Apr 29 08:22:13 CEST 2017
+From: David Ahern <dsa@cumulusnetworks.com>
+Date: Wed, 19 Apr 2017 14:19:43 -0700
+Subject: net: ipv6: RTF_PCPU should not be settable from userspace
+
+From: David Ahern <dsa@cumulusnetworks.com>
+
+
+[ Upstream commit 557c44be917c322860665be3d28376afa84aa936 ]
+
+Andrey reported a fault in the IPv6 route code:
+
+kasan: GPF could be caused by NULL-ptr deref or user memory access
+general protection fault: 0000 [#1] SMP KASAN
+Modules linked in:
+CPU: 1 PID: 4035 Comm: a.out Not tainted 4.11.0-rc7+ #250
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
+task: ffff880069809600 task.stack: ffff880062dc8000
+RIP: 0010:ip6_rt_cache_alloc+0xa6/0x560 net/ipv6/route.c:975
+RSP: 0018:ffff880062dced30 EFLAGS: 00010206
+RAX: dffffc0000000000 RBX: ffff8800670561c0 RCX: 0000000000000006
+RDX: 0000000000000003 RSI: ffff880062dcfb28 RDI: 0000000000000018
+RBP: ffff880062dced68 R08: 0000000000000001 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
+R13: ffff880062dcfb28 R14: dffffc0000000000 R15: 0000000000000000
+FS:  00007feebe37e7c0(0000) GS:ffff88006cb00000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00000000205a0fe4 CR3: 000000006b5c9000 CR4: 00000000000006e0
+Call Trace:
+ ip6_pol_route+0x1512/0x1f20 net/ipv6/route.c:1128
+ ip6_pol_route_output+0x4c/0x60 net/ipv6/route.c:1212
+...
+
+Andrey's syzkaller program passes rtmsg.rtmsg_flags with the RTF_PCPU bit
+set. Flags passed to the kernel are blindly copied to the allocated
+rt6_info by ip6_route_info_create making a newly inserted route appear
+as though it is a per-cpu route. ip6_rt_cache_alloc sees the flag set
+and expects rt->dst.from to be set - which it is not since it is not
+really a per-cpu copy. The subsequent call to __ip6_dst_alloc then
+generates the fault.
+
+Fix by checking for the flag and failing with EINVAL.
+
+Fixes: d52d3997f843f ("ipv6: Create percpu rt6_info")
+Reported-by: Andrey Konovalov <andreyknvl@google.com>
+Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
+Acked-by: Martin KaFai Lau <kafai@fb.com>
+Tested-by: Andrey Konovalov <andreyknvl@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/uapi/linux/ipv6_route.h |    2 +-
+ net/ipv6/route.c                |    4 ++++
+ 2 files changed, 5 insertions(+), 1 deletion(-)
+
+--- a/include/uapi/linux/ipv6_route.h
++++ b/include/uapi/linux/ipv6_route.h
+@@ -34,7 +34,7 @@
+ #define RTF_PREF(pref)        ((pref) << 27)
+ #define RTF_PREF_MASK 0x18000000
+-#define RTF_PCPU      0x40000000
++#define RTF_PCPU      0x40000000      /* read-only: can not be set by user */
+ #define RTF_LOCAL     0x80000000
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -1826,6 +1826,10 @@ static struct rt6_info *ip6_route_info_c
+       int addr_type;
+       int err = -EINVAL;
++      /* RTF_PCPU is an internal flag; can not be set by userspace */
++      if (cfg->fc_flags & RTF_PCPU)
++              goto out;
++
+       if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
+               goto out;
+ #ifndef CONFIG_IPV6_SUBTREES
diff --git a/queue-4.9/net-mlx5-avoid-dereferencing-uninitialized-pointer.patch b/queue-4.9/net-mlx5-avoid-dereferencing-uninitialized-pointer.patch
new file mode 100644 (file)
index 0000000..826a852
--- /dev/null
@@ -0,0 +1,45 @@
+From foo@baz Sat Apr 29 08:22:13 CEST 2017
+From: Talat Batheesh <talatb@mellanox.com>
+Date: Tue, 28 Mar 2017 16:13:41 +0300
+Subject: net/mlx5: Avoid dereferencing uninitialized pointer
+
+From: Talat Batheesh <talatb@mellanox.com>
+
+
+[ Upstream commit e497ec680c4cd51e76bfcdd49363d9ab8d32a757 ]
+
+In NETDEV_CHANGEUPPER event the upper_info field is valid
+only when linking is true. Otherwise it should be ignored.
+
+Fixes: 7907f23adc18 (net/mlx5: Implement RoCE LAG feature)
+Signed-off-by: Talat Batheesh <talatb@mellanox.com>
+Reviewed-by: Aviv Heller <avivh@mellanox.com>
+Reviewed-by: Moni Shoua <monis@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/lag.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+@@ -294,7 +294,7 @@ static int mlx5_handle_changeupper_event
+                                        struct netdev_notifier_changeupper_info *info)
+ {
+       struct net_device *upper = info->upper_dev, *ndev_tmp;
+-      struct netdev_lag_upper_info *lag_upper_info;
++      struct netdev_lag_upper_info *lag_upper_info = NULL;
+       bool is_bonded;
+       int bond_status = 0;
+       int num_slaves = 0;
+@@ -303,7 +303,8 @@ static int mlx5_handle_changeupper_event
+       if (!netif_is_lag_master(upper))
+               return 0;
+-      lag_upper_info = info->upper_info;
++      if (info->linking)
++              lag_upper_info = info->upper_info;
+       /* The event may still be of interest if the slave does not belong to
+        * us, but is enslaved to a master which has one or more of our netdevs
diff --git a/queue-4.9/net-mlx5-fix-driver-load-bad-flow-when-having-fw-initializing-timeout.patch b/queue-4.9/net-mlx5-fix-driver-load-bad-flow-when-having-fw-initializing-timeout.patch
new file mode 100644 (file)
index 0000000..8226436
--- /dev/null
@@ -0,0 +1,33 @@
+From foo@baz Sat Apr 29 08:22:13 CEST 2017
+From: Mohamad Haj Yahia <mohamad@mellanox.com>
+Date: Thu, 30 Mar 2017 17:00:25 +0300
+Subject: net/mlx5: Fix driver load bad flow when having fw initializing timeout
+
+From: Mohamad Haj Yahia <mohamad@mellanox.com>
+
+
+[ Upstream commit 55378a238e04b39cc82957d91d16499704ea719b ]
+
+If FW is stuck in initializing state we will skip the driver load, but
+current error handling flow doesn't clean previously allocated command
+interface resources.
+
+Fixes: e3297246c2c8 ('net/mlx5_core: Wait for FW readiness on startup')
+Signed-off-by: Mohamad Haj Yahia <mohamad@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/main.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+@@ -966,7 +966,7 @@ static int mlx5_load_one(struct mlx5_cor
+       if (err) {
+               dev_err(&dev->pdev->dev, "Firmware over %d MS in initializing state, aborting\n",
+                       FW_INIT_TIMEOUT_MILI);
+-              goto out_err;
++              goto err_cmd_cleanup;
+       }
+       err = mlx5_core_enable_hca(dev, 0);
diff --git a/queue-4.9/net-mlx5e-fix-ethtool_grxclsrlall-handling.patch b/queue-4.9/net-mlx5e-fix-ethtool_grxclsrlall-handling.patch
new file mode 100644 (file)
index 0000000..e3d3aab
--- /dev/null
@@ -0,0 +1,44 @@
+From foo@baz Sat Apr 29 08:22:13 CEST 2017
+From: Ilan Tayari <ilant@mellanox.com>
+Date: Thu, 2 Mar 2017 15:49:45 +0200
+Subject: net/mlx5e: Fix ETHTOOL_GRXCLSRLALL handling
+
+From: Ilan Tayari <ilant@mellanox.com>
+
+
+[ Upstream commit 5e82c9e4ed60beba83f46a1a5a8307b99a23e982 ]
+
+The handler for ETHTOOL_GRXCLSRLALL must set info->data to the size
+of the table, regardless of the number of entries in it.
+The existing code does not do that, which breaks all usage of ethtool -N
+or -n without an explicit location, with this error:
+rmgr: Invalid RX class rules table size: Success
+
+Set info->data to the table size.
+
+Tested:
+ethtool -n ens8
+ethtool -N ens8 flow-type ip4 src-ip 1.1.1.1 dst-ip 2.2.2.2 action 1
+ethtool -N ens8 flow-type ip4 src-ip 1.1.1.1 dst-ip 2.2.2.2 action 1 loc 55
+ethtool -n ens8
+ethtool -N ens8 delete 1023
+ethtool -N ens8 delete 55
+
+Fixes: f913a72aa008 ("net/mlx5e: Add support to get ethtool flow rules")
+Signed-off-by: Ilan Tayari <ilant@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+@@ -563,6 +563,7 @@ int mlx5e_ethtool_get_all_flows(struct m
+       int idx = 0;
+       int err = 0;
++      info->data = MAX_NUM_OF_ETHTOOL_RULES;
+       while ((!err || err == -ENOENT) && idx < info->rule_cnt) {
+               err = mlx5e_ethtool_get_flow(priv, info, location);
+               if (!err)
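
The one-line fix matters because the ethtool rule manager in userspace sizes its table from info->data rather than from rule_cnt. A small stand-alone sketch of that contract, using a hypothetical struct and a made-up MAX_RULES in place of MAX_NUM_OF_ETHTOOL_RULES:

    #include <stdio.h>

    #define MAX_RULES 1024u            /* table capacity; stands in for MAX_NUM_OF_ETHTOOL_RULES */

    struct rxnfc_info {                /* hypothetical, mirrors the two fields the handler must fill */
        unsigned int rule_cnt;         /* how many rules are currently installed */
        unsigned int data;             /* must report the table size, not the rule count */
    };

    static void get_all_flows(struct rxnfc_info *info, unsigned int installed)
    {
        info->rule_cnt = installed;
        info->data = MAX_RULES;        /* without this, userspace sees a zero-sized table */
    }

    int main(void)
    {
        struct rxnfc_info info;

        get_all_flows(&info, 2);
        printf("rules=%u table_size=%u\n", info.rule_cnt, info.data);
        return 0;
    }
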
diff --git a/queue-4.9/net-mlx5e-fix-small-packet-threshold.patch b/queue-4.9/net-mlx5e-fix-small-packet-threshold.patch
new file mode 100644
index 0000000..4ff47bf
--- /dev/null
@@ -0,0 +1,41 @@
+From foo@baz Sat Apr 29 08:22:13 CEST 2017
+From: Eugenia Emantayev <eugenia@mellanox.com>
+Date: Wed, 22 Mar 2017 11:44:14 +0200
+Subject: net/mlx5e: Fix small packet threshold
+
+From: Eugenia Emantayev <eugenia@mellanox.com>
+
+
+[ Upstream commit cbad8cddb6ed7ef3a5f0a9a70f1711d4d7fb9a8f ]
+
+RX packet headers are meant to be contained in the SKB linear part,
+for which a threshold of 128 bytes was chosen.
+It turns out this is not enough, e.g. for an IPv6 packet over VxLAN:
+UDP/IPv4 needs 42 bytes, the GENEVE header is 8 bytes,
+and TCP/IPv6 needs 86 bytes, 136 bytes in total, which is more than
+the current 128 bytes. In that case the expand-header flow is reached.
+The warning in skb_try_coalesce() caused by a wrong truesize
+was already fixed by
+commit 158f323b9868 ("net: adjust skb->truesize in pskb_expand_head()").
+Still, we prefer to avoid the expand-header flow entirely for performance reasons.
+Tested regular TCP_STREAM with iperf for 1 and 8 streams; no degradation was found.
+
+Fixes: 461017cb006a ("net/mlx5e: Support RX multi-packet WQE (Striding RQ)")
+Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
+@@ -82,7 +82,7 @@
+ #define MLX5E_VALID_NUM_MTTS(num_mtts) (MLX5_MTT_OCTW(num_mtts) <= U16_MAX)
+ #define MLX5_UMR_ALIGN                                (2048)
+-#define MLX5_MPWRQ_SMALL_PACKET_THRESHOLD     (128)
++#define MLX5_MPWRQ_SMALL_PACKET_THRESHOLD     (256)
+ #define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ                 (64 * 1024)
+ #define MLX5E_DEFAULT_LRO_TIMEOUT                       32
diff --git a/queue-4.9/net-neigh-guard-against-null-solicit-method.patch b/queue-4.9/net-neigh-guard-against-null-solicit-method.patch
new file mode 100644
index 0000000..e15f091
--- /dev/null
@@ -0,0 +1,38 @@
+From foo@baz Sat Apr 29 08:22:13 CEST 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Thu, 23 Mar 2017 12:39:21 -0700
+Subject: net: neigh: guard against NULL solicit() method
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 48481c8fa16410ffa45939b13b6c53c2ca609e5f ]
+
+Dmitry posted a nice reproducer of a bug triggering in neigh_probe()
+when dereferencing a NULL neigh->ops->solicit method.
+
+This can happen for arp_direct_ops/ndisc_direct_ops and similar,
+which can be used for NUD_NOARP neighbours (created when dev->header_ops
+is NULL). An admin can then force nud_state into some other state
+that would fire the neigh timer.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/neighbour.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/core/neighbour.c
++++ b/net/core/neighbour.c
+@@ -859,7 +859,8 @@ static void neigh_probe(struct neighbour
+       if (skb)
+               skb = skb_clone(skb, GFP_ATOMIC);
+       write_unlock(&neigh->lock);
+-      neigh->ops->solicit(neigh, skb);
++      if (neigh->ops->solicit)
++              neigh->ops->solicit(neigh, skb);
+       atomic_inc(&neigh->probes);
+       kfree_skb(skb);
+ }
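
The guard is the usual pattern for optional callbacks in an ops table: test the function pointer before calling through it. A tiny self-contained illustration with hypothetical types, not the kernel neighbour API:

    #include <stdio.h>
    #include <stddef.h>

    /* Hypothetical ops table; solicit may legitimately be NULL (cf. NUD_NOARP). */
    struct nops {
        void (*solicit)(const char *name);
    };

    static void probe(const struct nops *ops, const char *name)
    {
        if (ops->solicit)              /* guard before calling through the pointer */
            ops->solicit(name);
        else
            printf("no solicit op for %s, skipping\n", name);
    }

    int main(void)
    {
        struct nops noarp = { .solicit = NULL };

        probe(&noarp, "dummy0");       /* would oops in the kernel without the check */
        return 0;
    }
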
diff --git a/queue-4.9/net-packet-fix-overflow-in-check-for-tp_frame_nr.patch b/queue-4.9/net-packet-fix-overflow-in-check-for-tp_frame_nr.patch
new file mode 100644
index 0000000..30332f4
--- /dev/null
@@ -0,0 +1,37 @@
+From foo@baz Sat Apr 29 08:22:13 CEST 2017
+From: Andrey Konovalov <andreyknvl@google.com>
+Date: Wed, 29 Mar 2017 16:11:21 +0200
+Subject: net/packet: fix overflow in check for tp_frame_nr
+
+From: Andrey Konovalov <andreyknvl@google.com>
+
+
+[ Upstream commit 8f8d28e4d6d815a391285e121c3a53a0b6cb9e7b ]
+
+When calculating rb->frames_per_block * req->tp_block_nr the result
+can overflow.
+
+Add a check that tp_block_size * tp_block_nr <= UINT_MAX.
+
+Since frames_per_block <= tp_block_size, the expression would
+never overflow.
+
+Signed-off-by: Andrey Konovalov <andreyknvl@google.com>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/packet/af_packet.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -4247,6 +4247,8 @@ static int packet_set_ring(struct sock *
+               rb->frames_per_block = req->tp_block_size / req->tp_frame_size;
+               if (unlikely(rb->frames_per_block == 0))
+                       goto out;
++              if (unlikely(req->tp_block_size > UINT_MAX / req->tp_block_nr))
++                      goto out;
+               if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
+                                       req->tp_frame_nr))
+                       goto out;
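
The added check avoids performing the multiplication at all: comparing one factor against UINT_MAX divided by the other can itself never overflow. A small stand-alone demonstration of the test with made-up values:

    #include <stdio.h>
    #include <limits.h>

    /* Returns 1 if a * b would exceed UINT_MAX, using only a division. */
    static int mul_overflows_uint(unsigned int a, unsigned int b)
    {
        return b != 0 && a > UINT_MAX / b;
    }

    int main(void)
    {
        unsigned int block_size = 1u << 22, block_nr = 1u << 12;   /* hypothetical values */

        if (mul_overflows_uint(block_size, block_nr))
            puts("rejected: tp_block_size * tp_block_nr overflows");
        else
            printf("total %u bytes\n", block_size * block_nr);
        return 0;
    }
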
diff --git a/queue-4.9/net-packet-fix-overflow-in-check-for-tp_reserve.patch b/queue-4.9/net-packet-fix-overflow-in-check-for-tp_reserve.patch
new file mode 100644
index 0000000..5759887
--- /dev/null
@@ -0,0 +1,33 @@
+From foo@baz Sat Apr 29 08:22:13 CEST 2017
+From: Andrey Konovalov <andreyknvl@google.com>
+Date: Wed, 29 Mar 2017 16:11:22 +0200
+Subject: net/packet: fix overflow in check for tp_reserve
+
+From: Andrey Konovalov <andreyknvl@google.com>
+
+
+[ Upstream commit bcc5364bdcfe131e6379363f089e7b4108d35b70 ]
+
+When calculating po->tp_hdrlen + po->tp_reserve the result can overflow.
+
+Fix by checking that tp_reserve <= INT_MAX on assign.
+
+Signed-off-by: Andrey Konovalov <andreyknvl@google.com>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/packet/af_packet.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -3702,6 +3702,8 @@ packet_setsockopt(struct socket *sock, i
+                       return -EBUSY;
+               if (copy_from_user(&val, optval, sizeof(val)))
+                       return -EFAULT;
++              if (val > INT_MAX)
++                      return -EINVAL;
+               po->tp_reserve = val;
+               return 0;
+       }
diff --git a/queue-4.9/net-phy-fix-auto-negotiation-stall-due-to-unavailable-interrupt.patch b/queue-4.9/net-phy-fix-auto-negotiation-stall-due-to-unavailable-interrupt.patch
new file mode 100644
index 0000000..046cd98
--- /dev/null
@@ -0,0 +1,129 @@
+From foo@baz Sat Apr 29 08:22:13 CEST 2017
+From: Alexander Kochetkov <al.kochet@gmail.com>
+Date: Thu, 20 Apr 2017 14:00:04 +0300
+Subject: net: phy: fix auto-negotiation stall due to unavailable interrupt
+
+From: Alexander Kochetkov <al.kochet@gmail.com>
+
+
+[ Upstream commit f555f34fdc586a56204cd16d9a7c104ec6cb6650 ]
+
+The Ethernet link on an interrupt driven PHY was not coming up if the Ethernet
+cable was plugged in before the Ethernet interface was brought up.
+
+The patch triggers the PHY state machine to update the link state if the PHY
+was requested to do auto-negotiation and the auto-negotiation complete flag is
+already set.
+
+During the power-up cycle the PHY does auto-negotiation, generates an interrupt
+and sets the auto-negotiation complete flag. The interrupt is handled by the
+PHY state machine but doesn't update the link state, because the PHY is in the
+PHY_READY state. Some time later the MAC is brought up, starts and requests the
+PHY to do auto-negotiation. If there are no new settings to advertise,
+genphy_config_aneg() doesn't start PHY auto-negotiation. The PHY stays in the
+auto-negotiation complete state and doesn't fire an interrupt. At the same time
+the PHY state machine expects that the PHY has started auto-negotiation and
+waits for an interrupt from the PHY that will never arrive.
+
+Fixes: 321beec5047a ("net: phy: Use interrupts when available in NOLINK state")
+Signed-off-by: Alexander Kochetkov <al.kochet@gmail.com>
+Cc: stable <stable@vger.kernel.org> # v4.9+
+Tested-by: Roger Quadros <rogerq@ti.com>
+Tested-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/phy/phy.c |   40 ++++++++++++++++++++++++++++++++++++----
+ include/linux/phy.h   |    1 +
+ 2 files changed, 37 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/phy/phy.c
++++ b/drivers/net/phy/phy.c
+@@ -552,16 +552,18 @@ int phy_mii_ioctl(struct phy_device *phy
+ EXPORT_SYMBOL(phy_mii_ioctl);
+ /**
+- * phy_start_aneg - start auto-negotiation for this PHY device
++ * phy_start_aneg_priv - start auto-negotiation for this PHY device
+  * @phydev: the phy_device struct
++ * @sync: indicate whether we should wait for the workqueue cancelation
+  *
+  * Description: Sanitizes the settings (if we're not autonegotiating
+  *   them), and then calls the driver's config_aneg function.
+  *   If the PHYCONTROL Layer is operating, we change the state to
+  *   reflect the beginning of Auto-negotiation or forcing.
+  */
+-int phy_start_aneg(struct phy_device *phydev)
++static int phy_start_aneg_priv(struct phy_device *phydev, bool sync)
+ {
++      bool trigger = 0;
+       int err;
+       mutex_lock(&phydev->lock);
+@@ -586,10 +588,40 @@ int phy_start_aneg(struct phy_device *ph
+               }
+       }
++      /* Re-schedule a PHY state machine to check PHY status because
++       * negotiation may already be done and aneg interrupt may not be
++       * generated.
++       */
++      if (phy_interrupt_is_valid(phydev) && (phydev->state == PHY_AN)) {
++              err = phy_aneg_done(phydev);
++              if (err > 0) {
++                      trigger = true;
++                      err = 0;
++              }
++      }
++
+ out_unlock:
+       mutex_unlock(&phydev->lock);
++
++      if (trigger)
++              phy_trigger_machine(phydev, sync);
++
+       return err;
+ }
++
++/**
++ * phy_start_aneg - start auto-negotiation for this PHY device
++ * @phydev: the phy_device struct
++ *
++ * Description: Sanitizes the settings (if we're not autonegotiating
++ *   them), and then calls the driver's config_aneg function.
++ *   If the PHYCONTROL Layer is operating, we change the state to
++ *   reflect the beginning of Auto-negotiation or forcing.
++ */
++int phy_start_aneg(struct phy_device *phydev)
++{
++      return phy_start_aneg_priv(phydev, true);
++}
+ EXPORT_SYMBOL(phy_start_aneg);
+ /**
+@@ -617,7 +649,7 @@ void phy_start_machine(struct phy_device
+  *   state machine runs.
+  */
+-static void phy_trigger_machine(struct phy_device *phydev, bool sync)
++void phy_trigger_machine(struct phy_device *phydev, bool sync)
+ {
+       if (sync)
+               cancel_delayed_work_sync(&phydev->state_queue);
+@@ -1100,7 +1132,7 @@ void phy_state_machine(struct work_struc
+       mutex_unlock(&phydev->lock);
+       if (needs_aneg)
+-              err = phy_start_aneg(phydev);
++              err = phy_start_aneg_priv(phydev, false);
+       else if (do_suspend)
+               phy_suspend(phydev);
+--- a/include/linux/phy.h
++++ b/include/linux/phy.h
+@@ -806,6 +806,7 @@ void phy_change(struct work_struct *work
+ void phy_mac_interrupt(struct phy_device *phydev, int new_link);
+ void phy_start_machine(struct phy_device *phydev);
+ void phy_stop_machine(struct phy_device *phydev);
++void phy_trigger_machine(struct phy_device *phydev, bool sync);
+ int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd);
+ int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd);
+ int phy_ethtool_ksettings_get(struct phy_device *phydev,
diff --git a/queue-4.9/net-phy-handle-state-correctly-in-phy_stop_machine.patch b/queue-4.9/net-phy-handle-state-correctly-in-phy_stop_machine.patch
new file mode 100644
index 0000000..b8be84c
--- /dev/null
@@ -0,0 +1,37 @@
+From foo@baz Sat Apr 29 08:22:13 CEST 2017
+From: Nathan Sullivan <nathan.sullivan@ni.com>
+Date: Wed, 22 Mar 2017 15:27:01 -0500
+Subject: net: phy: handle state correctly in phy_stop_machine
+
+From: Nathan Sullivan <nathan.sullivan@ni.com>
+
+
+[ Upstream commit 49d52e8108a21749dc2114b924c907db43358984 ]
+
+If the PHY is halted on stop, then do not set the state to PHY_UP.  This
+ensures the phy will be restarted later in phy_start when the machine is
+started again.
+
+Fixes: 00db8189d984 ("This patch adds a PHY Abstraction Layer to the Linux Kernel, enabling ethernet drivers to remain as ignorant as is reasonable of the connected PHY's design and operation details.")
+Signed-off-by: Nathan Sullivan <nathan.sullivan@ni.com>
+Signed-off-by: Brad Mouring <brad.mouring@ni.com>
+Acked-by: Xander Huff <xander.huff@ni.com>
+Acked-by: Kyle Roeschley <kyle.roeschley@ni.com>
+Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/phy/phy.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/phy/phy.c
++++ b/drivers/net/phy/phy.c
+@@ -639,7 +639,7 @@ void phy_stop_machine(struct phy_device
+       cancel_delayed_work_sync(&phydev->state_queue);
+       mutex_lock(&phydev->lock);
+-      if (phydev->state > PHY_UP)
++      if (phydev->state > PHY_UP && phydev->state != PHY_HALTED)
+               phydev->state = PHY_UP;
+       mutex_unlock(&phydev->lock);
+ }
diff --git a/queue-4.9/net-timestamp-avoid-use-after-free-in-ip_recv_error.patch b/queue-4.9/net-timestamp-avoid-use-after-free-in-ip_recv_error.patch
new file mode 100644
index 0000000..970466d
--- /dev/null
@@ -0,0 +1,105 @@
+From foo@baz Sat Apr 29 08:22:13 CEST 2017
+From: Willem de Bruijn <willemb@google.com>
+Date: Wed, 12 Apr 2017 19:24:35 -0400
+Subject: net-timestamp: avoid use-after-free in ip_recv_error
+
+From: Willem de Bruijn <willemb@google.com>
+
+
+[ Upstream commit 1862d6208db0aeca9c8ace44915b08d5ab2cd667 ]
+
+Syzkaller reported a use-after-free in ip_recv_error at line
+
+    info->ipi_ifindex = skb->dev->ifindex;
+
+This function is called on dequeue from the error queue, at which
+point the device pointer may no longer be valid.
+
+Save ifindex on enqueue in __skb_complete_tx_timestamp, when the
+pointer is valid or NULL. Store it in temporary storage skb->cb.
+
+It is safe to reference skb->dev here, as called from device drivers
+or dev_queue_xmit. The exception is when called from tcp_ack_tstamp;
+in that case it is NULL and ifindex is set to 0 (invalid).
+
+Do not return a pktinfo cmsg if ifindex is 0. This maintains the
+current behavior of not returning a cmsg if skb->dev was NULL.
+
+On dequeue, the ipv4 path will cast from sock_exterr_skb to
+in_pktinfo. Both have ifindex as their first element, so no explicit
+conversion is needed. This is by design, introduced in commit
+0b922b7a829c ("net: original ingress device index in PKTINFO"). For
+ipv6 ip6_datagram_support_cmsg converts to in6_pktinfo.
+
+Fixes: 829ae9d61165 ("net-timestamp: allow reading recv cmsg on errqueue with origin tstamp")
+Reported-by: Andrey Konovalov <andreyknvl@google.com>
+Signed-off-by: Willem de Bruijn <willemb@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/skbuff.c      |    1 +
+ net/ipv4/ip_sockglue.c |    9 ++++-----
+ net/ipv6/datagram.c    |   10 +---------
+ 3 files changed, 6 insertions(+), 14 deletions(-)
+
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -3779,6 +3779,7 @@ static void __skb_complete_tx_timestamp(
+       serr->ee.ee_errno = ENOMSG;
+       serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
+       serr->ee.ee_info = tstype;
++      serr->header.h4.iif = skb->dev ? skb->dev->ifindex : 0;
+       if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) {
+               serr->ee.ee_data = skb_shinfo(skb)->tskey;
+               if (sk->sk_protocol == IPPROTO_TCP &&
+--- a/net/ipv4/ip_sockglue.c
++++ b/net/ipv4/ip_sockglue.c
+@@ -474,16 +474,15 @@ static bool ipv4_datagram_support_cmsg(c
+               return false;
+       /* Support IP_PKTINFO on tstamp packets if requested, to correlate
+-       * timestamp with egress dev. Not possible for packets without dev
++       * timestamp with egress dev. Not possible for packets without iif
+        * or without payload (SOF_TIMESTAMPING_OPT_TSONLY).
+        */
+-      if ((!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG)) ||
+-          (!skb->dev))
++      info = PKTINFO_SKB_CB(skb);
++      if (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG) ||
++          !info->ipi_ifindex)
+               return false;
+-      info = PKTINFO_SKB_CB(skb);
+       info->ipi_spec_dst.s_addr = ip_hdr(skb)->saddr;
+-      info->ipi_ifindex = skb->dev->ifindex;
+       return true;
+ }
+--- a/net/ipv6/datagram.c
++++ b/net/ipv6/datagram.c
+@@ -400,9 +400,6 @@ static inline bool ipv6_datagram_support
+  * At one point, excluding local errors was a quick test to identify icmp/icmp6
+  * errors. This is no longer true, but the test remained, so the v6 stack,
+  * unlike v4, also honors cmsg requests on all wifi and timestamp errors.
+- *
+- * Timestamp code paths do not initialize the fields expected by cmsg:
+- * the PKTINFO fields in skb->cb[]. Fill those in here.
+  */
+ static bool ip6_datagram_support_cmsg(struct sk_buff *skb,
+                                     struct sock_exterr_skb *serr)
+@@ -414,14 +411,9 @@ static bool ip6_datagram_support_cmsg(st
+       if (serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL)
+               return false;
+-      if (!skb->dev)
++      if (!IP6CB(skb)->iif)
+               return false;
+-      if (skb->protocol == htons(ETH_P_IPV6))
+-              IP6CB(skb)->iif = skb->dev->ifindex;
+-      else
+-              PKTINFO_SKB_CB(skb)->ipi_ifindex = skb->dev->ifindex;
+-
+       return true;
+ }
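
The pattern of the fix is to snapshot the ifindex into per-skb storage while skb->dev is still known to be valid, and to consume only the snapshot on dequeue. A simplified, self-contained sketch with hypothetical structs standing in for net_device and the skb->cb area:

    #include <stdio.h>

    struct device { int ifindex; };

    struct packet {                    /* hypothetical stand-in for an skb on the error queue */
        struct device *dev;            /* may be stale by dequeue time */
        int saved_ifindex;             /* stands in for the skb->cb storage; 0 = invalid */
    };

    static void on_enqueue(struct packet *p)
    {
        p->saved_ifindex = p->dev ? p->dev->ifindex : 0;
    }

    static void on_dequeue(const struct packet *p)
    {
        if (!p->saved_ifindex)         /* no cmsg; mirrors the old !skb->dev behaviour */
            return;
        printf("ifindex %d\n", p->saved_ifindex);   /* never touches p->dev */
    }

    int main(void)
    {
        struct device eth = { .ifindex = 3 };
        struct packet p = { .dev = &eth, .saved_ifindex = 0 };

        on_enqueue(&p);
        p.dev = NULL;                  /* device goes away before the error is read */
        on_dequeue(&p);
        return 0;
    }
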
diff --git a/queue-4.9/net-vrf-fix-setting-nlm_f_excl-flag-when-adding-l3mdev-rule.patch b/queue-4.9/net-vrf-fix-setting-nlm_f_excl-flag-when-adding-l3mdev-rule.patch
new file mode 100644
index 0000000..3204d0f
--- /dev/null
@@ -0,0 +1,31 @@
+From foo@baz Sat Apr 29 08:22:13 CEST 2017
+From: David Ahern <dsa@cumulusnetworks.com>
+Date: Thu, 13 Apr 2017 10:57:15 -0600
+Subject: net: vrf: Fix setting NLM_F_EXCL flag when adding l3mdev rule
+
+From: David Ahern <dsa@cumulusnetworks.com>
+
+
+[ Upstream commit 426c87caa2b4578b43cd3f689f02c65b743b2559 ]
+
+Only one l3mdev FIB rule is needed. Fix the setting of the NLM_F_EXCL flag in the nlmsghdr.
+
+Fixes: 1aa6c4f6b8cd8 ("net: vrf: Add l3mdev rules on first device create")
+Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/vrf.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/vrf.c
++++ b/drivers/net/vrf.c
+@@ -1124,7 +1124,7 @@ static int vrf_fib_rule(const struct net
+               goto nla_put_failure;
+       /* rule only needs to appear once */
+-      nlh->nlmsg_flags &= NLM_F_EXCL;
++      nlh->nlmsg_flags |= NLM_F_EXCL;
+       frh = nlmsg_data(nlh);
+       memset(frh, 0, sizeof(*frh));
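
The bug was a single operator: '&=' masks the existing flags against NLM_F_EXCL (clearing them all, since NLM_F_EXCL was not among them), while '|=' keeps them and adds NLM_F_EXCL. A short stand-alone demonstration using the flag values from the netlink uapi header:

    #include <stdio.h>

    #define NLM_F_REQUEST 0x001
    #define NLM_F_ACK     0x004
    #define NLM_F_EXCL    0x200

    int main(void)
    {
        unsigned short flags = NLM_F_REQUEST | NLM_F_ACK;             /* 0x005 */

        printf("&= gives 0x%03x\n", (unsigned)(flags & NLM_F_EXCL));  /* 0x000: every flag lost */
        printf("|= gives 0x%03x\n", (unsigned)(flags | NLM_F_EXCL));  /* 0x205: EXCL added on top */
        return 0;
    }
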
diff --git a/queue-4.9/netpoll-check-for-skb-queue_mapping.patch b/queue-4.9/netpoll-check-for-skb-queue_mapping.patch
new file mode 100644
index 0000000..288e69c
--- /dev/null
@@ -0,0 +1,104 @@
+From foo@baz Sat Apr 29 08:22:13 CEST 2017
+From: Tushar Dave <tushar.n.dave@oracle.com>
+Date: Thu, 20 Apr 2017 15:57:31 -0700
+Subject: netpoll: Check for skb->queue_mapping
+
+From: Tushar Dave <tushar.n.dave@oracle.com>
+
+
+[ Upstream commit c70b17b775edb21280e9de7531acf6db3b365274 ]
+
+Reducing real_num_tx_queues needs to be kept in sync with the skb queue_mapping,
+otherwise skbs with a queue_mapping greater than real_num_tx_queues
+can be sent to the underlying driver, which can result in a kernel panic.
+
+One way to hit this is to run netconsole and enable a VF on the same
+device, or to run netconsole and change the number of tx queues via
+ethtool on the same device.
+
+e.g.
+Unable to handle kernel NULL pointer dereference
+tsk->{mm,active_mm}->context = 0000000000001525
+tsk->{mm,active_mm}->pgd = fff800130ff9a000
+              \|/ ____ \|/
+              "@'/ .. \`@"
+              /_| \__/ |_\
+                 \__U_/
+kworker/48:1(475): Oops [#1]
+CPU: 48 PID: 475 Comm: kworker/48:1 Tainted: G           OE
+4.11.0-rc3-davem-net+ #7
+Workqueue: events queue_process
+task: fff80013113299c0 task.stack: fff800131132c000
+TSTATE: 0000004480e01600 TPC: 00000000103f9e3c TNPC: 00000000103f9e40 Y:
+00000000    Tainted: G           OE
+TPC: <ixgbe_xmit_frame_ring+0x7c/0x6c0 [ixgbe]>
+g0: 0000000000000000 g1: 0000000000003fff g2: 0000000000000000 g3:
+0000000000000001
+g4: fff80013113299c0 g5: fff8001fa6808000 g6: fff800131132c000 g7:
+00000000000000c0
+o0: fff8001fa760c460 o1: fff8001311329a50 o2: fff8001fa7607504 o3:
+0000000000000003
+o4: fff8001f96e63a40 o5: fff8001311d77ec0 sp: fff800131132f0e1 ret_pc:
+000000000049ed94
+RPC: <set_next_entity+0x34/0xb80>
+l0: 0000000000000000 l1: 0000000000000800 l2: 0000000000000000 l3:
+0000000000000000
+l4: 000b2aa30e34b10d l5: 0000000000000000 l6: 0000000000000000 l7:
+fff8001fa7605028
+i0: fff80013111a8a00 i1: fff80013155a0780 i2: 0000000000000000 i3:
+0000000000000000
+i4: 0000000000000000 i5: 0000000000100000 i6: fff800131132f1a1 i7:
+00000000103fa4b0
+I7: <ixgbe_xmit_frame+0x30/0xa0 [ixgbe]>
+Call Trace:
+ [00000000103fa4b0] ixgbe_xmit_frame+0x30/0xa0 [ixgbe]
+ [0000000000998c74] netpoll_start_xmit+0xf4/0x200
+ [0000000000998e10] queue_process+0x90/0x160
+ [0000000000485fa8] process_one_work+0x188/0x480
+ [0000000000486410] worker_thread+0x170/0x4c0
+ [000000000048c6b8] kthread+0xd8/0x120
+ [0000000000406064] ret_from_fork+0x1c/0x2c
+ [0000000000000000]           (null)
+Disabling lock debugging due to kernel taint
+Caller[00000000103fa4b0]: ixgbe_xmit_frame+0x30/0xa0 [ixgbe]
+Caller[0000000000998c74]: netpoll_start_xmit+0xf4/0x200
+Caller[0000000000998e10]: queue_process+0x90/0x160
+Caller[0000000000485fa8]: process_one_work+0x188/0x480
+Caller[0000000000486410]: worker_thread+0x170/0x4c0
+Caller[000000000048c6b8]: kthread+0xd8/0x120
+Caller[0000000000406064]: ret_from_fork+0x1c/0x2c
+Caller[0000000000000000]:           (null)
+
+Signed-off-by: Tushar Dave <tushar.n.dave@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/netpoll.c |   10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+--- a/net/core/netpoll.c
++++ b/net/core/netpoll.c
+@@ -105,15 +105,21 @@ static void queue_process(struct work_st
+       while ((skb = skb_dequeue(&npinfo->txq))) {
+               struct net_device *dev = skb->dev;
+               struct netdev_queue *txq;
++              unsigned int q_index;
+               if (!netif_device_present(dev) || !netif_running(dev)) {
+                       kfree_skb(skb);
+                       continue;
+               }
+-              txq = skb_get_tx_queue(dev, skb);
+-
+               local_irq_save(flags);
++              /* check if skb->queue_mapping is still valid */
++              q_index = skb_get_queue_mapping(skb);
++              if (unlikely(q_index >= dev->real_num_tx_queues)) {
++                      q_index = q_index % dev->real_num_tx_queues;
++                      skb_set_queue_mapping(skb, q_index);
++              }
++              txq = netdev_get_tx_queue(dev, q_index);
+               HARD_TX_LOCK(dev, txq, smp_processor_id());
+               if (netif_xmit_frozen_or_stopped(txq) ||
+                   netpoll_start_xmit(skb, dev, txq) != NETDEV_TX_OK) {
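
The fix folds a stale queue_mapping back into the currently valid range before looking up the tx queue, instead of trusting an index recorded when the device had more queues. A minimal stand-alone sketch with made-up queue counts:

    #include <stdio.h>

    /* Clamp a possibly stale queue index into [0, real_num_tx_queues). */
    static unsigned int clamp_queue(unsigned int q_index, unsigned int real_num_tx_queues)
    {
        if (q_index >= real_num_tx_queues)
            q_index = q_index % real_num_tx_queues;
        return q_index;
    }

    int main(void)
    {
        unsigned int mapped = 7;   /* skb was mapped while the device had 8 tx queues ... */
        unsigned int now = 2;      /* ... but ethtool/VF setup later reduced it to 2 */

        printf("use tx queue %u\n", clamp_queue(mapped, now));   /* 1, not an out-of-range 7 */
        return 0;
    }
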
diff --git a/queue-4.9/sctp-listen-on-the-sock-only-when-it-s-state-is-listening-or-closed.patch b/queue-4.9/sctp-listen-on-the-sock-only-when-it-s-state-is-listening-or-closed.patch
new file mode 100644
index 0000000..e4e0794
--- /dev/null
@@ -0,0 +1,39 @@
+From foo@baz Sat Apr 29 08:22:13 CEST 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Thu, 6 Apr 2017 13:10:52 +0800
+Subject: sctp: listen on the sock only when it's state is listening or closed
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit 34b2789f1d9bf8dcca9b5cb553d076ca2cd898ee ]
+
+Currently sctp doesn't check the sock's state before listening on it. This
+can even turn a sock in any state into a listening sock when sctp_listen
+is called.
+
+This patch fixes it by checking the sock's state in sctp_listen, so that
+we only listen on a sock that is in the right state.
+
+Reported-by: Andrey Konovalov <andreyknvl@google.com>
+Tested-by: Andrey Konovalov <andreyknvl@google.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/socket.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/net/sctp/socket.c
++++ b/net/sctp/socket.c
+@@ -6861,6 +6861,9 @@ int sctp_inet_listen(struct socket *sock
+       if (sock->state != SS_UNCONNECTED)
+               goto out;
++      if (!sctp_sstate(sk, LISTENING) && !sctp_sstate(sk, CLOSED))
++              goto out;
++
+       /* If backlog is zero, disable listening. */
+       if (!backlog) {
+               if (sctp_sstate(sk, CLOSED))
diff --git a/queue-4.9/series b/queue-4.9/series
index 318f46bbe894b2e443079dc87f55db78f3e34f86..0fbdba19df55c034ca0a8c6eb4e4828143262c69 100644
@@ -1,2 +1,36 @@
 revert-mmc-sdhci-msm-enable-few-quirks.patch
 ping-implement-proper-locking.patch
+sparc64-kern_addr_valid-regression.patch
+sparc64-fix-kernel-panic-due-to-erroneous-ifdef-surrounding-pmd_write.patch
+net-neigh-guard-against-null-solicit-method.patch
+net-phy-handle-state-correctly-in-phy_stop_machine.patch
+kcm-return-immediately-after-copy_from_user-failure.patch
+bpf-improve-verifier-packet-range-checks.patch
+net-mlx5-avoid-dereferencing-uninitialized-pointer.patch
+l2tp-hold-tunnel-socket-when-handling-control-frames-in-l2tp_ip-and-l2tp_ip6.patch
+l2tp-purge-socket-queues-in-the-.destruct-callback.patch
+net-packet-fix-overflow-in-check-for-tp_frame_nr.patch
+net-packet-fix-overflow-in-check-for-tp_reserve.patch
+l2tp-take-reference-on-sessions-being-dumped.patch
+l2tp-fix-ppp-pseudo-wire-auto-loading.patch
+net-ipv4-fix-multipath-rtm_getroute-behavior-when-iif-is-given.patch
+sctp-listen-on-the-sock-only-when-it-s-state-is-listening-or-closed.patch
+tcp-clear-saved_syn-in-tcp_disconnect.patch
+ipv6-fix-idev-addr_list-corruption.patch
+net-timestamp-avoid-use-after-free-in-ip_recv_error.patch
+net-vrf-fix-setting-nlm_f_excl-flag-when-adding-l3mdev-rule.patch
+sh_eth-unmap-dma-buffers-when-freeing-rings.patch
+dp83640-don-t-recieve-time-stamps-twice.patch
+gso-validate-assumption-of-frag_list-segementation.patch
+net-ipv6-rtf_pcpu-should-not-be-settable-from-userspace.patch
+netpoll-check-for-skb-queue_mapping.patch
+ip6mr-fix-notification-device-destruction.patch
+net-mlx5-fix-driver-load-bad-flow-when-having-fw-initializing-timeout.patch
+net-mlx5e-fix-small-packet-threshold.patch
+net-mlx5e-fix-ethtool_grxclsrlall-handling.patch
+macvlan-fix-device-ref-leak-when-purging-bc_queue.patch
+net-ipv6-regenerate-host-route-if-moved-to-gc-list.patch
+net-phy-fix-auto-negotiation-stall-due-to-unavailable-interrupt.patch
+ipv6-check-skb-protocol-before-lookup-for-nexthop.patch
+tcp-memset-ca_priv-data-to-0-properly.patch
+ipv6-check-raw-payload-size-correctly-in-ioctl.patch
diff --git a/queue-4.9/sh_eth-unmap-dma-buffers-when-freeing-rings.patch b/queue-4.9/sh_eth-unmap-dma-buffers-when-freeing-rings.patch
new file mode 100644
index 0000000..928f4d1
--- /dev/null
@@ -0,0 +1,211 @@
+From foo@baz Sat Apr 29 08:22:13 CEST 2017
+From: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
+Date: Mon, 17 Apr 2017 15:55:22 +0300
+Subject: sh_eth: unmap DMA buffers when freeing rings
+
+From: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
+
+
+[ Upstream commit 1debdc8f9ebd07daf140e417b3841596911e0066 ]
+
+The DMA API debugging (when enabled) causes:
+
+WARNING: CPU: 0 PID: 1445 at lib/dma-debug.c:519 add_dma_entry+0xe0/0x12c
+DMA-API: exceeded 7 overlapping mappings of cacheline 0x01b2974d
+
+to be printed after repeated initialization of the Ether device, e.g.
+suspend/resume or 'ifconfig' up/down. This is because DMA buffers mapped
+using dma_map_single() in sh_eth_ring_format() and sh_eth_start_xmit() are
+never unmapped. Resolve this problem by unmapping the buffers when freeing
+the descriptor rings; in order to do it right, we'd have to add an extra
+parameter to sh_eth_txfree() (we rename this function to sh_eth_tx_free(),
+while at it).
+
+Based on the commit a47b70ea86bd ("ravb: unmap descriptors when freeing
+rings").
+
+Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/renesas/sh_eth.c |  122 ++++++++++++++++++----------------
+ 1 file changed, 67 insertions(+), 55 deletions(-)
+
+--- a/drivers/net/ethernet/renesas/sh_eth.c
++++ b/drivers/net/ethernet/renesas/sh_eth.c
+@@ -1059,12 +1059,70 @@ static struct mdiobb_ops bb_ops = {
+       .get_mdio_data = sh_get_mdio,
+ };
++/* free Tx skb function */
++static int sh_eth_tx_free(struct net_device *ndev, bool sent_only)
++{
++      struct sh_eth_private *mdp = netdev_priv(ndev);
++      struct sh_eth_txdesc *txdesc;
++      int free_num = 0;
++      int entry;
++      bool sent;
++
++      for (; mdp->cur_tx - mdp->dirty_tx > 0; mdp->dirty_tx++) {
++              entry = mdp->dirty_tx % mdp->num_tx_ring;
++              txdesc = &mdp->tx_ring[entry];
++              sent = !(txdesc->status & cpu_to_le32(TD_TACT));
++              if (sent_only && !sent)
++                      break;
++              /* TACT bit must be checked before all the following reads */
++              dma_rmb();
++              netif_info(mdp, tx_done, ndev,
++                         "tx entry %d status 0x%08x\n",
++                         entry, le32_to_cpu(txdesc->status));
++              /* Free the original skb. */
++              if (mdp->tx_skbuff[entry]) {
++                      dma_unmap_single(&ndev->dev, le32_to_cpu(txdesc->addr),
++                                       le32_to_cpu(txdesc->len) >> 16,
++                                       DMA_TO_DEVICE);
++                      dev_kfree_skb_irq(mdp->tx_skbuff[entry]);
++                      mdp->tx_skbuff[entry] = NULL;
++                      free_num++;
++              }
++              txdesc->status = cpu_to_le32(TD_TFP);
++              if (entry >= mdp->num_tx_ring - 1)
++                      txdesc->status |= cpu_to_le32(TD_TDLE);
++
++              if (sent) {
++                      ndev->stats.tx_packets++;
++                      ndev->stats.tx_bytes += le32_to_cpu(txdesc->len) >> 16;
++              }
++      }
++      return free_num;
++}
++
+ /* free skb and descriptor buffer */
+ static void sh_eth_ring_free(struct net_device *ndev)
+ {
+       struct sh_eth_private *mdp = netdev_priv(ndev);
+       int ringsize, i;
++      if (mdp->rx_ring) {
++              for (i = 0; i < mdp->num_rx_ring; i++) {
++                      if (mdp->rx_skbuff[i]) {
++                              struct sh_eth_rxdesc *rxdesc = &mdp->rx_ring[i];
++
++                              dma_unmap_single(&ndev->dev,
++                                               le32_to_cpu(rxdesc->addr),
++                                               ALIGN(mdp->rx_buf_sz, 32),
++                                               DMA_FROM_DEVICE);
++                      }
++              }
++              ringsize = sizeof(struct sh_eth_rxdesc) * mdp->num_rx_ring;
++              dma_free_coherent(NULL, ringsize, mdp->rx_ring,
++                                mdp->rx_desc_dma);
++              mdp->rx_ring = NULL;
++      }
++
+       /* Free Rx skb ringbuffer */
+       if (mdp->rx_skbuff) {
+               for (i = 0; i < mdp->num_rx_ring; i++)
+@@ -1073,27 +1131,18 @@ static void sh_eth_ring_free(struct net_
+       kfree(mdp->rx_skbuff);
+       mdp->rx_skbuff = NULL;
+-      /* Free Tx skb ringbuffer */
+-      if (mdp->tx_skbuff) {
+-              for (i = 0; i < mdp->num_tx_ring; i++)
+-                      dev_kfree_skb(mdp->tx_skbuff[i]);
+-      }
+-      kfree(mdp->tx_skbuff);
+-      mdp->tx_skbuff = NULL;
+-
+-      if (mdp->rx_ring) {
+-              ringsize = sizeof(struct sh_eth_rxdesc) * mdp->num_rx_ring;
+-              dma_free_coherent(NULL, ringsize, mdp->rx_ring,
+-                                mdp->rx_desc_dma);
+-              mdp->rx_ring = NULL;
+-      }
+-
+       if (mdp->tx_ring) {
++              sh_eth_tx_free(ndev, false);
++
+               ringsize = sizeof(struct sh_eth_txdesc) * mdp->num_tx_ring;
+               dma_free_coherent(NULL, ringsize, mdp->tx_ring,
+                                 mdp->tx_desc_dma);
+               mdp->tx_ring = NULL;
+       }
++
++      /* Free Tx skb ringbuffer */
++      kfree(mdp->tx_skbuff);
++      mdp->tx_skbuff = NULL;
+ }
+ /* format skb and descriptor buffer */
+@@ -1341,43 +1390,6 @@ static void sh_eth_dev_exit(struct net_d
+       update_mac_address(ndev);
+ }
+-/* free Tx skb function */
+-static int sh_eth_txfree(struct net_device *ndev)
+-{
+-      struct sh_eth_private *mdp = netdev_priv(ndev);
+-      struct sh_eth_txdesc *txdesc;
+-      int free_num = 0;
+-      int entry;
+-
+-      for (; mdp->cur_tx - mdp->dirty_tx > 0; mdp->dirty_tx++) {
+-              entry = mdp->dirty_tx % mdp->num_tx_ring;
+-              txdesc = &mdp->tx_ring[entry];
+-              if (txdesc->status & cpu_to_le32(TD_TACT))
+-                      break;
+-              /* TACT bit must be checked before all the following reads */
+-              dma_rmb();
+-              netif_info(mdp, tx_done, ndev,
+-                         "tx entry %d status 0x%08x\n",
+-                         entry, le32_to_cpu(txdesc->status));
+-              /* Free the original skb. */
+-              if (mdp->tx_skbuff[entry]) {
+-                      dma_unmap_single(&ndev->dev, le32_to_cpu(txdesc->addr),
+-                                       le32_to_cpu(txdesc->len) >> 16,
+-                                       DMA_TO_DEVICE);
+-                      dev_kfree_skb_irq(mdp->tx_skbuff[entry]);
+-                      mdp->tx_skbuff[entry] = NULL;
+-                      free_num++;
+-              }
+-              txdesc->status = cpu_to_le32(TD_TFP);
+-              if (entry >= mdp->num_tx_ring - 1)
+-                      txdesc->status |= cpu_to_le32(TD_TDLE);
+-
+-              ndev->stats.tx_packets++;
+-              ndev->stats.tx_bytes += le32_to_cpu(txdesc->len) >> 16;
+-      }
+-      return free_num;
+-}
+-
+ /* Packet receive function */
+ static int sh_eth_rx(struct net_device *ndev, u32 intr_status, int *quota)
+ {
+@@ -1620,7 +1632,7 @@ ignore_link:
+                          intr_status, mdp->cur_tx, mdp->dirty_tx,
+                          (u32)ndev->state, edtrr);
+               /* dirty buffer free */
+-              sh_eth_txfree(ndev);
++              sh_eth_tx_free(ndev, true);
+               /* SH7712 BUG */
+               if (edtrr ^ sh_eth_get_edtrr_trns(mdp)) {
+@@ -1679,7 +1691,7 @@ static irqreturn_t sh_eth_interrupt(int
+               /* Clear Tx interrupts */
+               sh_eth_write(ndev, intr_status & cd->tx_check, EESR);
+-              sh_eth_txfree(ndev);
++              sh_eth_tx_free(ndev, true);
+               netif_wake_queue(ndev);
+       }
+@@ -2307,7 +2319,7 @@ static int sh_eth_start_xmit(struct sk_b
+       spin_lock_irqsave(&mdp->lock, flags);
+       if ((mdp->cur_tx - mdp->dirty_tx) >= (mdp->num_tx_ring - 4)) {
+-              if (!sh_eth_txfree(ndev)) {
++              if (!sh_eth_tx_free(ndev, true)) {
+                       netif_warn(mdp, tx_queued, ndev, "TxFD exhausted.\n");
+                       netif_stop_queue(ndev);
+                       spin_unlock_irqrestore(&mdp->lock, flags);
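
The rule behind the change is that every buffer mapped with dma_map_single() while a ring is filled must be unmapped with dma_unmap_single() when the ring is freed; otherwise each up/down cycle leaves mappings behind, which is exactly what DMA-API debug reports. A toy, self-contained model of that pairing, with malloc/free standing in for the real DMA mapping calls:

    #include <stdio.h>
    #include <stdlib.h>

    #define RING_SIZE 4

    static void *ring[RING_SIZE];

    static void ring_format(void)
    {
        for (int i = 0; i < RING_SIZE; i++)
            ring[i] = malloc(2048);    /* stands in for dma_map_single() */
    }

    static void ring_free(void)
    {
        for (int i = 0; i < RING_SIZE; i++) {
            free(ring[i]);             /* stands in for dma_unmap_single() */
            ring[i] = NULL;
        }
    }

    int main(void)
    {
        for (int cycle = 0; cycle < 3; cycle++) {   /* e.g. suspend/resume or ifconfig up/down */
            ring_format();
            ring_free();
        }
        puts("3 up/down cycles, no mappings left behind");
        return 0;
    }
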
diff --git a/queue-4.9/sparc64-fix-kernel-panic-due-to-erroneous-ifdef-surrounding-pmd_write.patch b/queue-4.9/sparc64-fix-kernel-panic-due-to-erroneous-ifdef-surrounding-pmd_write.patch
new file mode 100644
index 0000000..036529a
--- /dev/null
@@ -0,0 +1,101 @@
+From foo@baz Sat Apr 29 08:20:55 CEST 2017
+From: Tom Hromatka <tom.hromatka@oracle.com>
+Date: Fri, 31 Mar 2017 16:31:42 -0600
+Subject: sparc64: Fix kernel panic due to erroneous #ifdef surrounding pmd_write()
+
+From: Tom Hromatka <tom.hromatka@oracle.com>
+
+
+[ Upstream commit 9ae34dbd8afd790cb5f52467e4f816434379eafa ]
+
+This commit moves sparc64's prototype of pmd_write() outside
+of the CONFIG_TRANSPARENT_HUGEPAGE ifdef.
+
+In 2013, commit a7b9403f0e6d ("sparc64: Encode huge PMDs using PTE
+encoding.") exposed a path where pmd_write() could be called without
+CONFIG_TRANSPARENT_HUGEPAGE defined.  This can result in the panic below.
+
+The diff is awkward to read, but the changes are straightforward.
+pmd_write() was moved outside of #ifdef CONFIG_TRANSPARENT_HUGEPAGE.
+Also, __HAVE_ARCH_PMD_WRITE was defined.
+
+kernel BUG at include/asm-generic/pgtable.h:576!
+              \|/ ____ \|/
+              "@'/ .. \`@"
+              /_| \__/ |_\
+                 \__U_/
+oracle_8114_cdb(8114): Kernel bad sw trap 5 [#1]
+CPU: 120 PID: 8114 Comm: oracle_8114_cdb Not tainted
+4.1.12-61.7.1.el6uek.rc1.sparc64 #1
+task: fff8400700a24d60 ti: fff8400700bc4000 task.ti: fff8400700bc4000
+TSTATE: 0000004411e01607 TPC: 00000000004609f8 TNPC: 00000000004609fc Y:
+00000005    Not tainted
+TPC: <gup_huge_pmd+0x198/0x1e0>
+g0: 000000000001c000 g1: 0000000000ef3954 g2: 0000000000000000 g3: 0000000000000001
+g4: fff8400700a24d60 g5: fff8001fa5c10000 g6: fff8400700bc4000 g7: 0000000000000720
+o0: 0000000000bc5058 o1: 0000000000000240 o2: 0000000000006000 o3: 0000000000001c00
+o4: 0000000000000000 o5: 0000048000080000 sp: fff8400700bc6ab1 ret_pc: 00000000004609f0
+RPC: <gup_huge_pmd+0x190/0x1e0>
+l0: fff8400700bc74fc l1: 0000000000020000 l2: 0000000000002000 l3: 0000000000000000
+l4: fff8001f93250950 l5: 000000000113f800 l6: 0000000000000004 l7: 0000000000000000
+i0: fff8400700ca46a0 i1: bd0000085e800453 i2: 000000026a0c4000 i3: 000000026a0c6000
+i4: 0000000000000001 i5: fff800070c958de8 i6: fff8400700bc6b61 i7: 0000000000460dd0
+I7: <gup_pud_range+0x170/0x1a0>
+Call Trace:
+ [0000000000460dd0] gup_pud_range+0x170/0x1a0
+ [0000000000460e84] get_user_pages_fast+0x84/0x120
+ [00000000006f5a18] iov_iter_get_pages+0x98/0x240
+ [00000000005fa744] do_direct_IO+0xf64/0x1e00
+ [00000000005fbbc0] __blockdev_direct_IO+0x360/0x15a0
+ [00000000101f74fc] ext4_ind_direct_IO+0xdc/0x400 [ext4]
+ [00000000101af690] ext4_ext_direct_IO+0x1d0/0x2c0 [ext4]
+ [00000000101af86c] ext4_direct_IO+0xec/0x220 [ext4]
+ [0000000000553bd4] generic_file_read_iter+0x114/0x140
+ [00000000005bdc2c] __vfs_read+0xac/0x100
+ [00000000005bf254] vfs_read+0x54/0x100
+ [00000000005bf368] SyS_pread64+0x68/0x80
+
+Signed-off-by: Tom Hromatka <tom.hromatka@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/include/asm/pgtable_64.h |   15 ++++++++-------
+ 1 file changed, 8 insertions(+), 7 deletions(-)
+
+--- a/arch/sparc/include/asm/pgtable_64.h
++++ b/arch/sparc/include/asm/pgtable_64.h
+@@ -673,26 +673,27 @@ static inline unsigned long pmd_pfn(pmd_
+       return pte_pfn(pte);
+ }
+-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+-static inline unsigned long pmd_dirty(pmd_t pmd)
++#define __HAVE_ARCH_PMD_WRITE
++static inline unsigned long pmd_write(pmd_t pmd)
+ {
+       pte_t pte = __pte(pmd_val(pmd));
+-      return pte_dirty(pte);
++      return pte_write(pte);
+ }
+-static inline unsigned long pmd_young(pmd_t pmd)
++#ifdef CONFIG_TRANSPARENT_HUGEPAGE
++static inline unsigned long pmd_dirty(pmd_t pmd)
+ {
+       pte_t pte = __pte(pmd_val(pmd));
+-      return pte_young(pte);
++      return pte_dirty(pte);
+ }
+-static inline unsigned long pmd_write(pmd_t pmd)
++static inline unsigned long pmd_young(pmd_t pmd)
+ {
+       pte_t pte = __pte(pmd_val(pmd));
+-      return pte_write(pte);
++      return pte_young(pte);
+ }
+ static inline unsigned long pmd_trans_huge(pmd_t pmd)
diff --git a/queue-4.9/sparc64-kern_addr_valid-regression.patch b/queue-4.9/sparc64-kern_addr_valid-regression.patch
new file mode 100644
index 0000000..4b71288
--- /dev/null
@@ -0,0 +1,38 @@
+From foo@baz Sat Apr 29 08:20:55 CEST 2017
+From: bob picco <bob.picco@oracle.com>
+Date: Fri, 10 Mar 2017 14:31:19 -0500
+Subject: sparc64: kern_addr_valid regression
+
+From: bob picco <bob.picco@oracle.com>
+
+
+[ Upstream commit adfae8a5d833fa2b46577a8081f350e408851f5b ]
+
+I encountered this bug when using /proc/kcore to examine the kernel. Plus a
+coworker inquired about debugging tools. We computed pa but did
+not use it during the maximum physical address bits test. Instead we used
+the identity mapped virtual address which will always fail this test.
+
+I believe the defect came in here:
+[bpicco@zareason linus.git]$ git describe --contains bb4e6e85daa52
+v3.18-rc1~87^2~4
+.
+
+Signed-off-by: Bob Picco <bob.picco@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/mm/init_64.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/sparc/mm/init_64.c
++++ b/arch/sparc/mm/init_64.c
+@@ -1495,7 +1495,7 @@ bool kern_addr_valid(unsigned long addr)
+       if ((long)addr < 0L) {
+               unsigned long pa = __pa(addr);
+-              if ((addr >> max_phys_bits) != 0UL)
++              if ((pa >> max_phys_bits) != 0UL)
+                       return false;
+               return pfn_valid(pa >> PAGE_SHIFT);
diff --git a/queue-4.9/tcp-clear-saved_syn-in-tcp_disconnect.patch b/queue-4.9/tcp-clear-saved_syn-in-tcp_disconnect.patch
new file mode 100644
index 0000000..27a5e4e
--- /dev/null
@@ -0,0 +1,56 @@
+From foo@baz Sat Apr 29 08:22:13 CEST 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Sat, 8 Apr 2017 08:07:33 -0700
+Subject: tcp: clear saved_syn in tcp_disconnect()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 17c3060b1701fc69daedb4c90be6325d3d9fca8e ]
+
+In the (very unlikely) case a passive socket becomes a listener,
+we do not want to duplicate its saved SYN headers.
+
+This would lead to double frees and use after free, and would please hackers
+and various fuzzers.
+
+Tested:
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, IPPROTO_TCP, TCP_SAVE_SYN, [1], 4) = 0
+   +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 5) = 0
+
+   +0 < S 0:0(0) win 32972 <mss 1460,nop,wscale 7>
+   +0 > S. 0:0(0) ack 1 <...>
+  +.1 < . 1:1(0) ack 1 win 257
+   +0 accept(3, ..., ...) = 4
+
+   +0 connect(4, AF_UNSPEC, ...) = 0
+   +0 close(3) = 0
+   +0 bind(4, ..., ...) = 0
+   +0 listen(4, 5) = 0
+
+   +0 < S 0:0(0) win 32972 <mss 1460,nop,wscale 7>
+   +0 > S. 0:0(0) ack 1 <...>
+  +.1 < . 1:1(0) ack 1 win 257
+
+Fixes: cd8ae85299d5 ("tcp: provide SYN headers for passive connections")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -2297,6 +2297,7 @@ int tcp_disconnect(struct sock *sk, int
+       tcp_init_send_head(sk);
+       memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
+       __sk_dst_reset(sk);
++      tcp_saved_syn_free(tp);
+       WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
diff --git a/queue-4.9/tcp-memset-ca_priv-data-to-0-properly.patch b/queue-4.9/tcp-memset-ca_priv-data-to-0-properly.patch
new file mode 100644
index 0000000..e42325b
--- /dev/null
@@ -0,0 +1,61 @@
+From foo@baz Sat Apr 29 08:22:13 CEST 2017
+From: Wei Wang <weiwan@google.com>
+Date: Tue, 25 Apr 2017 17:38:02 -0700
+Subject: tcp: memset ca_priv data to 0 properly
+
+From: Wei Wang <weiwan@google.com>
+
+
+[ Upstream commit c1201444075009507a6818de6518e2822b9a87c8 ]
+
+Always zero out ca_priv data in tcp_assign_congestion_control() so that
+ca_priv data is cleared out during socket creation.
+Also always zero out ca_priv data in tcp_reinit_congestion_control() so
+that when the cc algorithm is changed, ca_priv data is cleared out as well.
+We should still zero out ca_priv data even in the TCP_CLOSE state, because
+the user could call connect() on AF_UNSPEC to disconnect the socket, leave
+it in TCP_CLOSE state, and later call setsockopt() to switch the cc
+algorithm on this socket.
+
+Fixes: 2b0a8c9ee ("tcp: add CDG congestion control")
+Reported-by: Andrey Konovalov  <andreyknvl@google.com>
+Signed-off-by: Wei Wang <weiwan@google.com>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Yuchung Cheng <ycheng@google.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_cong.c |   11 +++--------
+ 1 file changed, 3 insertions(+), 8 deletions(-)
+
+--- a/net/ipv4/tcp_cong.c
++++ b/net/ipv4/tcp_cong.c
+@@ -167,12 +167,8 @@ void tcp_assign_congestion_control(struc
+       }
+ out:
+       rcu_read_unlock();
++      memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
+-      /* Clear out private data before diag gets it and
+-       * the ca has not been initialized.
+-       */
+-      if (ca->get_info)
+-              memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
+       if (ca->flags & TCP_CONG_NEEDS_ECN)
+               INET_ECN_xmit(sk);
+       else
+@@ -199,11 +195,10 @@ static void tcp_reinit_congestion_contro
+       tcp_cleanup_congestion_control(sk);
+       icsk->icsk_ca_ops = ca;
+       icsk->icsk_ca_setsockopt = 1;
++      memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
+-      if (sk->sk_state != TCP_CLOSE) {
+-              memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
++      if (sk->sk_state != TCP_CLOSE)
+               tcp_init_congestion_control(sk);
+-      }
+ }
+ /* Manage refcounts on socket close. */