git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.8-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 8 Dec 2016 06:20:44 +0000 (07:20 +0100)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 8 Dec 2016 06:20:44 +0000 (07:20 +0100)
added patches:
af_unix-conditionally-use-freezable-blocking-calls-in-read.patch
cdc_ether-fix-handling-connection-notification.patch
geneve-avoid-use-after-free-of-skb-data.patch
gro_cells-mark-napi-struct-as-not-busy-poll-candidates.patch
gso-reload-iph-after-pskb_may_pull.patch
ip6_offload-check-segs-for-null-in-ipv6_gso_segment.patch
ip6_tunnel-disable-caching-when-the-traffic-class-is-inherited.patch
ipv4-drop-leaf-from-suffix-pull-push-functions.patch
ipv4-drop-suffix-update-from-resize-code.patch
ipv4-fix-memory-leak-in-exception-case-for-splitting-tries.patch
ipv4-restore-fib_trie_flush_external-function-and-fix-call-ordering.patch
ipv6-bump-genid-when-the-ifa_f_tentative-flag-is-clear.patch
l2tp-fix-racy-sock_zapped-flag-check-in-l2tp_ip-6-_bind.patch
net-avoid-signed-overflows-for-so_-snd-rcv-bufforce.patch
net-bcmgenet-utilize-correct-struct-device-for-all-dma-operations.patch
net-check-dead-netns-for-peernet2id_alloc.patch
net-dccp-fix-use-after-free-in-dccp_invalid_packet.patch
net-dsa-b53-fix-vlan-usage-and-how-we-treat-cpu-port.patch
net-dsa-bcm_sf2-ensure-we-re-negotiate-eee-during-after-link-change.patch
net-dsa-fix-unbalanced-dsa_switch_tree-reference-counting.patch
net-macb-fix-the-rx-queue-reset-in-macb_rx.patch
net-ping-check-minimum-size-on-icmp-header-length.patch
net-sched-pedit-make-sure-that-offset-is-valid.patch
net-sched-respect-rcu-grace-period-on-cls-destruction.patch
net-sky2-fix-shutdown-crash.patch
netlink-call-cb-done-from-a-worker-thread.patch
netlink-do-not-schedule-work-from-sk_destruct.patch
packet-fix-race-condition-in-packet_set_ring.patch
rtnetlink-fix-fdb-size-computation.patch
rtnl-fix-the-loop-index-update-error-in-rtnl_dump_ifinfo.patch
sh_eth-remove-unchecked-interrupts-for-rz-a1.patch
sparc32-fix-inverted-invalid_frame_pointer-checks-on-sigreturns.patch
sparc64-fix-compile-warning-section-mismatch-in-find_node.patch
sparc64-fix-find_node-warning-if-numa-node-cannot-be-found.patch
tipc-check-minimum-bearer-mtu.patch
udplite-call-proper-backlog-handlers.patch
virtio-net-add-a-missing-synchronize_net.patch

37 files changed:
queue-4.8/af_unix-conditionally-use-freezable-blocking-calls-in-read.patch [new file with mode: 0644]
queue-4.8/cdc_ether-fix-handling-connection-notification.patch [new file with mode: 0644]
queue-4.8/geneve-avoid-use-after-free-of-skb-data.patch [new file with mode: 0644]
queue-4.8/gro_cells-mark-napi-struct-as-not-busy-poll-candidates.patch [new file with mode: 0644]
queue-4.8/gso-reload-iph-after-pskb_may_pull.patch [new file with mode: 0644]
queue-4.8/ip6_offload-check-segs-for-null-in-ipv6_gso_segment.patch [new file with mode: 0644]
queue-4.8/ip6_tunnel-disable-caching-when-the-traffic-class-is-inherited.patch [new file with mode: 0644]
queue-4.8/ipv4-drop-leaf-from-suffix-pull-push-functions.patch [new file with mode: 0644]
queue-4.8/ipv4-drop-suffix-update-from-resize-code.patch [new file with mode: 0644]
queue-4.8/ipv4-fix-memory-leak-in-exception-case-for-splitting-tries.patch [new file with mode: 0644]
queue-4.8/ipv4-restore-fib_trie_flush_external-function-and-fix-call-ordering.patch [new file with mode: 0644]
queue-4.8/ipv6-bump-genid-when-the-ifa_f_tentative-flag-is-clear.patch [new file with mode: 0644]
queue-4.8/l2tp-fix-racy-sock_zapped-flag-check-in-l2tp_ip-6-_bind.patch [new file with mode: 0644]
queue-4.8/net-avoid-signed-overflows-for-so_-snd-rcv-bufforce.patch [new file with mode: 0644]
queue-4.8/net-bcmgenet-utilize-correct-struct-device-for-all-dma-operations.patch [new file with mode: 0644]
queue-4.8/net-check-dead-netns-for-peernet2id_alloc.patch [new file with mode: 0644]
queue-4.8/net-dccp-fix-use-after-free-in-dccp_invalid_packet.patch [new file with mode: 0644]
queue-4.8/net-dsa-b53-fix-vlan-usage-and-how-we-treat-cpu-port.patch [new file with mode: 0644]
queue-4.8/net-dsa-bcm_sf2-ensure-we-re-negotiate-eee-during-after-link-change.patch [new file with mode: 0644]
queue-4.8/net-dsa-fix-unbalanced-dsa_switch_tree-reference-counting.patch [new file with mode: 0644]
queue-4.8/net-macb-fix-the-rx-queue-reset-in-macb_rx.patch [new file with mode: 0644]
queue-4.8/net-ping-check-minimum-size-on-icmp-header-length.patch [new file with mode: 0644]
queue-4.8/net-sched-pedit-make-sure-that-offset-is-valid.patch [new file with mode: 0644]
queue-4.8/net-sched-respect-rcu-grace-period-on-cls-destruction.patch [new file with mode: 0644]
queue-4.8/net-sky2-fix-shutdown-crash.patch [new file with mode: 0644]
queue-4.8/netlink-call-cb-done-from-a-worker-thread.patch [new file with mode: 0644]
queue-4.8/netlink-do-not-schedule-work-from-sk_destruct.patch [new file with mode: 0644]
queue-4.8/packet-fix-race-condition-in-packet_set_ring.patch [new file with mode: 0644]
queue-4.8/rtnetlink-fix-fdb-size-computation.patch [new file with mode: 0644]
queue-4.8/rtnl-fix-the-loop-index-update-error-in-rtnl_dump_ifinfo.patch [new file with mode: 0644]
queue-4.8/sh_eth-remove-unchecked-interrupts-for-rz-a1.patch [new file with mode: 0644]
queue-4.8/sparc32-fix-inverted-invalid_frame_pointer-checks-on-sigreturns.patch [new file with mode: 0644]
queue-4.8/sparc64-fix-compile-warning-section-mismatch-in-find_node.patch [new file with mode: 0644]
queue-4.8/sparc64-fix-find_node-warning-if-numa-node-cannot-be-found.patch [new file with mode: 0644]
queue-4.8/tipc-check-minimum-bearer-mtu.patch [new file with mode: 0644]
queue-4.8/udplite-call-proper-backlog-handlers.patch [new file with mode: 0644]
queue-4.8/virtio-net-add-a-missing-synchronize_net.patch [new file with mode: 0644]

diff --git a/queue-4.8/af_unix-conditionally-use-freezable-blocking-calls-in-read.patch b/queue-4.8/af_unix-conditionally-use-freezable-blocking-calls-in-read.patch
new file mode 100644 (file)
index 0000000..c343923
--- /dev/null
@@ -0,0 +1,102 @@
+From foo@baz Thu Dec  8 07:19:12 CET 2016
+From: WANG Cong <xiyou.wangcong@gmail.com>
+Date: Thu, 17 Nov 2016 15:55:26 -0800
+Subject: af_unix: conditionally use freezable blocking calls in read
+
+From: WANG Cong <xiyou.wangcong@gmail.com>
+
+
+[ Upstream commit 06a77b07e3b44aea2b3c0e64de420ea2cfdcbaa9 ]
+
+Commit 2b15af6f95 ("af_unix: use freezable blocking calls in read")
+converts schedule_timeout() to its freezable version, it was probably
+correct at that time, but later, commit 2b514574f7e8
+("net: af_unix: implement splice for stream af_unix sockets") breaks
+the strong requirement for a freezable sleep, according to
+commit 0f9548ca1091:
+
+    We shouldn't try_to_freeze if locks are held.  Holding a lock can cause a
+    deadlock if the lock is later acquired in the suspend or hibernate path
+    (e.g.  by dpm).  Holding a lock can also cause a deadlock in the case of
+    cgroup_freezer if a lock is held inside a frozen cgroup that is later
+    acquired by a process outside that group.
+
+The pipe_lock is still held at that point.
+
+So use freezable version only for the recvmsg call path, avoid impact for
+Android.
+
+Fixes: 2b514574f7e8 ("net: af_unix: implement splice for stream af_unix sockets")
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Colin Cross <ccross@android.com>
+Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/unix/af_unix.c |   17 +++++++++++------
+ 1 file changed, 11 insertions(+), 6 deletions(-)
+
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -2199,7 +2199,8 @@ out:
+  *    Sleep until more data has arrived. But check for races..
+  */
+ static long unix_stream_data_wait(struct sock *sk, long timeo,
+-                                struct sk_buff *last, unsigned int last_len)
++                                struct sk_buff *last, unsigned int last_len,
++                                bool freezable)
+ {
+       struct sk_buff *tail;
+       DEFINE_WAIT(wait);
+@@ -2220,7 +2221,10 @@ static long unix_stream_data_wait(struct
+               sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+               unix_state_unlock(sk);
+-              timeo = freezable_schedule_timeout(timeo);
++              if (freezable)
++                      timeo = freezable_schedule_timeout(timeo);
++              else
++                      timeo = schedule_timeout(timeo);
+               unix_state_lock(sk);
+               if (sock_flag(sk, SOCK_DEAD))
+@@ -2250,7 +2254,8 @@ struct unix_stream_read_state {
+       unsigned int splice_flags;
+ };
+-static int unix_stream_read_generic(struct unix_stream_read_state *state)
++static int unix_stream_read_generic(struct unix_stream_read_state *state,
++                                  bool freezable)
+ {
+       struct scm_cookie scm;
+       struct socket *sock = state->socket;
+@@ -2330,7 +2335,7 @@ again:
+                       mutex_unlock(&u->iolock);
+                       timeo = unix_stream_data_wait(sk, timeo, last,
+-                                                    last_len);
++                                                    last_len, freezable);
+                       if (signal_pending(current)) {
+                               err = sock_intr_errno(timeo);
+@@ -2472,7 +2477,7 @@ static int unix_stream_recvmsg(struct so
+               .flags = flags
+       };
+-      return unix_stream_read_generic(&state);
++      return unix_stream_read_generic(&state, true);
+ }
+ static ssize_t skb_unix_socket_splice(struct sock *sk,
+@@ -2518,7 +2523,7 @@ static ssize_t unix_stream_splice_read(s
+           flags & SPLICE_F_NONBLOCK)
+               state.flags = MSG_DONTWAIT;
+-      return unix_stream_read_generic(&state);
++      return unix_stream_read_generic(&state, false);
+ }
+ static int unix_shutdown(struct socket *sock, int mode)
diff --git a/queue-4.8/cdc_ether-fix-handling-connection-notification.patch b/queue-4.8/cdc_ether-fix-handling-connection-notification.patch
new file mode 100644 (file)
index 0000000..c146462
--- /dev/null
@@ -0,0 +1,109 @@
+From foo@baz Thu Dec  8 07:19:12 CET 2016
+From: Kristian Evensen <kristian.evensen@gmail.com>
+Date: Thu, 1 Dec 2016 14:23:17 +0100
+Subject: cdc_ether: Fix handling connection notification
+
+From: Kristian Evensen <kristian.evensen@gmail.com>
+
+
+[ Upstream commit d5c83d0d1d83b3798c71e0c8b7c3624d39c91d88 ]
+
+Commit bfe9b9d2df66 ("cdc_ether: Improve ZTE MF823/831/910 handling")
+introduced a work-around in usbnet_cdc_status() for devices that exported
+cdc carrier on twice on connect. Before the commit, this behavior caused
+the link state to be incorrect. It was assumed that all CDC Ethernet
+devices would either export this behavior, or send one off and then one on
+notification (which seems to be the default behavior).
+
+Unfortunately, it turns out multiple devices sends a connection
+notification multiple times per second (via an interrupt), even when
+connection state does not change. This has been observed with several
+different USB LAN dongles (at least), for example 13b1:0041 (Linksys).
+After bfe9b9d2df66, the link state has been set as down and then up for
+each notification. This has caused a flood of Netlink NEWLINK messages and
+syslog to be flooded with messages similar to:
+
+cdc_ether 2-1:2.0 eth1: kevent 12 may have been dropped
+
+This commit fixes the behavior by reverting usbnet_cdc_status() to how it
+was before bfe9b9d2df66. The work-around has been moved to a separate
+status-function which is only called when a known, affect device is
+detected.
+
+v1->v2:
+
+* Do not open-code netif_carrier_ok() (thanks Henning Schild).
+* Call netif_carrier_off() instead of usb_link_change(). This prevents
+calling schedule_work() twice without giving the work queue a chance to be
+processed (thanks Bjørn Mork).
+
+Fixes: bfe9b9d2df66 ("cdc_ether: Improve ZTE MF823/831/910 handling")
+Reported-by: Henning Schild <henning.schild@siemens.com>
+Signed-off-by: Kristian Evensen <kristian.evensen@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/usb/cdc_ether.c |   38 +++++++++++++++++++++++++++++++-------
+ 1 file changed, 31 insertions(+), 7 deletions(-)
+
+--- a/drivers/net/usb/cdc_ether.c
++++ b/drivers/net/usb/cdc_ether.c
+@@ -388,12 +388,6 @@ void usbnet_cdc_status(struct usbnet *de
+       case USB_CDC_NOTIFY_NETWORK_CONNECTION:
+               netif_dbg(dev, timer, dev->net, "CDC: carrier %s\n",
+                         event->wValue ? "on" : "off");
+-
+-              /* Work-around for devices with broken off-notifications */
+-              if (event->wValue &&
+-                  !test_bit(__LINK_STATE_NOCARRIER, &dev->net->state))
+-                      usbnet_link_change(dev, 0, 0);
+-
+               usbnet_link_change(dev, !!event->wValue, 0);
+               break;
+       case USB_CDC_NOTIFY_SPEED_CHANGE:       /* tx/rx rates */
+@@ -466,6 +460,36 @@ static int usbnet_cdc_zte_rx_fixup(struc
+       return 1;
+ }
++/* Ensure correct link state
++ *
++ * Some devices (ZTE MF823/831/910) export two carrier on notifications when
++ * connected. This causes the link state to be incorrect. Work around this by
++ * always setting the state to off, then on.
++ */
++void usbnet_cdc_zte_status(struct usbnet *dev, struct urb *urb)
++{
++      struct usb_cdc_notification *event;
++
++      if (urb->actual_length < sizeof(*event))
++              return;
++
++      event = urb->transfer_buffer;
++
++      if (event->bNotificationType != USB_CDC_NOTIFY_NETWORK_CONNECTION) {
++              usbnet_cdc_status(dev, urb);
++              return;
++      }
++
++      netif_dbg(dev, timer, dev->net, "CDC: carrier %s\n",
++                event->wValue ? "on" : "off");
++
++      if (event->wValue &&
++          netif_carrier_ok(dev->net))
++              netif_carrier_off(dev->net);
++
++      usbnet_link_change(dev, !!event->wValue, 0);
++}
++
+ static const struct driver_info       cdc_info = {
+       .description =  "CDC Ethernet Device",
+       .flags =        FLAG_ETHER | FLAG_POINTTOPOINT,
+@@ -481,7 +505,7 @@ static const struct driver_info    zte_cdc_
+       .flags =        FLAG_ETHER | FLAG_POINTTOPOINT,
+       .bind =         usbnet_cdc_zte_bind,
+       .unbind =       usbnet_cdc_unbind,
+-      .status =       usbnet_cdc_status,
++      .status =       usbnet_cdc_zte_status,
+       .set_rx_mode =  usbnet_cdc_update_filter,
+       .manage_power = usbnet_manage_power,
+       .rx_fixup = usbnet_cdc_zte_rx_fixup,
diff --git a/queue-4.8/geneve-avoid-use-after-free-of-skb-data.patch b/queue-4.8/geneve-avoid-use-after-free-of-skb-data.patch
new file mode 100644 (file)
index 0000000..dc8d5dd
--- /dev/null
@@ -0,0 +1,96 @@
+From foo@baz Thu Dec  8 07:19:12 CET 2016
+From: Sabrina Dubroca <sd@queasysnail.net>
+Date: Fri, 2 Dec 2016 16:49:29 +0100
+Subject: geneve: avoid use-after-free of skb->data
+
+From: Sabrina Dubroca <sd@queasysnail.net>
+
+
+[ Upstream commit 5b01014759991887b1e450c9def01e58c02ab81b ]
+
+geneve{,6}_build_skb can end up doing a pskb_expand_head(), which
+makes the ip_hdr(skb) reference we stashed earlier stale. Since it's
+only needed as an argument to ip_tunnel_ecn_encap(), move this
+directly in the function call.
+
+Fixes: 08399efc6319 ("geneve: ensure ECN info is handled properly in all tx/rx paths")
+Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
+Reviewed-by: John W. Linville <linville@tuxdriver.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/geneve.c |   14 ++++----------
+ 1 file changed, 4 insertions(+), 10 deletions(-)
+
+--- a/drivers/net/geneve.c
++++ b/drivers/net/geneve.c
+@@ -844,7 +844,6 @@ static netdev_tx_t geneve_xmit_skb(struc
+       struct geneve_dev *geneve = netdev_priv(dev);
+       struct geneve_sock *gs4 = geneve->sock4;
+       struct rtable *rt = NULL;
+-      const struct iphdr *iip; /* interior IP header */
+       int err = -EINVAL;
+       struct flowi4 fl4;
+       __u8 tos, ttl;
+@@ -871,8 +870,6 @@ static netdev_tx_t geneve_xmit_skb(struc
+       sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
+       skb_reset_mac_header(skb);
+-      iip = ip_hdr(skb);
+-
+       if (info) {
+               const struct ip_tunnel_key *key = &info->key;
+               u8 *opts = NULL;
+@@ -892,7 +889,7 @@ static netdev_tx_t geneve_xmit_skb(struc
+               if (unlikely(err))
+                       goto tx_error;
+-              tos = ip_tunnel_ecn_encap(key->tos, iip, skb);
++              tos = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb);
+               ttl = key->ttl;
+               df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
+       } else {
+@@ -901,7 +898,7 @@ static netdev_tx_t geneve_xmit_skb(struc
+               if (unlikely(err))
+                       goto tx_error;
+-              tos = ip_tunnel_ecn_encap(fl4.flowi4_tos, iip, skb);
++              tos = ip_tunnel_ecn_encap(fl4.flowi4_tos, ip_hdr(skb), skb);
+               ttl = geneve->ttl;
+               if (!ttl && IN_MULTICAST(ntohl(fl4.daddr)))
+                       ttl = 1;
+@@ -934,7 +931,6 @@ static netdev_tx_t geneve6_xmit_skb(stru
+       struct geneve_dev *geneve = netdev_priv(dev);
+       struct geneve_sock *gs6 = geneve->sock6;
+       struct dst_entry *dst = NULL;
+-      const struct iphdr *iip; /* interior IP header */
+       int err = -EINVAL;
+       struct flowi6 fl6;
+       __u8 prio, ttl;
+@@ -959,8 +955,6 @@ static netdev_tx_t geneve6_xmit_skb(stru
+       sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
+       skb_reset_mac_header(skb);
+-      iip = ip_hdr(skb);
+-
+       if (info) {
+               const struct ip_tunnel_key *key = &info->key;
+               u8 *opts = NULL;
+@@ -981,7 +975,7 @@ static netdev_tx_t geneve6_xmit_skb(stru
+               if (unlikely(err))
+                       goto tx_error;
+-              prio = ip_tunnel_ecn_encap(key->tos, iip, skb);
++              prio = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb);
+               ttl = key->ttl;
+               label = info->key.label;
+       } else {
+@@ -991,7 +985,7 @@ static netdev_tx_t geneve6_xmit_skb(stru
+                       goto tx_error;
+               prio = ip_tunnel_ecn_encap(ip6_tclass(fl6.flowlabel),
+-                                         iip, skb);
++                                         ip_hdr(skb), skb);
+               ttl = geneve->ttl;
+               if (!ttl && ipv6_addr_is_multicast(&fl6.daddr))
+                       ttl = 1;
diff --git a/queue-4.8/gro_cells-mark-napi-struct-as-not-busy-poll-candidates.patch b/queue-4.8/gro_cells-mark-napi-struct-as-not-busy-poll-candidates.patch
new file mode 100644 (file)
index 0000000..e438991
--- /dev/null
@@ -0,0 +1,52 @@
+From foo@baz Thu Dec  8 07:19:12 CET 2016
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 14 Nov 2016 16:28:42 -0800
+Subject: gro_cells: mark napi struct as not busy poll candidates
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit e88a2766143a27bfe6704b4493b214de4094cf29 ]
+
+Rolf Neugebauer reported very long delays at netns dismantle.
+
+Eric W. Biederman was kind enough to look at this problem
+and noticed synchronize_net() occurring from netif_napi_del() that was
+added in linux-4.5
+
+Busy polling makes no sense for tunnels NAPI.
+If busy poll is used for sessions over tunnels, the poller will need to
+poll the physical device queue anyway.
+
+netif_tx_napi_add() could be used here, but function name is misleading,
+and renaming it is not stable material, so set NAPI_STATE_NO_BUSY_POLL
+bit directly.
+
+This will avoid inserting gro_cells napi structures in napi_hash[]
+and avoid the problematic synchronize_net() (per possible cpu) that
+Rolf reported.
+
+Fixes: 93d05d4a320c ("net: provide generic busy polling to all NAPI drivers")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Rolf Neugebauer <rolf.neugebauer@docker.com>
+Reported-by: Eric W. Biederman <ebiederm@xmission.com>
+Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
+Tested-by: Rolf Neugebauer <rolf.neugebauer@docker.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/gro_cells.h |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/include/net/gro_cells.h
++++ b/include/net/gro_cells.h
+@@ -68,6 +68,9 @@ static inline int gro_cells_init(struct
+               struct gro_cell *cell = per_cpu_ptr(gcells->cells, i);
+               __skb_queue_head_init(&cell->napi_skbs);
++
++              set_bit(NAPI_STATE_NO_BUSY_POLL, &cell->napi.state);
++
+               netif_napi_add(dev, &cell->napi, gro_cell_poll, 64);
+               napi_enable(&cell->napi);
+       }
diff --git a/queue-4.8/gso-reload-iph-after-pskb_may_pull.patch b/queue-4.8/gso-reload-iph-after-pskb_may_pull.patch
new file mode 100644 (file)
index 0000000..75b13c4
--- /dev/null
@@ -0,0 +1,35 @@
+From foo@baz Thu Dec  8 07:19:12 CET 2016
+From: Arnaldo Carvalho de Melo <acme@kernel.org>
+Date: Mon, 28 Nov 2016 12:36:58 -0300
+Subject: GSO: Reload iph after pskb_may_pull
+
+From: Arnaldo Carvalho de Melo <acme@kernel.org>
+
+
+[ Upstream commit a510887824171ad260cc4a2603396c6247fdd091 ]
+
+As it may get stale and lead to use after free.
+
+Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
+Cc: Alexander Duyck <aduyck@mirantis.com>
+Cc: Andrey Konovalov <andreyknvl@google.com>
+Fixes: cbc53e08a793 ("GSO: Add GSO type for fixed IPv4 ID")
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Acked-by: Alexander Duyck <alexander.h.duyck@intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/af_inet.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/af_inet.c
++++ b/net/ipv4/af_inet.c
+@@ -1237,7 +1237,7 @@ struct sk_buff *inet_gso_segment(struct
+               fixedid = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID);
+               /* fixed ID is invalid if DF bit is not set */
+-              if (fixedid && !(iph->frag_off & htons(IP_DF)))
++              if (fixedid && !(ip_hdr(skb)->frag_off & htons(IP_DF)))
+                       goto out;
+       }
diff --git a/queue-4.8/ip6_offload-check-segs-for-null-in-ipv6_gso_segment.patch b/queue-4.8/ip6_offload-check-segs-for-null-in-ipv6_gso_segment.patch
new file mode 100644 (file)
index 0000000..722f71c
--- /dev/null
@@ -0,0 +1,111 @@
+From foo@baz Thu Dec  8 07:19:12 CET 2016
+From: Artem Savkov <asavkov@redhat.com>
+Date: Thu, 1 Dec 2016 14:06:04 +0100
+Subject: ip6_offload: check segs for NULL in ipv6_gso_segment.
+
+From: Artem Savkov <asavkov@redhat.com>
+
+
+[ Upstream commit 6b6ebb6b01c873d0cfe3449e8a1219ee6e5fc022 ]
+
+segs needs to be checked for being NULL in ipv6_gso_segment() before calling
+skb_shinfo(segs), otherwise kernel can run into a NULL-pointer dereference:
+
+[   97.811262] BUG: unable to handle kernel NULL pointer dereference at 00000000000000cc
+[   97.819112] IP: [<ffffffff816e52f9>] ipv6_gso_segment+0x119/0x2f0
+[   97.825214] PGD 0 [   97.827047]
+[   97.828540] Oops: 0000 [#1] SMP
+[   97.831678] Modules linked in: vhost_net vhost macvtap macvlan nfsv3 rpcsec_gss_krb5
+nfsv4 dns_resolver nfs fscache xt_CHECKSUM iptable_mangle ipt_MASQUERADE nf_nat_masquerade_ipv4
+iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack
+ipt_REJECT nf_reject_ipv4 tun ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter
+bridge stp llc snd_hda_codec_realtek snd_hda_codec_hdmi snd_hda_codec_generic snd_hda_intel
+snd_hda_codec edac_mce_amd snd_hda_core edac_core snd_hwdep kvm_amd snd_seq kvm snd_seq_device
+snd_pcm irqbypass snd_timer ppdev parport_serial snd parport_pc k10temp pcspkr soundcore parport
+sp5100_tco shpchp sg wmi i2c_piix4 acpi_cpufreq nfsd auth_rpcgss nfs_acl lockd grace sunrpc
+ip_tables xfs libcrc32c sr_mod cdrom sd_mod ata_generic pata_acpi amdkfd amd_iommu_v2 radeon
+broadcom bcm_phy_lib i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops
+ttm ahci serio_raw tg3 firewire_ohci libahci pata_atiixp drm ptp libata firewire_core pps_core
+i2c_core crc_itu_t fjes dm_mirror dm_region_hash dm_log dm_mod
+[   97.927721] CPU: 1 PID: 3504 Comm: vhost-3495 Not tainted 4.9.0-7.el7.test.x86_64 #1
+[   97.935457] Hardware name: AMD Snook/Snook, BIOS ESK0726A 07/26/2010
+[   97.941806] task: ffff880129a1c080 task.stack: ffffc90001bcc000
+[   97.947720] RIP: 0010:[<ffffffff816e52f9>]  [<ffffffff816e52f9>] ipv6_gso_segment+0x119/0x2f0
+[   97.956251] RSP: 0018:ffff88012fc43a10  EFLAGS: 00010207
+[   97.961557] RAX: 0000000000000000 RBX: ffff8801292c8700 RCX: 0000000000000594
+[   97.968687] RDX: 0000000000000593 RSI: ffff880129a846c0 RDI: 0000000000240000
+[   97.975814] RBP: ffff88012fc43a68 R08: ffff880129a8404e R09: 0000000000000000
+[   97.982942] R10: 0000000000000000 R11: ffff880129a84076 R12: 00000020002949b3
+[   97.990070] R13: ffff88012a580000 R14: 0000000000000000 R15: ffff88012a580000
+[   97.997198] FS:  0000000000000000(0000) GS:ffff88012fc40000(0000) knlGS:0000000000000000
+[   98.005280] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[   98.011021] CR2: 00000000000000cc CR3: 0000000126c5d000 CR4: 00000000000006e0
+[   98.018149] Stack:
+[   98.020157]  00000000ffffffff ffff88012fc43ac8 ffffffffa017ad0a 000000000000000e
+[   98.027584]  0000001300000000 0000000077d59998 ffff8801292c8700 00000020002949b3
+[   98.035010]  ffff88012a580000 0000000000000000 ffff88012a580000 ffff88012fc43a98
+[   98.042437] Call Trace:
+[   98.044879]  <IRQ> [   98.046803]  [<ffffffffa017ad0a>] ? tg3_start_xmit+0x84a/0xd60 [tg3]
+[   98.053156]  [<ffffffff815eeee0>] skb_mac_gso_segment+0xb0/0x130
+[   98.059158]  [<ffffffff815eefd3>] __skb_gso_segment+0x73/0x110
+[   98.064985]  [<ffffffff815ef40d>] validate_xmit_skb+0x12d/0x2b0
+[   98.070899]  [<ffffffff815ef5d2>] validate_xmit_skb_list+0x42/0x70
+[   98.077073]  [<ffffffff81618560>] sch_direct_xmit+0xd0/0x1b0
+[   98.082726]  [<ffffffff815efd86>] __dev_queue_xmit+0x486/0x690
+[   98.088554]  [<ffffffff8135c135>] ? cpumask_next_and+0x35/0x50
+[   98.094380]  [<ffffffff815effa0>] dev_queue_xmit+0x10/0x20
+[   98.099863]  [<ffffffffa09ce057>] br_dev_queue_push_xmit+0xa7/0x170 [bridge]
+[   98.106907]  [<ffffffffa09ce161>] br_forward_finish+0x41/0xc0 [bridge]
+[   98.113430]  [<ffffffff81627cf2>] ? nf_iterate+0x52/0x60
+[   98.118735]  [<ffffffff81627d6b>] ? nf_hook_slow+0x6b/0xc0
+[   98.124216]  [<ffffffffa09ce32c>] __br_forward+0x14c/0x1e0 [bridge]
+[   98.130480]  [<ffffffffa09ce120>] ? br_dev_queue_push_xmit+0x170/0x170 [bridge]
+[   98.137785]  [<ffffffffa09ce4bd>] br_forward+0x9d/0xb0 [bridge]
+[   98.143701]  [<ffffffffa09cfbb7>] br_handle_frame_finish+0x267/0x560 [bridge]
+[   98.150834]  [<ffffffffa09d0064>] br_handle_frame+0x174/0x2f0 [bridge]
+[   98.157355]  [<ffffffff8102fb89>] ? sched_clock+0x9/0x10
+[   98.162662]  [<ffffffff810b63b2>] ? sched_clock_cpu+0x72/0xa0
+[   98.168403]  [<ffffffff815eccf5>] __netif_receive_skb_core+0x1e5/0xa20
+[   98.174926]  [<ffffffff813659f9>] ? timerqueue_add+0x59/0xb0
+[   98.180580]  [<ffffffff815ed548>] __netif_receive_skb+0x18/0x60
+[   98.186494]  [<ffffffff815ee625>] process_backlog+0x95/0x140
+[   98.192145]  [<ffffffff815edccd>] net_rx_action+0x16d/0x380
+[   98.197713]  [<ffffffff8170cff1>] __do_softirq+0xd1/0x283
+[   98.203106]  [<ffffffff8170b2bc>] do_softirq_own_stack+0x1c/0x30
+[   98.209107]  <EOI> [   98.211029]  [<ffffffff8108a5c0>] do_softirq+0x50/0x60
+[   98.216166]  [<ffffffff815ec853>] netif_rx_ni+0x33/0x80
+[   98.221386]  [<ffffffffa09eeff7>] tun_get_user+0x487/0x7f0 [tun]
+[   98.227388]  [<ffffffffa09ef3ab>] tun_sendmsg+0x4b/0x60 [tun]
+[   98.233129]  [<ffffffffa0b68932>] handle_tx+0x282/0x540 [vhost_net]
+[   98.239392]  [<ffffffffa0b68c25>] handle_tx_kick+0x15/0x20 [vhost_net]
+[   98.245916]  [<ffffffffa0abacfe>] vhost_worker+0x9e/0xf0 [vhost]
+[   98.251919]  [<ffffffffa0abac60>] ? vhost_umem_alloc+0x40/0x40 [vhost]
+[   98.258440]  [<ffffffff81003a47>] ? do_syscall_64+0x67/0x180
+[   98.264094]  [<ffffffff810a44d9>] kthread+0xd9/0xf0
+[   98.268965]  [<ffffffff810a4400>] ? kthread_park+0x60/0x60
+[   98.274444]  [<ffffffff8170a4d5>] ret_from_fork+0x25/0x30
+[   98.279836] Code: 8b 93 d8 00 00 00 48 2b 93 d0 00 00 00 4c 89 e6 48 89 df 66 89 93 c2 00 00 00 ff 10 48 3d 00 f0 ff ff 49 89 c2 0f 87 52 01 00 00 <41> 8b 92 cc 00 00 00 48 8b 80 d0 00 00 00 44 0f b7 74 10 06 66
+[   98.299425] RIP  [<ffffffff816e52f9>] ipv6_gso_segment+0x119/0x2f0
+[   98.305612]  RSP <ffff88012fc43a10>
+[   98.309094] CR2: 00000000000000cc
+[   98.312406] ---[ end trace 726a2c7a2d2d78d0 ]---
+
+Signed-off-by: Artem Savkov <asavkov@redhat.com>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_offload.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv6/ip6_offload.c
++++ b/net/ipv6/ip6_offload.c
+@@ -98,7 +98,7 @@ static struct sk_buff *ipv6_gso_segment(
+               segs = ops->callbacks.gso_segment(skb, features);
+       }
+-      if (IS_ERR(segs))
++      if (IS_ERR_OR_NULL(segs))
+               goto out;
+       for (skb = segs; skb; skb = skb->next) {
diff --git a/queue-4.8/ip6_tunnel-disable-caching-when-the-traffic-class-is-inherited.patch b/queue-4.8/ip6_tunnel-disable-caching-when-the-traffic-class-is-inherited.patch
new file mode 100644 (file)
index 0000000..e5b2ff2
--- /dev/null
@@ -0,0 +1,62 @@
+From foo@baz Thu Dec  8 07:19:12 CET 2016
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Wed, 16 Nov 2016 16:26:46 +0100
+Subject: ip6_tunnel: disable caching when the traffic class is inherited
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+
+[ Upstream commit b5c2d49544e5930c96e2632a7eece3f4325a1888 ]
+
+If an ip6 tunnel is configured to inherit the traffic class from
+the inner header, the dst_cache must be disabled or it will foul
+the policy routing.
+
+The issue is apprently there since at leat Linux-2.6.12-rc2.
+
+Reported-by: Liam McBirnie <liam.mcbirnie@boeing.com>
+Cc: Liam McBirnie <liam.mcbirnie@boeing.com>
+Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_tunnel.c |   13 +++++++++++--
+ 1 file changed, 11 insertions(+), 2 deletions(-)
+
+--- a/net/ipv6/ip6_tunnel.c
++++ b/net/ipv6/ip6_tunnel.c
+@@ -1014,6 +1014,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, st
+       int mtu;
+       unsigned int psh_hlen = sizeof(struct ipv6hdr) + t->encap_hlen;
+       unsigned int max_headroom = psh_hlen;
++      bool use_cache = false;
+       int err = -1;
+       /* NBMA tunnel */
+@@ -1038,7 +1039,15 @@ int ip6_tnl_xmit(struct sk_buff *skb, st
+               memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
+               neigh_release(neigh);
+-      } else if (!fl6->flowi6_mark)
++      } else if (!(t->parms.flags &
++                   (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) {
++              /* enable the cache only only if the routing decision does
++               * not depend on the current inner header value
++               */
++              use_cache = true;
++      }
++
++      if (use_cache)
+               dst = dst_cache_get(&t->dst_cache);
+       if (!ip6_tnl_xmit_ctl(t, &fl6->saddr, &fl6->daddr))
+@@ -1113,7 +1122,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, st
+               skb = new_skb;
+       }
+-      if (!fl6->flowi6_mark && ndst)
++      if (use_cache && ndst)
+               dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr);
+       skb_dst_set(skb, dst);
diff --git a/queue-4.8/ipv4-drop-leaf-from-suffix-pull-push-functions.patch b/queue-4.8/ipv4-drop-leaf-from-suffix-pull-push-functions.patch
new file mode 100644 (file)
index 0000000..8e5c230
--- /dev/null
@@ -0,0 +1,87 @@
+From foo@baz Thu Dec  8 07:19:12 CET 2016
+From: Alexander Duyck <alexander.h.duyck@intel.com>
+Date: Thu, 1 Dec 2016 07:27:52 -0500
+Subject: ipv4: Drop leaf from suffix pull/push functions
+
+From: Alexander Duyck <alexander.h.duyck@intel.com>
+
+
+[ Upstream commit 1a239173cccff726b60ac6a9c79ae4a1e26cfa49 ]
+
+It wasn't necessary to pass a leaf in when doing the suffix updates so just
+drop it.  Instead just pass the suffix and work with that.
+
+Since we dropped the leaf there is no need to include that in the name so
+the names are updated to node_push_suffix and node_pull_suffix.
+
+Finally I noticed that the logic for pulling the suffix length back
+actually had some issues.  Specifically it would stop prematurely if there
+was a longer suffix, but it was not as long as the original suffix.  I
+updated the code to address that in node_pull_suffix.
+
+Fixes: 5405afd1a306 ("fib_trie: Add tracking value for suffix length")
+Suggested-by: Robert Shearman <rshearma@brocade.com>
+Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
+Reviewed-by: Robert Shearman <rshearma@brocade.com>
+Tested-by: Robert Shearman <rshearma@brocade.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/fib_trie.c |   26 ++++++++++++++------------
+ 1 file changed, 14 insertions(+), 12 deletions(-)
+
+--- a/net/ipv4/fib_trie.c
++++ b/net/ipv4/fib_trie.c
+@@ -892,22 +892,24 @@ static struct key_vector *resize(struct
+       return tp;
+ }
+-static void leaf_pull_suffix(struct key_vector *tp, struct key_vector *l)
++static void node_pull_suffix(struct key_vector *tn, unsigned char slen)
+ {
+-      while ((tp->slen > tp->pos) && (tp->slen > l->slen)) {
+-              if (update_suffix(tp) > l->slen)
++      unsigned char node_slen = tn->slen;
++
++      while ((node_slen > tn->pos) && (node_slen > slen)) {
++              slen = update_suffix(tn);
++              if (node_slen == slen)
+                       break;
+-              tp = node_parent(tp);
++
++              tn = node_parent(tn);
++              node_slen = tn->slen;
+       }
+ }
+-static void leaf_push_suffix(struct key_vector *tn, struct key_vector *l)
++static void node_push_suffix(struct key_vector *tn, unsigned char slen)
+ {
+-      /* if this is a new leaf then tn will be NULL and we can sort
+-       * out parent suffix lengths as a part of trie_rebalance
+-       */
+-      while (tn->slen < l->slen) {
+-              tn->slen = l->slen;
++      while (tn->slen < slen) {
++              tn->slen = slen;
+               tn = node_parent(tn);
+       }
+ }
+@@ -1069,7 +1071,7 @@ static int fib_insert_alias(struct trie
+       /* if we added to the tail node then we need to update slen */
+       if (l->slen < new->fa_slen) {
+               l->slen = new->fa_slen;
+-              leaf_push_suffix(tp, l);
++              node_push_suffix(tp, new->fa_slen);
+       }
+       return 0;
+@@ -1482,7 +1484,7 @@ static void fib_remove_alias(struct trie
+       /* update the trie with the latest suffix length */
+       l->slen = fa->fa_slen;
+-      leaf_pull_suffix(tp, l);
++      node_pull_suffix(tp, fa->fa_slen);
+ }
+ /* Caller must hold RTNL. */
diff --git a/queue-4.8/ipv4-drop-suffix-update-from-resize-code.patch b/queue-4.8/ipv4-drop-suffix-update-from-resize-code.patch
new file mode 100644 (file)
index 0000000..fb055cf
--- /dev/null
@@ -0,0 +1,124 @@
+From foo@baz Thu Dec  8 07:19:12 CET 2016
+From: Alexander Duyck <alexander.h.duyck@intel.com>
+Date: Thu, 1 Dec 2016 07:27:57 -0500
+Subject: ipv4: Drop suffix update from resize code
+
+From: Alexander Duyck <alexander.h.duyck@intel.com>
+
+
+[ Upstream commit a52ca62c4a6771028da9c1de934cdbcd93d54bb4 ]
+
+It has been reported that update_suffix can be expensive when it is called
+on a large node in which most of the suffix lengths are the same.  The time
+required to add 200K entries had increased from around 3 seconds to almost
+49 seconds.
+
+In order to address this we need to move the code for updating the suffix
+out of resize and instead just have it handled in the cases where we are
+pushing a node that increases the suffix length, or will decrease the
+suffix length.
+
+Fixes: 5405afd1a306 ("fib_trie: Add tracking value for suffix length")
+Reported-by: Robert Shearman <rshearma@brocade.com>
+Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
+Reviewed-by: Robert Shearman <rshearma@brocade.com>
+Tested-by: Robert Shearman <rshearma@brocade.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/fib_trie.c |   42 +++++++++++++++++++++---------------------
+ 1 file changed, 21 insertions(+), 21 deletions(-)
+
+--- a/net/ipv4/fib_trie.c
++++ b/net/ipv4/fib_trie.c
+@@ -681,6 +681,13 @@ static unsigned char update_suffix(struc
+ {
+       unsigned char slen = tn->pos;
+       unsigned long stride, i;
++      unsigned char slen_max;
++
++      /* only vector 0 can have a suffix length greater than or equal to
++       * tn->pos + tn->bits, the second highest node will have a suffix
++       * length at most of tn->pos + tn->bits - 1
++       */
++      slen_max = min_t(unsigned char, tn->pos + tn->bits - 1, tn->slen);
+       /* search though the list of children looking for nodes that might
+        * have a suffix greater than the one we currently have.  This is
+@@ -698,12 +705,8 @@ static unsigned char update_suffix(struc
+               slen = n->slen;
+               i &= ~(stride - 1);
+-              /* if slen covers all but the last bit we can stop here
+-               * there will be nothing longer than that since only node
+-               * 0 and 1 << (bits - 1) could have that as their suffix
+-               * length.
+-               */
+-              if ((slen + 1) >= (tn->pos + tn->bits))
++              /* stop searching if we have hit the maximum possible value */
++              if (slen >= slen_max)
+                       break;
+       }
+@@ -875,21 +878,7 @@ static struct key_vector *resize(struct
+               return collapse(t, tn);
+       /* update parent in case halve failed */
+-      tp = node_parent(tn);
+-
+-      /* Return if at least one deflate was run */
+-      if (max_work != MAX_WORK)
+-              return tp;
+-
+-      /* push the suffix length to the parent node */
+-      if (tn->slen > tn->pos) {
+-              unsigned char slen = update_suffix(tn);
+-
+-              if (slen > tp->slen)
+-                      tp->slen = slen;
+-      }
+-
+-      return tp;
++      return node_parent(tn);
+ }
+ static void node_pull_suffix(struct key_vector *tn, unsigned char slen)
+@@ -1030,6 +1019,7 @@ static int fib_insert_node(struct trie *
+       }
+       /* Case 3: n is NULL, and will just insert a new leaf */
++      node_push_suffix(tp, new->fa_slen);
+       NODE_INIT_PARENT(l, tp);
+       put_child_root(tp, key, l);
+       trie_rebalance(t, tp);
+@@ -1472,6 +1462,8 @@ static void fib_remove_alias(struct trie
+        * out parent suffix lengths as a part of trie_rebalance
+        */
+       if (hlist_empty(&l->leaf)) {
++              if (tp->slen == l->slen)
++                      node_pull_suffix(tp, tp->pos);
+               put_child_root(tp, l->key, NULL);
+               node_free(l);
+               trie_rebalance(t, tp);
+@@ -1755,6 +1747,10 @@ void fib_table_flush_external(struct fib
+                       if (IS_TRIE(pn))
+                               break;
++                      /* update the suffix to address pulled leaves */
++                      if (pn->slen > pn->pos)
++                              update_suffix(pn);
++
+                       /* resize completed node */
+                       pn = resize(t, pn);
+                       cindex = get_index(pkey, pn);
+@@ -1830,6 +1826,10 @@ int fib_table_flush(struct fib_table *tb
+                       if (IS_TRIE(pn))
+                               break;
++                      /* update the suffix to address pulled leaves */
++                      if (pn->slen > pn->pos)
++                              update_suffix(pn);
++
+                       /* resize completed node */
+                       pn = resize(t, pn);
+                       cindex = get_index(pkey, pn);
diff --git a/queue-4.8/ipv4-fix-memory-leak-in-exception-case-for-splitting-tries.patch b/queue-4.8/ipv4-fix-memory-leak-in-exception-case-for-splitting-tries.patch
new file mode 100644 (file)
index 0000000..6530af9
--- /dev/null
@@ -0,0 +1,37 @@
+From foo@baz Thu Dec  8 07:19:12 CET 2016
+From: Alexander Duyck <alexander.h.duyck@intel.com>
+Date: Tue, 15 Nov 2016 05:46:12 -0500
+Subject: ipv4: Fix memory leak in exception case for splitting tries
+
+From: Alexander Duyck <alexander.h.duyck@intel.com>
+
+
+[ Upstream commit 3114cdfe66c156345b0ae34e2990472f277e0c1b ]
+
+Fix a small memory leak that can occur where we leak a fib_alias in the
+event of us not being able to insert it into the local table.
+
+Fixes: 0ddcf43d5d4a0 ("ipv4: FIB Local/MAIN table collapse")
+Reported-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/fib_trie.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/ipv4/fib_trie.c
++++ b/net/ipv4/fib_trie.c
+@@ -1713,8 +1713,10 @@ struct fib_table *fib_trie_unmerge(struc
+                               local_l = fib_find_node(lt, &local_tp, l->key);
+                       if (fib_insert_alias(lt, local_tp, local_l, new_fa,
+-                                           NULL, l->key))
++                                           NULL, l->key)) {
++                              kmem_cache_free(fn_alias_kmem, new_fa);
+                               goto out;
++                      }
+               }
+               /* stop loop if key wrapped back to 0 */
diff --git a/queue-4.8/ipv4-restore-fib_trie_flush_external-function-and-fix-call-ordering.patch b/queue-4.8/ipv4-restore-fib_trie_flush_external-function-and-fix-call-ordering.patch
new file mode 100644 (file)
index 0000000..7f531e5
--- /dev/null
@@ -0,0 +1,70 @@
+From foo@baz Thu Dec  8 07:19:12 CET 2016
+From: Alexander Duyck <alexander.h.duyck@intel.com>
+Date: Tue, 15 Nov 2016 05:46:06 -0500
+Subject: ipv4: Restore fib_trie_flush_external function and fix call ordering
+
+From: Alexander Duyck <alexander.h.duyck@intel.com>
+
+
+[ Upstream commit 3b7093346b326e5d3590c7d49f6aefe6fa5b2c9a, the FIB offload
+  removal didn't occur in 4.8 so that part of this patch isn't here.  However
+  we still need to fib_unmerge() bits. ]
+
+The patch that removed the FIB offload infrastructure was a bit too
+aggressive and also removed code needed to clean up us splitting the table
+if additional rules were added.  Specifically the function
+fib_trie_flush_external was called at the end of a new rule being added to
+flush the foreign trie entries from the main trie.
+
+I updated the code so that we only call fib_trie_flush_external on the main
+table so that we flush the entries for local from main.  This way we don't
+call it for every rule change which is what was happening previously.
+
+Fixes: 347e3b28c1ba2 ("switchdev: remove FIB offload infrastructure")
+Reported-by: Eric Dumazet <edumazet@google.com>
+Cc: Jiri Pirko <jiri@mellanox.com>
+Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/fib_frontend.c |   20 +++++++++++++++-----
+ 1 file changed, 15 insertions(+), 5 deletions(-)
+
+--- a/net/ipv4/fib_frontend.c
++++ b/net/ipv4/fib_frontend.c
+@@ -157,7 +157,7 @@ static void fib_replace_table(struct net
+ int fib_unmerge(struct net *net)
+ {
+-      struct fib_table *old, *new;
++      struct fib_table *old, *new, *main_table;
+       /* attempt to fetch local table if it has been allocated */
+       old = fib_get_table(net, RT_TABLE_LOCAL);
+@@ -168,11 +168,21 @@ int fib_unmerge(struct net *net)
+       if (!new)
+               return -ENOMEM;
++      /* table is already unmerged */
++      if (new == old)
++              return 0;
++
+       /* replace merged table with clean table */
+-      if (new != old) {
+-              fib_replace_table(net, old, new);
+-              fib_free_table(old);
+-      }
++      fib_replace_table(net, old, new);
++      fib_free_table(old);
++
++      /* attempt to fetch main table if it has been allocated */
++      main_table = fib_get_table(net, RT_TABLE_MAIN);
++      if (!main_table)
++              return 0;
++
++      /* flush local entries from main table */
++      fib_table_flush_external(main_table);
+       return 0;
+ }
diff --git a/queue-4.8/ipv6-bump-genid-when-the-ifa_f_tentative-flag-is-clear.patch b/queue-4.8/ipv6-bump-genid-when-the-ifa_f_tentative-flag-is-clear.patch
new file mode 100644 (file)
index 0000000..8a467d3
--- /dev/null
@@ -0,0 +1,124 @@
+From foo@baz Thu Dec  8 07:19:12 CET 2016
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Tue, 22 Nov 2016 16:57:40 +0100
+Subject: ipv6: bump genid when the IFA_F_TENTATIVE flag is clear
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+
+[ Upstream commit 764d3be6e415b40056834bfd29b994dc3f837606 ]
+
+When an ipv6 address has the tentative flag set, it can't be
+used as source for egress traffic, while the associated route,
+if any, can be looked up and even stored into some dst_cache.
+
+In the latter scenario, the source ipv6 address selected and
+stored in the cache is most probably wrong (e.g. with
+link-local scope) and the entity using the dst_cache will
+experience lack of ipv6 connectivity until said cache is
+cleared or invalidated.
+
+Overall this may cause lack of connectivity over most IPv6 tunnels
+(comprising geneve and vxlan), if the first egress packet reaches
+the tunnel before the DaD is completed for the used ipv6
+address.
+
+This patch bumps a new genid after that the IFA_F_TENTATIVE flag
+is cleared, so that dst_cache will be invalidated on
+next lookup and ipv6 connectivity restored.
+
+Fixes: 0c1d70af924b ("net: use dst_cache for vxlan device")
+Fixes: 468dfffcd762 ("geneve: add dst caching support")
+Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/addrconf.c |   18 ++++++++++++------
+ 1 file changed, 12 insertions(+), 6 deletions(-)
+
+--- a/net/ipv6/addrconf.c
++++ b/net/ipv6/addrconf.c
+@@ -163,7 +163,7 @@ static struct rt6_info *addrconf_get_pre
+ static void addrconf_dad_start(struct inet6_ifaddr *ifp);
+ static void addrconf_dad_work(struct work_struct *w);
+-static void addrconf_dad_completed(struct inet6_ifaddr *ifp);
++static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id);
+ static void addrconf_dad_run(struct inet6_dev *idev);
+ static void addrconf_rs_timer(unsigned long data);
+ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
+@@ -2893,6 +2893,7 @@ static void add_addr(struct inet6_dev *i
+               spin_lock_bh(&ifp->lock);
+               ifp->flags &= ~IFA_F_TENTATIVE;
+               spin_unlock_bh(&ifp->lock);
++              rt_genid_bump_ipv6(dev_net(idev->dev));
+               ipv6_ifa_notify(RTM_NEWADDR, ifp);
+               in6_ifa_put(ifp);
+       }
+@@ -3736,7 +3737,7 @@ static void addrconf_dad_begin(struct in
+ {
+       struct inet6_dev *idev = ifp->idev;
+       struct net_device *dev = idev->dev;
+-      bool notify = false;
++      bool bump_id, notify = false;
+       addrconf_join_solict(dev, &ifp->addr);
+@@ -3751,11 +3752,12 @@ static void addrconf_dad_begin(struct in
+           idev->cnf.accept_dad < 1 ||
+           !(ifp->flags&IFA_F_TENTATIVE) ||
+           ifp->flags & IFA_F_NODAD) {
++              bump_id = ifp->flags & IFA_F_TENTATIVE;
+               ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED);
+               spin_unlock(&ifp->lock);
+               read_unlock_bh(&idev->lock);
+-              addrconf_dad_completed(ifp);
++              addrconf_dad_completed(ifp, bump_id);
+               return;
+       }
+@@ -3815,8 +3817,8 @@ static void addrconf_dad_work(struct wor
+                                               struct inet6_ifaddr,
+                                               dad_work);
+       struct inet6_dev *idev = ifp->idev;
++      bool bump_id, disable_ipv6 = false;
+       struct in6_addr mcaddr;
+-      bool disable_ipv6 = false;
+       enum {
+               DAD_PROCESS,
+@@ -3886,11 +3888,12 @@ static void addrconf_dad_work(struct wor
+                * DAD was successful
+                */
++              bump_id = ifp->flags & IFA_F_TENTATIVE;
+               ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED);
+               spin_unlock(&ifp->lock);
+               write_unlock_bh(&idev->lock);
+-              addrconf_dad_completed(ifp);
++              addrconf_dad_completed(ifp, bump_id);
+               goto out;
+       }
+@@ -3927,7 +3930,7 @@ static bool ipv6_lonely_lladdr(struct in
+       return true;
+ }
+-static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
++static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id)
+ {
+       struct net_device *dev = ifp->idev->dev;
+       struct in6_addr lladdr;
+@@ -3978,6 +3981,9 @@ static void addrconf_dad_completed(struc
+               spin_unlock(&ifp->lock);
+               write_unlock_bh(&ifp->idev->lock);
+       }
++
++      if (bump_id)
++              rt_genid_bump_ipv6(dev_net(dev));
+ }
+ static void addrconf_dad_run(struct inet6_dev *idev)
diff --git a/queue-4.8/l2tp-fix-racy-sock_zapped-flag-check-in-l2tp_ip-6-_bind.patch b/queue-4.8/l2tp-fix-racy-sock_zapped-flag-check-in-l2tp_ip-6-_bind.patch
new file mode 100644 (file)
index 0000000..774e3aa
--- /dev/null
@@ -0,0 +1,166 @@
+From foo@baz Thu Dec  8 07:19:12 CET 2016
+From: Guillaume Nault <g.nault@alphalink.fr>
+Date: Fri, 18 Nov 2016 22:13:00 +0100
+Subject: l2tp: fix racy SOCK_ZAPPED flag check in l2tp_ip{,6}_bind()
+
+From: Guillaume Nault <g.nault@alphalink.fr>
+
+
+[ Upstream commit 32c231164b762dddefa13af5a0101032c70b50ef ]
+
+Lock socket before checking the SOCK_ZAPPED flag in l2tp_ip6_bind().
+Without lock, a concurrent call could modify the socket flags between
+the sock_flag(sk, SOCK_ZAPPED) test and the lock_sock() call. This way,
+a socket could be inserted twice in l2tp_ip6_bind_table. Releasing it
+would then leave a stale pointer there, generating use-after-free
+errors when walking through the list or modifying adjacent entries.
+
+BUG: KASAN: use-after-free in l2tp_ip6_close+0x22e/0x290 at addr ffff8800081b0ed8
+Write of size 8 by task syz-executor/10987
+CPU: 0 PID: 10987 Comm: syz-executor Not tainted 4.8.0+ #39
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.2-0-g33fbe13 by qemu-project.org 04/01/2014
+ ffff880031d97838 ffffffff829f835b ffff88001b5a1640 ffff8800081b0ec0
+ ffff8800081b15a0 ffff8800081b6d20 ffff880031d97860 ffffffff8174d3cc
+ ffff880031d978f0 ffff8800081b0e80 ffff88001b5a1640 ffff880031d978e0
+Call Trace:
+ [<ffffffff829f835b>] dump_stack+0xb3/0x118 lib/dump_stack.c:15
+ [<ffffffff8174d3cc>] kasan_object_err+0x1c/0x70 mm/kasan/report.c:156
+ [<     inline     >] print_address_description mm/kasan/report.c:194
+ [<ffffffff8174d666>] kasan_report_error+0x1f6/0x4d0 mm/kasan/report.c:283
+ [<     inline     >] kasan_report mm/kasan/report.c:303
+ [<ffffffff8174db7e>] __asan_report_store8_noabort+0x3e/0x40 mm/kasan/report.c:329
+ [<     inline     >] __write_once_size ./include/linux/compiler.h:249
+ [<     inline     >] __hlist_del ./include/linux/list.h:622
+ [<     inline     >] hlist_del_init ./include/linux/list.h:637
+ [<ffffffff8579047e>] l2tp_ip6_close+0x22e/0x290 net/l2tp/l2tp_ip6.c:239
+ [<ffffffff850b2dfd>] inet_release+0xed/0x1c0 net/ipv4/af_inet.c:415
+ [<ffffffff851dc5a0>] inet6_release+0x50/0x70 net/ipv6/af_inet6.c:422
+ [<ffffffff84c4581d>] sock_release+0x8d/0x1d0 net/socket.c:570
+ [<ffffffff84c45976>] sock_close+0x16/0x20 net/socket.c:1017
+ [<ffffffff817a108c>] __fput+0x28c/0x780 fs/file_table.c:208
+ [<ffffffff817a1605>] ____fput+0x15/0x20 fs/file_table.c:244
+ [<ffffffff813774f9>] task_work_run+0xf9/0x170
+ [<ffffffff81324aae>] do_exit+0x85e/0x2a00
+ [<ffffffff81326dc8>] do_group_exit+0x108/0x330
+ [<ffffffff81348cf7>] get_signal+0x617/0x17a0 kernel/signal.c:2307
+ [<ffffffff811b49af>] do_signal+0x7f/0x18f0
+ [<ffffffff810039bf>] exit_to_usermode_loop+0xbf/0x150 arch/x86/entry/common.c:156
+ [<     inline     >] prepare_exit_to_usermode arch/x86/entry/common.c:190
+ [<ffffffff81006060>] syscall_return_slowpath+0x1a0/0x1e0 arch/x86/entry/common.c:259
+ [<ffffffff85e4d726>] entry_SYSCALL_64_fastpath+0xc4/0xc6
+Object at ffff8800081b0ec0, in cache L2TP/IPv6 size: 1448
+Allocated:
+PID = 10987
+ [ 1116.897025] [<ffffffff811ddcb6>] save_stack_trace+0x16/0x20
+ [ 1116.897025] [<ffffffff8174c736>] save_stack+0x46/0xd0
+ [ 1116.897025] [<ffffffff8174c9ad>] kasan_kmalloc+0xad/0xe0
+ [ 1116.897025] [<ffffffff8174cee2>] kasan_slab_alloc+0x12/0x20
+ [ 1116.897025] [<     inline     >] slab_post_alloc_hook mm/slab.h:417
+ [ 1116.897025] [<     inline     >] slab_alloc_node mm/slub.c:2708
+ [ 1116.897025] [<     inline     >] slab_alloc mm/slub.c:2716
+ [ 1116.897025] [<ffffffff817476a8>] kmem_cache_alloc+0xc8/0x2b0 mm/slub.c:2721
+ [ 1116.897025] [<ffffffff84c4f6a9>] sk_prot_alloc+0x69/0x2b0 net/core/sock.c:1326
+ [ 1116.897025] [<ffffffff84c58ac8>] sk_alloc+0x38/0xae0 net/core/sock.c:1388
+ [ 1116.897025] [<ffffffff851ddf67>] inet6_create+0x2d7/0x1000 net/ipv6/af_inet6.c:182
+ [ 1116.897025] [<ffffffff84c4af7b>] __sock_create+0x37b/0x640 net/socket.c:1153
+ [ 1116.897025] [<     inline     >] sock_create net/socket.c:1193
+ [ 1116.897025] [<     inline     >] SYSC_socket net/socket.c:1223
+ [ 1116.897025] [<ffffffff84c4b46f>] SyS_socket+0xef/0x1b0 net/socket.c:1203
+ [ 1116.897025] [<ffffffff85e4d685>] entry_SYSCALL_64_fastpath+0x23/0xc6
+Freed:
+PID = 10987
+ [ 1116.897025] [<ffffffff811ddcb6>] save_stack_trace+0x16/0x20
+ [ 1116.897025] [<ffffffff8174c736>] save_stack+0x46/0xd0
+ [ 1116.897025] [<ffffffff8174cf61>] kasan_slab_free+0x71/0xb0
+ [ 1116.897025] [<     inline     >] slab_free_hook mm/slub.c:1352
+ [ 1116.897025] [<     inline     >] slab_free_freelist_hook mm/slub.c:1374
+ [ 1116.897025] [<     inline     >] slab_free mm/slub.c:2951
+ [ 1116.897025] [<ffffffff81748b28>] kmem_cache_free+0xc8/0x330 mm/slub.c:2973
+ [ 1116.897025] [<     inline     >] sk_prot_free net/core/sock.c:1369
+ [ 1116.897025] [<ffffffff84c541eb>] __sk_destruct+0x32b/0x4f0 net/core/sock.c:1444
+ [ 1116.897025] [<ffffffff84c5aca4>] sk_destruct+0x44/0x80 net/core/sock.c:1452
+ [ 1116.897025] [<ffffffff84c5ad33>] __sk_free+0x53/0x220 net/core/sock.c:1460
+ [ 1116.897025] [<ffffffff84c5af23>] sk_free+0x23/0x30 net/core/sock.c:1471
+ [ 1116.897025] [<ffffffff84c5cb6c>] sk_common_release+0x28c/0x3e0 ./include/net/sock.h:1589
+ [ 1116.897025] [<ffffffff8579044e>] l2tp_ip6_close+0x1fe/0x290 net/l2tp/l2tp_ip6.c:243
+ [ 1116.897025] [<ffffffff850b2dfd>] inet_release+0xed/0x1c0 net/ipv4/af_inet.c:415
+ [ 1116.897025] [<ffffffff851dc5a0>] inet6_release+0x50/0x70 net/ipv6/af_inet6.c:422
+ [ 1116.897025] [<ffffffff84c4581d>] sock_release+0x8d/0x1d0 net/socket.c:570
+ [ 1116.897025] [<ffffffff84c45976>] sock_close+0x16/0x20 net/socket.c:1017
+ [ 1116.897025] [<ffffffff817a108c>] __fput+0x28c/0x780 fs/file_table.c:208
+ [ 1116.897025] [<ffffffff817a1605>] ____fput+0x15/0x20 fs/file_table.c:244
+ [ 1116.897025] [<ffffffff813774f9>] task_work_run+0xf9/0x170
+ [ 1116.897025] [<ffffffff81324aae>] do_exit+0x85e/0x2a00
+ [ 1116.897025] [<ffffffff81326dc8>] do_group_exit+0x108/0x330
+ [ 1116.897025] [<ffffffff81348cf7>] get_signal+0x617/0x17a0 kernel/signal.c:2307
+ [ 1116.897025] [<ffffffff811b49af>] do_signal+0x7f/0x18f0
+ [ 1116.897025] [<ffffffff810039bf>] exit_to_usermode_loop+0xbf/0x150 arch/x86/entry/common.c:156
+ [ 1116.897025] [<     inline     >] prepare_exit_to_usermode arch/x86/entry/common.c:190
+ [ 1116.897025] [<ffffffff81006060>] syscall_return_slowpath+0x1a0/0x1e0 arch/x86/entry/common.c:259
+ [ 1116.897025] [<ffffffff85e4d726>] entry_SYSCALL_64_fastpath+0xc4/0xc6
+Memory state around the buggy address:
+ ffff8800081b0d80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+ ffff8800081b0e00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+>ffff8800081b0e80: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb
+                                                    ^
+ ffff8800081b0f00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+ ffff8800081b0f80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+
+==================================================================
+
+The same issue exists with l2tp_ip_bind() and l2tp_ip_bind_table.
+
+Fixes: c51ce49735c1 ("l2tp: fix oops in L2TP IP sockets for connect() AF_UNSPEC case")
+Reported-by: Baozeng Ding <sploving1@gmail.com>
+Reported-by: Andrey Konovalov <andreyknvl@google.com>
+Tested-by: Baozeng Ding <sploving1@gmail.com>
+Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/l2tp/l2tp_ip.c  |    5 +++--
+ net/l2tp/l2tp_ip6.c |    5 +++--
+ 2 files changed, 6 insertions(+), 4 deletions(-)
+
+--- a/net/l2tp/l2tp_ip.c
++++ b/net/l2tp/l2tp_ip.c
+@@ -251,8 +251,6 @@ static int l2tp_ip_bind(struct sock *sk,
+       int ret;
+       int chk_addr_ret;
+-      if (!sock_flag(sk, SOCK_ZAPPED))
+-              return -EINVAL;
+       if (addr_len < sizeof(struct sockaddr_l2tpip))
+               return -EINVAL;
+       if (addr->l2tp_family != AF_INET)
+@@ -267,6 +265,9 @@ static int l2tp_ip_bind(struct sock *sk,
+       read_unlock_bh(&l2tp_ip_lock);
+       lock_sock(sk);
++      if (!sock_flag(sk, SOCK_ZAPPED))
++              goto out;
++
+       if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_l2tpip))
+               goto out;
+--- a/net/l2tp/l2tp_ip6.c
++++ b/net/l2tp/l2tp_ip6.c
+@@ -269,8 +269,6 @@ static int l2tp_ip6_bind(struct sock *sk
+       int addr_type;
+       int err;
+-      if (!sock_flag(sk, SOCK_ZAPPED))
+-              return -EINVAL;
+       if (addr->l2tp_family != AF_INET6)
+               return -EINVAL;
+       if (addr_len < sizeof(*addr))
+@@ -296,6 +294,9 @@ static int l2tp_ip6_bind(struct sock *sk
+       lock_sock(sk);
+       err = -EINVAL;
++      if (!sock_flag(sk, SOCK_ZAPPED))
++              goto out_unlock;
++
+       if (sk->sk_state != TCP_CLOSE)
+               goto out_unlock;
diff --git a/queue-4.8/net-avoid-signed-overflows-for-so_-snd-rcv-bufforce.patch b/queue-4.8/net-avoid-signed-overflows-for-so_-snd-rcv-bufforce.patch
new file mode 100644 (file)
index 0000000..a3fb58d
--- /dev/null
@@ -0,0 +1,50 @@
+From foo@baz Thu Dec  8 07:19:12 CET 2016
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 2 Dec 2016 09:44:53 -0800
+Subject: net: avoid signed overflows for SO_{SND|RCV}BUFFORCE
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit b98b0bc8c431e3ceb4b26b0dfc8db509518fb290 ]
+
+CAP_NET_ADMIN users should not be allowed to set negative
+sk_sndbuf or sk_rcvbuf values, as it can lead to various memory
+corruptions, crashes, OOM...
+
+Note that before commit 82981930125a ("net: cleanups in
+sock_setsockopt()"), the bug was even more serious, since SO_SNDBUF
+and SO_RCVBUF were vulnerable.
+
+This needs to be backported to all known linux kernels.
+
+Again, many thanks to syzkaller team for discovering this gem.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Andrey Konovalov <andreyknvl@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/sock.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -715,7 +715,7 @@ int sock_setsockopt(struct socket *sock,
+               val = min_t(u32, val, sysctl_wmem_max);
+ set_sndbuf:
+               sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
+-              sk->sk_sndbuf = max_t(u32, val * 2, SOCK_MIN_SNDBUF);
++              sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
+               /* Wake up sending tasks if we upped the value. */
+               sk->sk_write_space(sk);
+               break;
+@@ -751,7 +751,7 @@ set_rcvbuf:
+                * returning the value we actually used in getsockopt
+                * is the most desirable behavior.
+                */
+-              sk->sk_rcvbuf = max_t(u32, val * 2, SOCK_MIN_RCVBUF);
++              sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
+               break;
+       case SO_RCVBUFFORCE:
diff --git a/queue-4.8/net-bcmgenet-utilize-correct-struct-device-for-all-dma-operations.patch b/queue-4.8/net-bcmgenet-utilize-correct-struct-device-for-all-dma-operations.patch
new file mode 100644 (file)
index 0000000..3529e6b
--- /dev/null
@@ -0,0 +1,67 @@
+From foo@baz Thu Dec  8 07:19:12 CET 2016
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Thu, 1 Dec 2016 09:45:45 -0800
+Subject: net: bcmgenet: Utilize correct struct device for all DMA operations
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+
+[ Upstream commit 8c4799ac799665065f9bf1364fd71bf4f7dc6a4a ]
+
+__bcmgenet_tx_reclaim() and bcmgenet_free_rx_buffers() are not using the
+same struct device during unmap that was used for the map operation,
+which makes DMA-API debugging warn about it. Fix this by always using
+&priv->pdev->dev throughout the driver, using an identical device
+reference for all map/unmap calls.
+
+Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/genet/bcmgenet.c |    8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
++++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+@@ -1172,6 +1172,7 @@ static unsigned int __bcmgenet_tx_reclai
+                                         struct bcmgenet_tx_ring *ring)
+ {
+       struct bcmgenet_priv *priv = netdev_priv(dev);
++      struct device *kdev = &priv->pdev->dev;
+       struct enet_cb *tx_cb_ptr;
+       struct netdev_queue *txq;
+       unsigned int pkts_compl = 0;
+@@ -1199,13 +1200,13 @@ static unsigned int __bcmgenet_tx_reclai
+               if (tx_cb_ptr->skb) {
+                       pkts_compl++;
+                       bytes_compl += GENET_CB(tx_cb_ptr->skb)->bytes_sent;
+-                      dma_unmap_single(&dev->dev,
++                      dma_unmap_single(kdev,
+                                        dma_unmap_addr(tx_cb_ptr, dma_addr),
+                                        dma_unmap_len(tx_cb_ptr, dma_len),
+                                        DMA_TO_DEVICE);
+                       bcmgenet_free_cb(tx_cb_ptr);
+               } else if (dma_unmap_addr(tx_cb_ptr, dma_addr)) {
+-                      dma_unmap_page(&dev->dev,
++                      dma_unmap_page(kdev,
+                                      dma_unmap_addr(tx_cb_ptr, dma_addr),
+                                      dma_unmap_len(tx_cb_ptr, dma_len),
+                                      DMA_TO_DEVICE);
+@@ -1775,6 +1776,7 @@ static int bcmgenet_alloc_rx_buffers(str
+ static void bcmgenet_free_rx_buffers(struct bcmgenet_priv *priv)
+ {
++      struct device *kdev = &priv->pdev->dev;
+       struct enet_cb *cb;
+       int i;
+@@ -1782,7 +1784,7 @@ static void bcmgenet_free_rx_buffers(str
+               cb = &priv->rx_cbs[i];
+               if (dma_unmap_addr(cb, dma_addr)) {
+-                      dma_unmap_single(&priv->dev->dev,
++                      dma_unmap_single(kdev,
+                                        dma_unmap_addr(cb, dma_addr),
+                                        priv->rx_buf_len, DMA_FROM_DEVICE);
+                       dma_unmap_addr_set(cb, dma_addr, 0);
diff --git a/queue-4.8/net-check-dead-netns-for-peernet2id_alloc.patch b/queue-4.8/net-check-dead-netns-for-peernet2id_alloc.patch
new file mode 100644 (file)
index 0000000..db6f8bc
--- /dev/null
@@ -0,0 +1,54 @@
+From foo@baz Thu Dec  8 07:19:12 CET 2016
+From: WANG Cong <xiyou.wangcong@gmail.com>
+Date: Wed, 16 Nov 2016 10:27:02 -0800
+Subject: net: check dead netns for peernet2id_alloc()
+
+From: WANG Cong <xiyou.wangcong@gmail.com>
+
+
+[ Upstream commit cfc44a4d147ea605d66ccb917cc24467d15ff867 ]
+
+Andrei reports we still allocate netns ID from idr after we destroy
+it in cleanup_net().
+
+cleanup_net():
+  ...
+  idr_destroy(&net->netns_ids);
+  ...
+  list_for_each_entry_reverse(ops, &pernet_list, list)
+    ops_exit_list(ops, &net_exit_list);
+      -> rollback_registered_many()
+        -> rtmsg_ifinfo_build_skb()
+         -> rtnl_fill_ifinfo()
+           -> peernet2id_alloc()
+
+After that point we should not even access net->netns_ids, we
+should check the death of the current netns as early as we can in
+peernet2id_alloc().
+
+For net-next we can consider to avoid sending rtmsg totally,
+it is a good optimization for netns teardown path.
+
+Fixes: 0c7aecd4bde4 ("netns: add rtnl cmd to add and get peer netns ids")
+Reported-by: Andrei Vagin <avagin@gmail.com>
+Cc: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Acked-by: Andrei Vagin <avagin@openvz.org>
+Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/net_namespace.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/core/net_namespace.c
++++ b/net/core/net_namespace.c
+@@ -217,6 +217,8 @@ int peernet2id_alloc(struct net *net, st
+       bool alloc;
+       int id;
++      if (atomic_read(&net->count) == 0)
++              return NETNSA_NSID_NOT_ASSIGNED;
+       spin_lock_irqsave(&net->nsid_lock, flags);
+       alloc = atomic_read(&peer->count) == 0 ? false : true;
+       id = __peernet2id_alloc(net, peer, &alloc);
diff --git a/queue-4.8/net-dccp-fix-use-after-free-in-dccp_invalid_packet.patch b/queue-4.8/net-dccp-fix-use-after-free-in-dccp_invalid_packet.patch
new file mode 100644 (file)
index 0000000..56d784a
--- /dev/null
@@ -0,0 +1,58 @@
+From foo@baz Thu Dec  8 07:19:12 CET 2016
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 28 Nov 2016 06:26:49 -0800
+Subject: net/dccp: fix use-after-free in dccp_invalid_packet
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 648f0c28df282636c0c8a7a19ca3ce5fc80a39c3 ]
+
+pskb_may_pull() can reallocate skb->head, we need to reload dh pointer
+in dccp_invalid_packet() or risk use after free.
+
+Bug found by Andrey Konovalov using syzkaller.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Andrey Konovalov <andreyknvl@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/dccp/ipv4.c |   12 +++++++-----
+ 1 file changed, 7 insertions(+), 5 deletions(-)
+
+--- a/net/dccp/ipv4.c
++++ b/net/dccp/ipv4.c
+@@ -700,6 +700,7 @@ int dccp_invalid_packet(struct sk_buff *
+ {
+       const struct dccp_hdr *dh;
+       unsigned int cscov;
++      u8 dccph_doff;
+       if (skb->pkt_type != PACKET_HOST)
+               return 1;
+@@ -721,18 +722,19 @@ int dccp_invalid_packet(struct sk_buff *
+       /*
+        * If P.Data Offset is too small for packet type, drop packet and return
+        */
+-      if (dh->dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) {
+-              DCCP_WARN("P.Data Offset(%u) too small\n", dh->dccph_doff);
++      dccph_doff = dh->dccph_doff;
++      if (dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) {
++              DCCP_WARN("P.Data Offset(%u) too small\n", dccph_doff);
+               return 1;
+       }
+       /*
+        * If P.Data Offset is too too large for packet, drop packet and return
+        */
+-      if (!pskb_may_pull(skb, dh->dccph_doff * sizeof(u32))) {
+-              DCCP_WARN("P.Data Offset(%u) too large\n", dh->dccph_doff);
++      if (!pskb_may_pull(skb, dccph_doff * sizeof(u32))) {
++              DCCP_WARN("P.Data Offset(%u) too large\n", dccph_doff);
+               return 1;
+       }
+-
++      dh = dccp_hdr(skb);
+       /*
+        * If P.type is not Data, Ack, or DataAck and P.X == 0 (the packet
+        * has short sequence numbers), drop packet and return
diff --git a/queue-4.8/net-dsa-b53-fix-vlan-usage-and-how-we-treat-cpu-port.patch b/queue-4.8/net-dsa-b53-fix-vlan-usage-and-how-we-treat-cpu-port.patch
new file mode 100644 (file)
index 0000000..5a6fcc5
--- /dev/null
@@ -0,0 +1,101 @@
+From foo@baz Thu Dec  8 07:19:12 CET 2016
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Tue, 15 Nov 2016 15:58:15 -0800
+Subject: net: dsa: b53: Fix VLAN usage and how we treat CPU port
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+
+[ Upstream commit e47112d9d6009bf6b7438cedc0270316d6b0370d ]
+
+We currently have a fundamental problem in how we treat the CPU port and
+its VLAN membership. As soon as a second VLAN is configured to be
+untagged, the CPU automatically becomes untagged for that VLAN as well,
+and yet, we don't gracefully make sure that the CPU becomes tagged in
+the other VLANs it could be a member of. This results in only one VLAN
+being effectively usable from the CPU's perspective.
+
+Instead of having some pretty complex logic which tries to maintain the
+CPU port's default VLAN and its untagged properties, just do something
+very simple which consists in neither altering the CPU port's PVID
+settings, nor its untagged settings:
+
+- whenever a VLAN is added, the CPU is automatically a member of this
+  VLAN group, as a tagged member
+- PVID settings for downstream ports do not alter the CPU port's PVID
+  since it now is part of all VLANs in the system
+
+This means that a typical example where e.g: LAN ports are in VLAN1, and
+WAN port is in VLAN2, now require having two VLAN interfaces for the
+host to properly terminate and send traffic from/to.
+
+Fixes: Fixes: a2482d2ce349 ("net: dsa: b53: Plug in VLAN support")
+Reported-by: Hartmut Knaack <knaack.h@gmx.de>
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/b53/b53_common.c |   16 ++++------------
+ 1 file changed, 4 insertions(+), 12 deletions(-)
+
+--- a/drivers/net/dsa/b53/b53_common.c
++++ b/drivers/net/dsa/b53/b53_common.c
+@@ -904,9 +904,10 @@ static void b53_vlan_add(struct dsa_swit
+               vl->members |= BIT(port) | BIT(cpu_port);
+               if (untagged)
+-                      vl->untag |= BIT(port) | BIT(cpu_port);
++                      vl->untag |= BIT(port);
+               else
+-                      vl->untag &= ~(BIT(port) | BIT(cpu_port));
++                      vl->untag &= ~BIT(port);
++              vl->untag &= ~BIT(cpu_port);
+               b53_set_vlan_entry(dev, vid, vl);
+               b53_fast_age_vlan(dev, vid);
+@@ -915,8 +916,6 @@ static void b53_vlan_add(struct dsa_swit
+       if (pvid) {
+               b53_write16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port),
+                           vlan->vid_end);
+-              b53_write16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(cpu_port),
+-                          vlan->vid_end);
+               b53_fast_age_vlan(dev, vid);
+       }
+ }
+@@ -926,7 +925,6 @@ static int b53_vlan_del(struct dsa_switc
+ {
+       struct b53_device *dev = ds_to_priv(ds);
+       bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
+-      unsigned int cpu_port = dev->cpu_port;
+       struct b53_vlan *vl;
+       u16 vid;
+       u16 pvid;
+@@ -939,8 +937,6 @@ static int b53_vlan_del(struct dsa_switc
+               b53_get_vlan_entry(dev, vid, vl);
+               vl->members &= ~BIT(port);
+-              if ((vl->members & BIT(cpu_port)) == BIT(cpu_port))
+-                      vl->members = 0;
+               if (pvid == vid) {
+                       if (is5325(dev) || is5365(dev))
+@@ -949,18 +945,14 @@ static int b53_vlan_del(struct dsa_switc
+                               pvid = 0;
+               }
+-              if (untagged) {
++              if (untagged)
+                       vl->untag &= ~(BIT(port));
+-                      if ((vl->untag & BIT(cpu_port)) == BIT(cpu_port))
+-                              vl->untag = 0;
+-              }
+               b53_set_vlan_entry(dev, vid, vl);
+               b53_fast_age_vlan(dev, vid);
+       }
+       b53_write16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), pvid);
+-      b53_write16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(cpu_port), pvid);
+       b53_fast_age_vlan(dev, pvid);
+       return 0;
diff --git a/queue-4.8/net-dsa-bcm_sf2-ensure-we-re-negotiate-eee-during-after-link-change.patch b/queue-4.8/net-dsa-bcm_sf2-ensure-we-re-negotiate-eee-during-after-link-change.patch
new file mode 100644 (file)
index 0000000..55ad99e
--- /dev/null
@@ -0,0 +1,41 @@
+From foo@baz Thu Dec  8 07:19:12 CET 2016
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Tue, 22 Nov 2016 11:40:58 -0800
+Subject: net: dsa: bcm_sf2: Ensure we re-negotiate EEE during after link change
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+
+[ Upstream commit 76da8706d90d8641eeb9b8e579942ed80b6c0880 ]
+
+In case the link change and EEE is enabled or disabled, always try to
+re-negotiate this with the link partner.
+
+Fixes: 450b05c15f9c ("net: dsa: bcm_sf2: add support for controlling EEE")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/bcm_sf2.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/drivers/net/dsa/bcm_sf2.c
++++ b/drivers/net/dsa/bcm_sf2.c
+@@ -1167,6 +1167,7 @@ static void bcm_sf2_sw_adjust_link(struc
+                                  struct phy_device *phydev)
+ {
+       struct bcm_sf2_priv *priv = ds_to_priv(ds);
++      struct ethtool_eee *p = &priv->port_sts[port].eee;
+       u32 id_mode_dis = 0, port_mode;
+       const char *str = NULL;
+       u32 reg;
+@@ -1241,6 +1242,9 @@ force_link:
+               reg |= DUPLX_MODE;
+       core_writel(priv, reg, CORE_STS_OVERRIDE_GMIIP_PORT(port));
++
++      if (!phydev->is_pseudo_fixed_link)
++              p->eee_enabled = bcm_sf2_eee_init(ds, port, phydev);
+ }
+ static void bcm_sf2_sw_fixed_link_update(struct dsa_switch *ds, int port,
diff --git a/queue-4.8/net-dsa-fix-unbalanced-dsa_switch_tree-reference-counting.patch b/queue-4.8/net-dsa-fix-unbalanced-dsa_switch_tree-reference-counting.patch
new file mode 100644 (file)
index 0000000..7a97cb3
--- /dev/null
@@ -0,0 +1,49 @@
+From foo@baz Thu Dec  8 07:19:12 CET 2016
+From: Nikita Yushchenko <nikita.yoush@cogentembedded.com>
+Date: Mon, 28 Nov 2016 09:48:48 +0300
+Subject: net: dsa: fix unbalanced dsa_switch_tree reference counting
+
+From: Nikita Yushchenko <nikita.yoush@cogentembedded.com>
+
+
+[ Upstream commit 7a99cd6e213685b78118382e6a8fed506c82ccb2 ]
+
+_dsa_register_switch() gets a dsa_switch_tree object either via
+dsa_get_dst() or via dsa_add_dst(). Former path does not increase kref
+in returned object (resulting into caller not owning a reference),
+while later path does create a new object (resulting into caller owning
+a reference).
+
+The rest of _dsa_register_switch() assumes that it owns a reference, and
+calls dsa_put_dst().
+
+This causes a memory breakage if first switch in the tree initialized
+successfully, but second failed to initialize. In particular, freed
+dsa_swith_tree object is left referenced by switch that was initialized,
+and later access to sysfs attributes of that switch cause OOPS.
+
+To fix, need to add kref_get() call to dsa_get_dst().
+
+Fixes: 83c0afaec7b7 ("net: dsa: Add new binding implementation")
+Signed-off-by: Nikita Yushchenko <nikita.yoush@cogentembedded.com>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/dsa/dsa2.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/dsa/dsa2.c
++++ b/net/dsa/dsa2.c
+@@ -28,8 +28,10 @@ static struct dsa_switch_tree *dsa_get_d
+       struct dsa_switch_tree *dst;
+       list_for_each_entry(dst, &dsa_switch_trees, list)
+-              if (dst->tree == tree)
++              if (dst->tree == tree) {
++                      kref_get(&dst->refcount);
+                       return dst;
++              }
+       return NULL;
+ }
diff --git a/queue-4.8/net-macb-fix-the-rx-queue-reset-in-macb_rx.patch b/queue-4.8/net-macb-fix-the-rx-queue-reset-in-macb_rx.patch
new file mode 100644 (file)
index 0000000..c51b58b
--- /dev/null
@@ -0,0 +1,56 @@
+From foo@baz Thu Dec  8 07:19:12 CET 2016
+From: Cyrille Pitchen <cyrille.pitchen@atmel.com>
+Date: Mon, 28 Nov 2016 14:40:55 +0100
+Subject: net: macb: fix the RX queue reset in macb_rx()
+
+From: Cyrille Pitchen <cyrille.pitchen@atmel.com>
+
+
+[ Upstream commit a0b44eea372b449ef9744fb1d90491cc063289b8 ]
+
+On macb only (not gem), when a RX queue corruption was detected from
+macb_rx(), the RX queue was reset: during this process the RX ring
+buffer descriptor was initialized by macb_init_rx_ring() but we forgot
+to also set bp->rx_tail to 0.
+
+Indeed, when processing the received frames, bp->rx_tail provides the
+macb driver with the index in the RX ring buffer of the next buffer to
+process. So when the whole ring buffer is reset we must also reset
+bp->rx_tail so the driver is synchronized again with the hardware.
+
+Since macb_init_rx_ring() is called from many locations, currently from
+macb_rx() and macb_init_rings(), we'd rather add the "bp->rx_tail = 0;"
+line inside macb_init_rx_ring() than add the very same line after each
+call of this function.
+
+Without this fix, the rx queue is not reset properly to recover from
+queue corruption and connection drop may occur.
+
+Signed-off-by: Cyrille Pitchen <cyrille.pitchen@atmel.com>
+Fixes: 9ba723b081a2 ("net: macb: remove BUG_ON() and reset the queue to handle RX errors")
+Acked-by: Nicolas Ferre <nicolas.ferre@atmel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/cadence/macb.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/cadence/macb.c
++++ b/drivers/net/ethernet/cadence/macb.c
+@@ -959,6 +959,7 @@ static inline void macb_init_rx_ring(str
+               addr += bp->rx_buffer_size;
+       }
+       bp->rx_ring[RX_RING_SIZE - 1].addr |= MACB_BIT(RX_WRAP);
++      bp->rx_tail = 0;
+ }
+ static int macb_rx(struct macb *bp, int budget)
+@@ -1597,8 +1598,6 @@ static void macb_init_rings(struct macb
+       bp->queues[0].tx_head = 0;
+       bp->queues[0].tx_tail = 0;
+       bp->queues[0].tx_ring[TX_RING_SIZE - 1].ctrl |= MACB_BIT(TX_WRAP);
+-
+-      bp->rx_tail = 0;
+ }
+ static void macb_reset_hw(struct macb *bp)
diff --git a/queue-4.8/net-ping-check-minimum-size-on-icmp-header-length.patch b/queue-4.8/net-ping-check-minimum-size-on-icmp-header-length.patch
new file mode 100644 (file)
index 0000000..4dd0319
--- /dev/null
@@ -0,0 +1,72 @@
+From foo@baz Thu Dec  8 07:19:12 CET 2016
+From: Kees Cook <keescook@chromium.org>
+Date: Mon, 5 Dec 2016 10:34:38 -0800
+Subject: net: ping: check minimum size on ICMP header length
+
+From: Kees Cook <keescook@chromium.org>
+
+
+[ Upstream commit 0eab121ef8750a5c8637d51534d5e9143fb0633f ]
+
+Prior to commit c0371da6047a ("put iov_iter into msghdr") in v3.19, there
+was no check that the iovec contained enough bytes for an ICMP header,
+and the read loop would walk across neighboring stack contents. Since the
+iov_iter conversion, bad arguments are noticed, but the returned error is
+EFAULT. Returning EINVAL is a clearer error and also solves the problem
+prior to v3.19.
+
+This was found using trinity with KASAN on v3.18:
+
+BUG: KASAN: stack-out-of-bounds in memcpy_fromiovec+0x60/0x114 at addr ffffffc071077da0
+Read of size 8 by task trinity-c2/9623
+page:ffffffbe034b9a08 count:0 mapcount:0 mapping:          (null) index:0x0
+flags: 0x0()
+page dumped because: kasan: bad access detected
+CPU: 0 PID: 9623 Comm: trinity-c2 Tainted: G    BU         3.18.0-dirty #15
+Hardware name: Google Tegra210 Smaug Rev 1,3+ (DT)
+Call trace:
+[<ffffffc000209c98>] dump_backtrace+0x0/0x1ac arch/arm64/kernel/traps.c:90
+[<ffffffc000209e54>] show_stack+0x10/0x1c arch/arm64/kernel/traps.c:171
+[<     inline     >] __dump_stack lib/dump_stack.c:15
+[<ffffffc000f18dc4>] dump_stack+0x7c/0xd0 lib/dump_stack.c:50
+[<     inline     >] print_address_description mm/kasan/report.c:147
+[<     inline     >] kasan_report_error mm/kasan/report.c:236
+[<ffffffc000373dcc>] kasan_report+0x380/0x4b8 mm/kasan/report.c:259
+[<     inline     >] check_memory_region mm/kasan/kasan.c:264
+[<ffffffc00037352c>] __asan_load8+0x20/0x70 mm/kasan/kasan.c:507
+[<ffffffc0005b9624>] memcpy_fromiovec+0x5c/0x114 lib/iovec.c:15
+[<     inline     >] memcpy_from_msg include/linux/skbuff.h:2667
+[<ffffffc000ddeba0>] ping_common_sendmsg+0x50/0x108 net/ipv4/ping.c:674
+[<ffffffc000dded30>] ping_v4_sendmsg+0xd8/0x698 net/ipv4/ping.c:714
+[<ffffffc000dc91dc>] inet_sendmsg+0xe0/0x12c net/ipv4/af_inet.c:749
+[<     inline     >] __sock_sendmsg_nosec net/socket.c:624
+[<     inline     >] __sock_sendmsg net/socket.c:632
+[<ffffffc000cab61c>] sock_sendmsg+0x124/0x164 net/socket.c:643
+[<     inline     >] SYSC_sendto net/socket.c:1797
+[<ffffffc000cad270>] SyS_sendto+0x178/0x1d8 net/socket.c:1761
+
+CVE-2016-8399
+
+Reported-by: Qidan He <i@flanker017.me>
+Fixes: c319b4d76b9e ("net: ipv4: add IPPROTO_ICMP socket kind")
+Cc: stable@vger.kernel.org
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ping.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/net/ipv4/ping.c
++++ b/net/ipv4/ping.c
+@@ -662,6 +662,10 @@ int ping_common_sendmsg(int family, stru
+       if (len > 0xFFFF)
+               return -EMSGSIZE;
++      /* Must have at least a full ICMP header. */
++      if (len < icmph_len)
++              return -EINVAL;
++
+       /*
+        *      Check the flags.
+        */
diff --git a/queue-4.8/net-sched-pedit-make-sure-that-offset-is-valid.patch b/queue-4.8/net-sched-pedit-make-sure-that-offset-is-valid.patch
new file mode 100644 (file)
index 0000000..ecf992e
--- /dev/null
@@ -0,0 +1,68 @@
+From foo@baz Thu Dec  8 07:19:12 CET 2016
+From: Amir Vadai <amir@vadai.me>
+Date: Mon, 28 Nov 2016 12:56:40 +0200
+Subject: net/sched: pedit: make sure that offset is valid
+
+From: Amir Vadai <amir@vadai.me>
+
+
+[ Upstream commit 95c2027bfeda21a28eb245121e6a249f38d0788e ]
+
+Add a validation function to make sure offset is valid:
+1. Not below skb head (could happen when offset is negative).
+2. Validate both 'offset' and 'at'.
+
+Signed-off-by: Amir Vadai <amir@vadai.me>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/act_pedit.c |   24 ++++++++++++++++++++----
+ 1 file changed, 20 insertions(+), 4 deletions(-)
+
+--- a/net/sched/act_pedit.c
++++ b/net/sched/act_pedit.c
+@@ -108,6 +108,17 @@ static void tcf_pedit_cleanup(struct tc_
+       kfree(keys);
+ }
++static bool offset_valid(struct sk_buff *skb, int offset)
++{
++      if (offset > 0 && offset > skb->len)
++              return false;
++
++      if  (offset < 0 && -offset > skb_headroom(skb))
++              return false;
++
++      return true;
++}
++
+ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a,
+                    struct tcf_result *res)
+ {
+@@ -134,6 +145,11 @@ static int tcf_pedit(struct sk_buff *skb
+                       if (tkey->offmask) {
+                               char *d, _d;
++                              if (!offset_valid(skb, off + tkey->at)) {
++                                      pr_info("tc filter pedit 'at' offset %d out of bounds\n",
++                                              off + tkey->at);
++                                      goto bad;
++                              }
+                               d = skb_header_pointer(skb, off + tkey->at, 1,
+                                                      &_d);
+                               if (!d)
+@@ -146,10 +162,10 @@ static int tcf_pedit(struct sk_buff *skb
+                                       " offset must be on 32 bit boundaries\n");
+                               goto bad;
+                       }
+-                      if (offset > 0 && offset > skb->len) {
+-                              pr_info("tc filter pedit"
+-                                      " offset %d can't exceed pkt length %d\n",
+-                                     offset, skb->len);
++
++                      if (!offset_valid(skb, off + offset)) {
++                              pr_info("tc filter pedit offset %d out of bounds\n",
++                                      offset);
+                               goto bad;
+                       }
diff --git a/queue-4.8/net-sched-respect-rcu-grace-period-on-cls-destruction.patch b/queue-4.8/net-sched-respect-rcu-grace-period-on-cls-destruction.patch
new file mode 100644 (file)
index 0000000..91cbfc3
--- /dev/null
@@ -0,0 +1,257 @@
+From foo@baz Thu Dec  8 07:19:12 CET 2016
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Sun, 27 Nov 2016 01:18:01 +0100
+Subject: net, sched: respect rcu grace period on cls destruction
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+
+[ Upstream commit d936377414fadbafb4d17148d222fe45ca5442d4 ]
+
+Roi reported a crash in flower where tp->root was NULL in ->classify()
+callbacks. Reason is that in ->destroy() tp->root is set to NULL via
+RCU_INIT_POINTER(). It's problematic for some of the classifiers, because
+this doesn't respect RCU grace period for them, and as a result, still
+outstanding readers from tc_classify() will try to blindly dereference
+a NULL tp->root.
+
+The tp->root object is strictly private to the classifier implementation
+and holds internal data the core such as tc_ctl_tfilter() doesn't know
+about. Within some classifiers, such as cls_bpf, cls_basic, etc, tp->root
+is only checked for NULL in ->get() callback, but nowhere else. This is
+misleading and seemed to be copied from old classifier code that was not
+cleaned up properly. For example, d3fa76ee6b4a ("[NET_SCHED]: cls_basic:
+fix NULL pointer dereference") moved tp->root initialization into ->init()
+routine, where before it was part of ->change(), so ->get() had to deal
+with tp->root being NULL back then, so that was indeed a valid case, after
+d3fa76ee6b4a, not really anymore. We used to set tp->root to NULL long
+ago in ->destroy(), see 47a1a1d4be29 ("pkt_sched: remove unnecessary xchg()
+in packet classifiers"); but the NULLifying was reintroduced with the
+RCUification, but it's not correct for every classifier implementation.
+
+In the cases that are fixed here with one exception of cls_cgroup, tp->root
+object is allocated and initialized inside ->init() callback, which is always
+performed at a point in time after we allocate a new tp, which means tp and
+thus tp->root was not globally visible in the tp chain yet (see tc_ctl_tfilter()).
+Also, on destruction tp->root is strictly kfree_rcu()'ed in ->destroy()
+handler, same for the tp which is kfree_rcu()'ed right when we return
+from ->destroy() in tcf_destroy(). This means, the head object's lifetime
+for such classifiers is always tied to the tp lifetime. The RCU callback
+invocation for the two kfree_rcu() could be out of order, but that's fine
+since both are independent.
+
+Dropping the RCU_INIT_POINTER(tp->root, NULL) for these classifiers here
+means that 1) we don't need a useless NULL check in fast-path and, 2) that
+outstanding readers of that tp in tc_classify() can still execute under
+respect with RCU grace period as it is actually expected.
+
+Things that haven't been touched here: cls_fw and cls_route. They each
+handle tp->root being NULL in ->classify() path for historic reasons, so
+their ->destroy() implementation can stay as is. If someone actually
+cares, they could get cleaned up at some point to avoid the test in fast
+path. cls_u32 doesn't set tp->root to NULL. For cls_rsvp, I just added a
+!head should anyone actually be using/testing it, so it at least aligns with
+cls_fw and cls_route. For cls_flower we additionally need to defer rhashtable
+destruction (to a sleepable context) after RCU grace period as concurrent
+readers might still access it. (Note that in this case we need to hold module
+reference to keep work callback address intact, since we only wait on module
+unload for all call_rcu()s to finish.)
+
+This fixes one race to bring RCU grace period guarantees back. Next step
+as worked on by Cong however is to fix 1e052be69d04 ("net_sched: destroy
+proto tp when all filters are gone") to get the order of unlinking the tp
+in tc_ctl_tfilter() for the RTM_DELTFILTER case right by moving
+RCU_INIT_POINTER() before tcf_destroy() and let the notification for
+removal be done through the prior ->delete() callback. Both are independant
+issues. Once we have that right, we can then clean tp->root up for a number
+of classifiers by not making them RCU pointers, which requires a new callback
+(->uninit) that is triggered from tp's RCU callback, where we just kfree()
+tp->root from there.
+
+Fixes: 1f947bf151e9 ("net: sched: rcu'ify cls_bpf")
+Fixes: 9888faefe132 ("net: sched: cls_basic use RCU")
+Fixes: 70da9f0bf999 ("net: sched: cls_flow use RCU")
+Fixes: 77b9900ef53a ("tc: introduce Flower classifier")
+Fixes: bf3994d2ed31 ("net/sched: introduce Match-all classifier")
+Fixes: 952313bd6258 ("net: sched: cls_cgroup use RCU")
+Reported-by: Roi Dayan <roid@mellanox.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Cc: Cong Wang <xiyou.wangcong@gmail.com>
+Cc: John Fastabend <john.fastabend@gmail.com>
+Cc: Roi Dayan <roid@mellanox.com>
+Cc: Jiri Pirko <jiri@mellanox.com>
+Acked-by: John Fastabend <john.r.fastabend@intel.com>
+Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/cls_basic.c    |    4 ----
+ net/sched/cls_bpf.c      |    4 ----
+ net/sched/cls_cgroup.c   |    7 +++----
+ net/sched/cls_flow.c     |    1 -
+ net/sched/cls_flower.c   |   31 ++++++++++++++++++++++++++-----
+ net/sched/cls_matchall.c |    1 -
+ net/sched/cls_rsvp.h     |    3 ++-
+ net/sched/cls_tcindex.c  |    1 -
+ 8 files changed, 31 insertions(+), 21 deletions(-)
+
+--- a/net/sched/cls_basic.c
++++ b/net/sched/cls_basic.c
+@@ -62,9 +62,6 @@ static unsigned long basic_get(struct tc
+       struct basic_head *head = rtnl_dereference(tp->root);
+       struct basic_filter *f;
+-      if (head == NULL)
+-              return 0UL;
+-
+       list_for_each_entry(f, &head->flist, link) {
+               if (f->handle == handle) {
+                       l = (unsigned long) f;
+@@ -109,7 +106,6 @@ static bool basic_destroy(struct tcf_pro
+               tcf_unbind_filter(tp, &f->res);
+               call_rcu(&f->rcu, basic_delete_filter);
+       }
+-      RCU_INIT_POINTER(tp->root, NULL);
+       kfree_rcu(head, rcu);
+       return true;
+ }
+--- a/net/sched/cls_bpf.c
++++ b/net/sched/cls_bpf.c
+@@ -200,7 +200,6 @@ static bool cls_bpf_destroy(struct tcf_p
+               call_rcu(&prog->rcu, __cls_bpf_delete_prog);
+       }
+-      RCU_INIT_POINTER(tp->root, NULL);
+       kfree_rcu(head, rcu);
+       return true;
+ }
+@@ -211,9 +210,6 @@ static unsigned long cls_bpf_get(struct
+       struct cls_bpf_prog *prog;
+       unsigned long ret = 0UL;
+-      if (head == NULL)
+-              return 0UL;
+-
+       list_for_each_entry(prog, &head->plist, link) {
+               if (prog->handle == handle) {
+                       ret = (unsigned long) prog;
+--- a/net/sched/cls_cgroup.c
++++ b/net/sched/cls_cgroup.c
+@@ -130,11 +130,10 @@ static bool cls_cgroup_destroy(struct tc
+       if (!force)
+               return false;
+-
+-      if (head) {
+-              RCU_INIT_POINTER(tp->root, NULL);
++      /* Head can still be NULL due to cls_cgroup_init(). */
++      if (head)
+               call_rcu(&head->rcu, cls_cgroup_destroy_rcu);
+-      }
++
+       return true;
+ }
+--- a/net/sched/cls_flow.c
++++ b/net/sched/cls_flow.c
+@@ -583,7 +583,6 @@ static bool flow_destroy(struct tcf_prot
+               list_del_rcu(&f->list);
+               call_rcu(&f->rcu, flow_destroy_filter);
+       }
+-      RCU_INIT_POINTER(tp->root, NULL);
+       kfree_rcu(head, rcu);
+       return true;
+ }
+--- a/net/sched/cls_flower.c
++++ b/net/sched/cls_flower.c
+@@ -13,6 +13,7 @@
+ #include <linux/init.h>
+ #include <linux/module.h>
+ #include <linux/rhashtable.h>
++#include <linux/workqueue.h>
+ #include <linux/if_ether.h>
+ #include <linux/in6.h>
+@@ -55,7 +56,10 @@ struct cls_fl_head {
+       bool mask_assigned;
+       struct list_head filters;
+       struct rhashtable_params ht_params;
+-      struct rcu_head rcu;
++      union {
++              struct work_struct work;
++              struct rcu_head rcu;
++      };
+ };
+ struct cls_fl_filter {
+@@ -239,6 +243,24 @@ static void fl_hw_update_stats(struct tc
+       dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc);
+ }
++static void fl_destroy_sleepable(struct work_struct *work)
++{
++      struct cls_fl_head *head = container_of(work, struct cls_fl_head,
++                                              work);
++      if (head->mask_assigned)
++              rhashtable_destroy(&head->ht);
++      kfree(head);
++      module_put(THIS_MODULE);
++}
++
++static void fl_destroy_rcu(struct rcu_head *rcu)
++{
++      struct cls_fl_head *head = container_of(rcu, struct cls_fl_head, rcu);
++
++      INIT_WORK(&head->work, fl_destroy_sleepable);
++      schedule_work(&head->work);
++}
++
+ static bool fl_destroy(struct tcf_proto *tp, bool force)
+ {
+       struct cls_fl_head *head = rtnl_dereference(tp->root);
+@@ -252,10 +274,9 @@ static bool fl_destroy(struct tcf_proto
+               list_del_rcu(&f->list);
+               call_rcu(&f->rcu, fl_destroy_filter);
+       }
+-      RCU_INIT_POINTER(tp->root, NULL);
+-      if (head->mask_assigned)
+-              rhashtable_destroy(&head->ht);
+-      kfree_rcu(head, rcu);
++
++      __module_get(THIS_MODULE);
++      call_rcu(&head->rcu, fl_destroy_rcu);
+       return true;
+ }
+--- a/net/sched/cls_matchall.c
++++ b/net/sched/cls_matchall.c
+@@ -114,7 +114,6 @@ static bool mall_destroy(struct tcf_prot
+               call_rcu(&f->rcu, mall_destroy_filter);
+       }
+-      RCU_INIT_POINTER(tp->root, NULL);
+       kfree_rcu(head, rcu);
+       return true;
+ }
+--- a/net/sched/cls_rsvp.h
++++ b/net/sched/cls_rsvp.h
+@@ -152,7 +152,8 @@ static int rsvp_classify(struct sk_buff
+               return -1;
+       nhptr = ip_hdr(skb);
+ #endif
+-
++      if (unlikely(!head))
++              return -1;
+ restart:
+ #if RSVP_DST_LEN == 4
+--- a/net/sched/cls_tcindex.c
++++ b/net/sched/cls_tcindex.c
+@@ -503,7 +503,6 @@ static bool tcindex_destroy(struct tcf_p
+       walker.fn = tcindex_destroy_element;
+       tcindex_walk(tp, &walker);
+-      RCU_INIT_POINTER(tp->root, NULL);
+       call_rcu(&p->rcu, __tcindex_destroy);
+       return true;
+ }
diff --git a/queue-4.8/net-sky2-fix-shutdown-crash.patch b/queue-4.8/net-sky2-fix-shutdown-crash.patch
new file mode 100644 (file)
index 0000000..a205f44
--- /dev/null
@@ -0,0 +1,67 @@
+From foo@baz Thu Dec  8 07:19:12 CET 2016
+From: Jeremy Linton <jeremy.linton@arm.com>
+Date: Thu, 17 Nov 2016 09:14:25 -0600
+Subject: net: sky2: Fix shutdown crash
+
+From: Jeremy Linton <jeremy.linton@arm.com>
+
+
+[ Upstream commit 06ba3b2133dc203e1e9bc36cee7f0839b79a9e8b ]
+
+The sky2 frequently crashes during machine shutdown with:
+
+sky2_get_stats+0x60/0x3d8 [sky2]
+dev_get_stats+0x68/0xd8
+rtnl_fill_stats+0x54/0x140
+rtnl_fill_ifinfo+0x46c/0xc68
+rtmsg_ifinfo_build_skb+0x7c/0xf0
+rtmsg_ifinfo.part.22+0x3c/0x70
+rtmsg_ifinfo+0x50/0x5c
+netdev_state_change+0x4c/0x58
+linkwatch_do_dev+0x50/0x88
+__linkwatch_run_queue+0x104/0x1a4
+linkwatch_event+0x30/0x3c
+process_one_work+0x140/0x3e0
+worker_thread+0x60/0x44c
+kthread+0xdc/0xf0
+ret_from_fork+0x10/0x50
+
+This is caused by the sky2 being called after it has been shutdown.
+A previous thread about this can be found here:
+
+https://lkml.org/lkml/2016/4/12/410
+
+An alternative fix is to assure that IFF_UP gets cleared by
+calling dev_close() during shutdown. This is similar to what the
+bnx2/tg3/xgene and maybe others are doing to assure that the driver
+isn't being called following _shutdown().
+
+Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/marvell/sky2.c |   13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+--- a/drivers/net/ethernet/marvell/sky2.c
++++ b/drivers/net/ethernet/marvell/sky2.c
+@@ -5220,6 +5220,19 @@ static SIMPLE_DEV_PM_OPS(sky2_pm_ops, sk
+ static void sky2_shutdown(struct pci_dev *pdev)
+ {
++      struct sky2_hw *hw = pci_get_drvdata(pdev);
++      int port;
++
++      for (port = 0; port < hw->ports; port++) {
++              struct net_device *ndev = hw->dev[port];
++
++              rtnl_lock();
++              if (netif_running(ndev)) {
++                      dev_close(ndev);
++                      netif_device_detach(ndev);
++              }
++              rtnl_unlock();
++      }
+       sky2_suspend(&pdev->dev);
+       pci_wake_from_d3(pdev, device_may_wakeup(&pdev->dev));
+       pci_set_power_state(pdev, PCI_D3hot);
diff --git a/queue-4.8/netlink-call-cb-done-from-a-worker-thread.patch b/queue-4.8/netlink-call-cb-done-from-a-worker-thread.patch
new file mode 100644 (file)
index 0000000..0dcfad0
--- /dev/null
@@ -0,0 +1,91 @@
+From foo@baz Thu Dec  8 07:19:12 CET 2016
+From: Herbert Xu <herbert@gondor.apana.org.au>
+Date: Mon, 28 Nov 2016 19:22:12 +0800
+Subject: netlink: Call cb->done from a worker thread
+
+From: Herbert Xu <herbert@gondor.apana.org.au>
+
+
+[ Upstream commit 707693c8a498697aa8db240b93eb76ec62e30892 ]
+
+The cb->done interface expects to be called in process context.
+This was broken by the netlink RCU conversion.  This patch fixes
+it by adding a worker struct to make the cb->done call where
+necessary.
+
+Fixes: 21e4902aea80 ("netlink: Lockless lookup with RCU grace...")
+Reported-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netlink/af_netlink.c |   27 +++++++++++++++++++++++----
+ net/netlink/af_netlink.h |    2 ++
+ 2 files changed, 25 insertions(+), 4 deletions(-)
+
+--- a/net/netlink/af_netlink.c
++++ b/net/netlink/af_netlink.c
+@@ -322,14 +322,11 @@ static void netlink_skb_set_owner_r(stru
+       sk_mem_charge(sk, skb->truesize);
+ }
+-static void netlink_sock_destruct(struct sock *sk)
++static void __netlink_sock_destruct(struct sock *sk)
+ {
+       struct netlink_sock *nlk = nlk_sk(sk);
+       if (nlk->cb_running) {
+-              if (nlk->cb.done)
+-                      nlk->cb.done(&nlk->cb);
+-
+               module_put(nlk->cb.module);
+               kfree_skb(nlk->cb.skb);
+       }
+@@ -346,6 +343,28 @@ static void netlink_sock_destruct(struct
+       WARN_ON(nlk_sk(sk)->groups);
+ }
++static void netlink_sock_destruct_work(struct work_struct *work)
++{
++      struct netlink_sock *nlk = container_of(work, struct netlink_sock,
++                                              work);
++
++      nlk->cb.done(&nlk->cb);
++      __netlink_sock_destruct(&nlk->sk);
++}
++
++static void netlink_sock_destruct(struct sock *sk)
++{
++      struct netlink_sock *nlk = nlk_sk(sk);
++
++      if (nlk->cb_running && nlk->cb.done) {
++              INIT_WORK(&nlk->work, netlink_sock_destruct_work);
++              schedule_work(&nlk->work);
++              return;
++      }
++
++      __netlink_sock_destruct(sk);
++}
++
+ /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on
+  * SMP. Look, when several writers sleep and reader wakes them up, all but one
+  * immediately hit write lock and grab all the cpus. Exclusive sleep solves
+--- a/net/netlink/af_netlink.h
++++ b/net/netlink/af_netlink.h
+@@ -3,6 +3,7 @@
+ #include <linux/rhashtable.h>
+ #include <linux/atomic.h>
++#include <linux/workqueue.h>
+ #include <net/sock.h>
+ #define NLGRPSZ(x)    (ALIGN(x, sizeof(unsigned long) * 8) / 8)
+@@ -33,6 +34,7 @@ struct netlink_sock {
+       struct rhash_head       node;
+       struct rcu_head         rcu;
++      struct work_struct      work;
+ };
+ static inline struct netlink_sock *nlk_sk(struct sock *sk)
diff --git a/queue-4.8/netlink-do-not-schedule-work-from-sk_destruct.patch b/queue-4.8/netlink-do-not-schedule-work-from-sk_destruct.patch
new file mode 100644 (file)
index 0000000..6889c04
--- /dev/null
@@ -0,0 +1,87 @@
+From foo@baz Thu Dec  8 07:19:12 CET 2016
+From: Herbert Xu <herbert@gondor.apana.org.au>
+Date: Mon, 5 Dec 2016 15:28:21 +0800
+Subject: netlink: Do not schedule work from sk_destruct
+
+From: Herbert Xu <herbert@gondor.apana.org.au>
+
+
+[ Upstream commit ed5d7788a934a4b6d6d025e948ed4da496b4f12e ]
+
+It is wrong to schedule a work from sk_destruct using the socket
+as the memory reserve because the socket will be freed immediately
+after the return from sk_destruct.
+
+Instead we should do the deferral prior to sk_free.
+
+This patch does just that.
+
+Fixes: 707693c8a498 ("netlink: Call cb->done from a worker thread")
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Tested-by: Andrey Konovalov <andreyknvl@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netlink/af_netlink.c |   32 +++++++++++++++-----------------
+ 1 file changed, 15 insertions(+), 17 deletions(-)
+
+--- a/net/netlink/af_netlink.c
++++ b/net/netlink/af_netlink.c
+@@ -322,11 +322,13 @@ static void netlink_skb_set_owner_r(stru
+       sk_mem_charge(sk, skb->truesize);
+ }
+-static void __netlink_sock_destruct(struct sock *sk)
++static void netlink_sock_destruct(struct sock *sk)
+ {
+       struct netlink_sock *nlk = nlk_sk(sk);
+       if (nlk->cb_running) {
++              if (nlk->cb.done)
++                      nlk->cb.done(&nlk->cb);
+               module_put(nlk->cb.module);
+               kfree_skb(nlk->cb.skb);
+       }
+@@ -348,21 +350,7 @@ static void netlink_sock_destruct_work(s
+       struct netlink_sock *nlk = container_of(work, struct netlink_sock,
+                                               work);
+-      nlk->cb.done(&nlk->cb);
+-      __netlink_sock_destruct(&nlk->sk);
+-}
+-
+-static void netlink_sock_destruct(struct sock *sk)
+-{
+-      struct netlink_sock *nlk = nlk_sk(sk);
+-
+-      if (nlk->cb_running && nlk->cb.done) {
+-              INIT_WORK(&nlk->work, netlink_sock_destruct_work);
+-              schedule_work(&nlk->work);
+-              return;
+-      }
+-
+-      __netlink_sock_destruct(sk);
++      sk_free(&nlk->sk);
+ }
+ /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on
+@@ -667,8 +655,18 @@ out_module:
+ static void deferred_put_nlk_sk(struct rcu_head *head)
+ {
+       struct netlink_sock *nlk = container_of(head, struct netlink_sock, rcu);
++      struct sock *sk = &nlk->sk;
++
++      if (!atomic_dec_and_test(&sk->sk_refcnt))
++              return;
++
++      if (nlk->cb_running && nlk->cb.done) {
++              INIT_WORK(&nlk->work, netlink_sock_destruct_work);
++              schedule_work(&nlk->work);
++              return;
++      }
+-      sock_put(&nlk->sk);
++      sk_free(sk);
+ }
+ static int netlink_release(struct socket *sock)
diff --git a/queue-4.8/packet-fix-race-condition-in-packet_set_ring.patch b/queue-4.8/packet-fix-race-condition-in-packet_set_ring.patch
new file mode 100644 (file)
index 0000000..b5e2700
--- /dev/null
@@ -0,0 +1,93 @@
+From foo@baz Thu Dec  8 07:19:12 CET 2016
+From: Philip Pettersson <philip.pettersson@gmail.com>
+Date: Wed, 30 Nov 2016 14:55:36 -0800
+Subject: packet: fix race condition in packet_set_ring
+
+From: Philip Pettersson <philip.pettersson@gmail.com>
+
+
+[ Upstream commit 84ac7260236a49c79eede91617700174c2c19b0c ]
+
+When packet_set_ring creates a ring buffer it will initialize a
+struct timer_list if the packet version is TPACKET_V3. This value
+can then be raced by a different thread calling setsockopt to
+set the version to TPACKET_V1 before packet_set_ring has finished.
+
+This leads to a use-after-free on a function pointer in the
+struct timer_list when the socket is closed as the previously
+initialized timer will not be deleted.
+
+The bug is fixed by taking lock_sock(sk) in packet_setsockopt when
+changing the packet version while also taking the lock at the start
+of packet_set_ring.
+
+Fixes: f6fb8f100b80 ("af-packet: TPACKET_V3 flexible buffer implementation.")
+Signed-off-by: Philip Pettersson <philip.pettersson@gmail.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/packet/af_packet.c |   18 ++++++++++++------
+ 1 file changed, 12 insertions(+), 6 deletions(-)
+
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -3648,19 +3648,25 @@ packet_setsockopt(struct socket *sock, i
+               if (optlen != sizeof(val))
+                       return -EINVAL;
+-              if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
+-                      return -EBUSY;
+               if (copy_from_user(&val, optval, sizeof(val)))
+                       return -EFAULT;
+               switch (val) {
+               case TPACKET_V1:
+               case TPACKET_V2:
+               case TPACKET_V3:
+-                      po->tp_version = val;
+-                      return 0;
++                      break;
+               default:
+                       return -EINVAL;
+               }
++              lock_sock(sk);
++              if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
++                      ret = -EBUSY;
++              } else {
++                      po->tp_version = val;
++                      ret = 0;
++              }
++              release_sock(sk);
++              return ret;
+       }
+       case PACKET_RESERVE:
+       {
+@@ -4164,6 +4170,7 @@ static int packet_set_ring(struct sock *
+       /* Added to avoid minimal code churn */
+       struct tpacket_req *req = &req_u->req;
++      lock_sock(sk);
+       /* Opening a Tx-ring is NOT supported in TPACKET_V3 */
+       if (!closing && tx_ring && (po->tp_version > TPACKET_V2)) {
+               net_warn_ratelimited("Tx-ring is not supported.\n");
+@@ -4245,7 +4252,6 @@ static int packet_set_ring(struct sock *
+                       goto out;
+       }
+-      lock_sock(sk);
+       /* Detach socket from network */
+       spin_lock(&po->bind_lock);
+@@ -4294,11 +4300,11 @@ static int packet_set_ring(struct sock *
+               if (!tx_ring)
+                       prb_shutdown_retire_blk_timer(po, rb_queue);
+       }
+-      release_sock(sk);
+       if (pg_vec)
+               free_pg_vec(pg_vec, order, req->tp_block_nr);
+ out:
++      release_sock(sk);
+       return err;
+ }
diff --git a/queue-4.8/rtnetlink-fix-fdb-size-computation.patch b/queue-4.8/rtnetlink-fix-fdb-size-computation.patch
new file mode 100644 (file)
index 0000000..c8b0ff9
--- /dev/null
@@ -0,0 +1,34 @@
+From foo@baz Thu Dec  8 07:19:12 CET 2016
+From: Sabrina Dubroca <sd@queasysnail.net>
+Date: Fri, 18 Nov 2016 15:50:39 +0100
+Subject: rtnetlink: fix FDB size computation
+
+From: Sabrina Dubroca <sd@queasysnail.net>
+
+
+[ Upstream commit f82ef3e10a870acc19fa04f80ef5877eaa26f41e ]
+
+Add missing NDA_VLAN attribute's size.
+
+Fixes: 1e53d5bb8878 ("net: Pass VLAN ID to rtnl_fdb_notify.")
+Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/rtnetlink.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -2791,7 +2791,10 @@ nla_put_failure:
+ static inline size_t rtnl_fdb_nlmsg_size(void)
+ {
+-      return NLMSG_ALIGN(sizeof(struct ndmsg)) + nla_total_size(ETH_ALEN);
++      return NLMSG_ALIGN(sizeof(struct ndmsg)) +
++             nla_total_size(ETH_ALEN) +       /* NDA_LLADDR */
++             nla_total_size(sizeof(u16)) +    /* NDA_VLAN */
++             0;
+ }
+ static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, u16 vid, int type,
diff --git a/queue-4.8/rtnl-fix-the-loop-index-update-error-in-rtnl_dump_ifinfo.patch b/queue-4.8/rtnl-fix-the-loop-index-update-error-in-rtnl_dump_ifinfo.patch
new file mode 100644 (file)
index 0000000..73ef1cc
--- /dev/null
@@ -0,0 +1,33 @@
+From foo@baz Thu Dec  8 07:19:12 CET 2016
+From: Zhang Shengju <zhangshengju@cmss.chinamobile.com>
+Date: Sat, 19 Nov 2016 23:28:32 +0800
+Subject: rtnl: fix the loop index update error in rtnl_dump_ifinfo()
+
+From: Zhang Shengju <zhangshengju@cmss.chinamobile.com>
+
+
+[ Upstream commit 3f0ae05d6fea0ed5b19efdbc9c9f8e02685a3af3 ]
+
+If the link is filtered out, loop index should also be updated. If not,
+loop index will not be correct.
+
+Fixes: dc599f76c22b0 ("net: Add support for filtering link dump by master device and kind")
+Signed-off-by: Zhang Shengju <zhangshengju@cmss.chinamobile.com>
+Acked-by: David Ahern <dsa@cumulusnetworks.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/rtnetlink.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -1578,7 +1578,7 @@ static int rtnl_dump_ifinfo(struct sk_bu
+               head = &net->dev_index_head[h];
+               hlist_for_each_entry(dev, head, index_hlist) {
+                       if (link_dump_filtered(dev, master_idx, kind_ops))
+-                              continue;
++                              goto cont;
+                       if (idx < s_idx)
+                               goto cont;
+                       err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
diff --git a/queue-4.8/sh_eth-remove-unchecked-interrupts-for-rz-a1.patch b/queue-4.8/sh_eth-remove-unchecked-interrupts-for-rz-a1.patch
new file mode 100644 (file)
index 0000000..b55ff05
--- /dev/null
@@ -0,0 +1,40 @@
+From foo@baz Thu Dec  8 07:19:12 CET 2016
+From: Chris Brandt <chris.brandt@renesas.com>
+Date: Thu, 1 Dec 2016 13:32:14 -0500
+Subject: sh_eth: remove unchecked interrupts for RZ/A1
+
+From: Chris Brandt <chris.brandt@renesas.com>
+
+
+[ Upstream commit 33d446dbba4d4d6a77e1e900d434fa99e0f02c86 ]
+
+When streaming a lot of data and the RZ/A1 can't keep up, some status bits
+will get set that are not being checked or cleared which cause the
+following messages and the Ethernet driver to stop working. This
+patch fixes that issue.
+
+irq 21: nobody cared (try booting with the "irqpoll" option)
+handlers:
+[<c036b71c>] sh_eth_interrupt
+Disabling IRQ #21
+
+Fixes: db893473d313a4ad ("sh_eth: Add support for r7s72100")
+Signed-off-by: Chris Brandt <chris.brandt@renesas.com>
+Acked-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/renesas/sh_eth.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/renesas/sh_eth.c
++++ b/drivers/net/ethernet/renesas/sh_eth.c
+@@ -518,7 +518,7 @@ static struct sh_eth_cpu_data r7s72100_d
+       .ecsr_value     = ECSR_ICD,
+       .ecsipr_value   = ECSIPR_ICDIP,
+-      .eesipr_value   = 0xff7f009f,
++      .eesipr_value   = 0xe77f009f,
+       .tx_check       = EESR_TC1 | EESR_FTC,
+       .eesr_err_check = EESR_TWB1 | EESR_TWB | EESR_TABT | EESR_RABT |
diff --git a/queue-4.8/sparc32-fix-inverted-invalid_frame_pointer-checks-on-sigreturns.patch b/queue-4.8/sparc32-fix-inverted-invalid_frame_pointer-checks-on-sigreturns.patch
new file mode 100644 (file)
index 0000000..d3de83e
--- /dev/null
@@ -0,0 +1,37 @@
+From foo@baz Thu Dec  8 07:19:56 CET 2016
+From: Andreas Larsson <andreas@gaisler.com>
+Date: Wed, 9 Nov 2016 10:43:05 +0100
+Subject: sparc32: Fix inverted invalid_frame_pointer checks on sigreturns
+
+From: Andreas Larsson <andreas@gaisler.com>
+
+
+[ Upstream commit 07b5ab3f71d318e52c18cc3b73c1d44c908aacfa ]
+
+Signed-off-by: Andreas Larsson <andreas@gaisler.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/kernel/signal_32.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/sparc/kernel/signal_32.c
++++ b/arch/sparc/kernel/signal_32.c
+@@ -89,7 +89,7 @@ asmlinkage void do_sigreturn(struct pt_r
+       sf = (struct signal_frame __user *) regs->u_regs[UREG_FP];
+       /* 1. Make sure we are not getting garbage from the user */
+-      if (!invalid_frame_pointer(sf, sizeof(*sf)))
++      if (invalid_frame_pointer(sf, sizeof(*sf)))
+               goto segv_and_exit;
+       if (get_user(ufp, &sf->info.si_regs.u_regs[UREG_FP]))
+@@ -150,7 +150,7 @@ asmlinkage void do_rt_sigreturn(struct p
+       synchronize_user_stack();
+       sf = (struct rt_signal_frame __user *) regs->u_regs[UREG_FP];
+-      if (!invalid_frame_pointer(sf, sizeof(*sf)))
++      if (invalid_frame_pointer(sf, sizeof(*sf)))
+               goto segv;
+       if (get_user(ufp, &sf->regs.u_regs[UREG_FP]))
diff --git a/queue-4.8/sparc64-fix-compile-warning-section-mismatch-in-find_node.patch b/queue-4.8/sparc64-fix-compile-warning-section-mismatch-in-find_node.patch
new file mode 100644 (file)
index 0000000..43c6250
--- /dev/null
@@ -0,0 +1,52 @@
+From foo@baz Thu Dec  8 07:19:56 CET 2016
+From: Thomas Tai <thomas.tai@oracle.com>
+Date: Fri, 11 Nov 2016 16:41:00 -0800
+Subject: sparc64: fix compile warning section mismatch in find_node()
+
+From: Thomas Tai <thomas.tai@oracle.com>
+
+
+[ Upstream commit 87a349f9cc0908bc0cfac0c9ece3179f650ae95a ]
+
+A compile warning is introduced by a commit to fix the find_node().
+This patch fix the compile warning by moving find_node() into __init
+section. Because find_node() is only used by memblock_nid_range() which
+is only used by a __init add_node_ranges(). find_node() and
+memblock_nid_range() should also be inside __init section.
+
+Signed-off-by: Thomas Tai <thomas.tai@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/mm/init_64.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/arch/sparc/mm/init_64.c
++++ b/arch/sparc/mm/init_64.c
+@@ -805,7 +805,7 @@ static int num_mblocks;
+ static int find_numa_node_for_addr(unsigned long pa,
+                                  struct node_mem_mask *pnode_mask);
+-static unsigned long ra_to_pa(unsigned long addr)
++static unsigned long __init ra_to_pa(unsigned long addr)
+ {
+       int i;
+@@ -821,7 +821,7 @@ static unsigned long ra_to_pa(unsigned l
+       return addr;
+ }
+-static int find_node(unsigned long addr)
++static int __init find_node(unsigned long addr)
+ {
+       static bool search_mdesc = true;
+       static struct node_mem_mask last_mem_mask = { ~0UL, ~0UL };
+@@ -858,7 +858,7 @@ static int find_node(unsigned long addr)
+       return last_index;
+ }
+-static u64 memblock_nid_range(u64 start, u64 end, int *nid)
++static u64 __init memblock_nid_range(u64 start, u64 end, int *nid)
+ {
+       *nid = find_node(start);
+       start += PAGE_SIZE;
diff --git a/queue-4.8/sparc64-fix-find_node-warning-if-numa-node-cannot-be-found.patch b/queue-4.8/sparc64-fix-find_node-warning-if-numa-node-cannot-be-found.patch
new file mode 100644 (file)
index 0000000..405e591
--- /dev/null
@@ -0,0 +1,148 @@
+From foo@baz Thu Dec  8 07:19:56 CET 2016
+From: Thomas Tai <thomas.tai@oracle.com>
+Date: Thu, 3 Nov 2016 09:19:01 -0700
+Subject: sparc64: Fix find_node warning if numa node cannot be found
+
+From: Thomas Tai <thomas.tai@oracle.com>
+
+
+[ Upstream commit 74a5ed5c4f692df2ff0a2313ea71e81243525519 ]
+
+When booting up LDOM, find_node() warns that a physical address
+doesn't match a NUMA node.
+
+WARNING: CPU: 0 PID: 0 at arch/sparc/mm/init_64.c:835
+find_node+0xf4/0x120 find_node: A physical address doesn't
+match a NUMA node rule. Some physical memory will be
+owned by node 0.Modules linked in:
+
+CPU: 0 PID: 0 Comm: swapper Not tainted 4.9.0-rc3 #4
+Call Trace:
+ [0000000000468ba0] __warn+0xc0/0xe0
+ [0000000000468c74] warn_slowpath_fmt+0x34/0x60
+ [00000000004592f4] find_node+0xf4/0x120
+ [0000000000dd0774] add_node_ranges+0x38/0xe4
+ [0000000000dd0b1c] numa_parse_mdesc+0x268/0x2e4
+ [0000000000dd0e9c] bootmem_init+0xb8/0x160
+ [0000000000dd174c] paging_init+0x808/0x8fc
+ [0000000000dcb0d0] setup_arch+0x2c8/0x2f0
+ [0000000000dc68a0] start_kernel+0x48/0x424
+ [0000000000dcb374] start_early_boot+0x27c/0x28c
+ [0000000000a32c08] tlb_fixup_done+0x4c/0x64
+ [0000000000027f08] 0x27f08
+
+It is because linux use an internal structure node_masks[] to
+keep the best memory latency node only. However, LDOM mdesc can
+contain single latency-group with multiple memory latency nodes.
+
+If the address doesn't match the best latency node within
+node_masks[], it should check for an alternative via mdesc.
+The warning message should only be printed if the address
+doesn't match any node_masks[] nor within mdesc. To minimize
+the impact of searching mdesc every time, the last matched
+mask and index is stored in a variable.
+
+Signed-off-by: Thomas Tai <thomas.tai@oracle.com>
+Reviewed-by: Chris Hyser <chris.hyser@oracle.com>
+Reviewed-by: Liam Merwick <liam.merwick@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/mm/init_64.c |   65 +++++++++++++++++++++++++++++++++++++++++++++---
+ 1 file changed, 61 insertions(+), 4 deletions(-)
+
+--- a/arch/sparc/mm/init_64.c
++++ b/arch/sparc/mm/init_64.c
+@@ -802,6 +802,8 @@ struct mdesc_mblock {
+ };
+ static struct mdesc_mblock *mblocks;
+ static int num_mblocks;
++static int find_numa_node_for_addr(unsigned long pa,
++                                 struct node_mem_mask *pnode_mask);
+ static unsigned long ra_to_pa(unsigned long addr)
+ {
+@@ -821,6 +823,9 @@ static unsigned long ra_to_pa(unsigned l
+ static int find_node(unsigned long addr)
+ {
++      static bool search_mdesc = true;
++      static struct node_mem_mask last_mem_mask = { ~0UL, ~0UL };
++      static int last_index;
+       int i;
+       addr = ra_to_pa(addr);
+@@ -830,10 +835,27 @@ static int find_node(unsigned long addr)
+               if ((addr & p->mask) == p->val)
+                       return i;
+       }
+-      /* The following condition has been observed on LDOM guests.*/
+-      WARN_ONCE(1, "find_node: A physical address doesn't match a NUMA node"
+-              " rule. Some physical memory will be owned by node 0.");
+-      return 0;
++      /* The following condition has been observed on LDOM guests because
++       * node_masks only contains the best latency mask and value.
++       * LDOM guest's mdesc can contain a single latency group to
++       * cover multiple address range. Print warning message only if the
++       * address cannot be found in node_masks nor mdesc.
++       */
++      if ((search_mdesc) &&
++          ((addr & last_mem_mask.mask) != last_mem_mask.val)) {
++              /* find the available node in the mdesc */
++              last_index = find_numa_node_for_addr(addr, &last_mem_mask);
++              numadbg("find_node: latency group for address 0x%lx is %d\n",
++                      addr, last_index);
++              if ((last_index < 0) || (last_index >= num_node_masks)) {
++                      /* WARN_ONCE() and use default group 0 */
++                      WARN_ONCE(1, "find_node: A physical address doesn't match a NUMA node rule. Some physical memory will be owned by node 0.");
++                      search_mdesc = false;
++                      last_index = 0;
++              }
++      }
++
++      return last_index;
+ }
+ static u64 memblock_nid_range(u64 start, u64 end, int *nid)
+@@ -1160,6 +1182,41 @@ int __node_distance(int from, int to)
+       return numa_latency[from][to];
+ }
++static int find_numa_node_for_addr(unsigned long pa,
++                                 struct node_mem_mask *pnode_mask)
++{
++      struct mdesc_handle *md = mdesc_grab();
++      u64 node, arc;
++      int i = 0;
++
++      node = mdesc_node_by_name(md, MDESC_NODE_NULL, "latency-groups");
++      if (node == MDESC_NODE_NULL)
++              goto out;
++
++      mdesc_for_each_node_by_name(md, node, "group") {
++              mdesc_for_each_arc(arc, md, node, MDESC_ARC_TYPE_FWD) {
++                      u64 target = mdesc_arc_target(md, arc);
++                      struct mdesc_mlgroup *m = find_mlgroup(target);
++
++                      if (!m)
++                              continue;
++                      if ((pa & m->mask) == m->match) {
++                              if (pnode_mask) {
++                                      pnode_mask->mask = m->mask;
++                                      pnode_mask->val = m->match;
++                              }
++                              mdesc_release(md);
++                              return i;
++                      }
++              }
++              i++;
++      }
++
++out:
++      mdesc_release(md);
++      return -1;
++}
++
+ static int __init find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp)
+ {
+       int i;
diff --git a/queue-4.8/tipc-check-minimum-bearer-mtu.patch b/queue-4.8/tipc-check-minimum-bearer-mtu.patch
new file mode 100644 (file)
index 0000000..115fae1
--- /dev/null
@@ -0,0 +1,116 @@
+From foo@baz Thu Dec  8 07:19:12 CET 2016
+From: Michal Kubeček <mkubecek@suse.cz>
+Date: Fri, 2 Dec 2016 09:33:41 +0100
+Subject: tipc: check minimum bearer MTU
+
+From: Michal Kubeček <mkubecek@suse.cz>
+
+
+[ Upstream commit 3de81b758853f0b29c61e246679d20b513c4cfec ]
+
+Qian Zhang (张谦) reported a potential socket buffer overflow in
+tipc_msg_build() which is also known as CVE-2016-8632: due to
+insufficient checks, a buffer overflow can occur if MTU is too short for
+even tipc headers. As anyone can set device MTU in a user/net namespace,
+this issue can be abused by a regular user.
+
+As agreed in the discussion on Ben Hutchings' original patch, we should
+check the MTU at the moment a bearer is attached rather than for each
+processed packet. We also need to repeat the check when bearer MTU is
+adjusted to new device MTU. UDP case also needs a check to avoid
+overflow when calculating bearer MTU.
+
+Fixes: b97bf3fd8f6a ("[TIPC] Initial merge")
+Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
+Reported-by: Qian Zhang (张谦) <zhangqian-c@360.cn>
+Acked-by: Ying Xue <ying.xue@windriver.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/tipc/bearer.c    |   11 +++++++++--
+ net/tipc/bearer.h    |   13 +++++++++++++
+ net/tipc/udp_media.c |    5 +++++
+ 3 files changed, 27 insertions(+), 2 deletions(-)
+
+--- a/net/tipc/bearer.c
++++ b/net/tipc/bearer.c
+@@ -402,6 +402,10 @@ int tipc_enable_l2_media(struct net *net
+       dev = dev_get_by_name(net, driver_name);
+       if (!dev)
+               return -ENODEV;
++      if (tipc_mtu_bad(dev, 0)) {
++              dev_put(dev);
++              return -EINVAL;
++      }
+       /* Associate TIPC bearer with L2 bearer */
+       rcu_assign_pointer(b->media_ptr, dev);
+@@ -606,8 +610,6 @@ static int tipc_l2_device_event(struct n
+       if (!b)
+               return NOTIFY_DONE;
+-      b->mtu = dev->mtu;
+-
+       switch (evt) {
+       case NETDEV_CHANGE:
+               if (netif_carrier_ok(dev))
+@@ -621,6 +623,11 @@ static int tipc_l2_device_event(struct n
+               tipc_reset_bearer(net, b);
+               break;
+       case NETDEV_CHANGEMTU:
++              if (tipc_mtu_bad(dev, 0)) {
++                      bearer_disable(net, b);
++                      break;
++              }
++              b->mtu = dev->mtu;
+               tipc_reset_bearer(net, b);
+               break;
+       case NETDEV_CHANGEADDR:
+--- a/net/tipc/bearer.h
++++ b/net/tipc/bearer.h
+@@ -39,6 +39,7 @@
+ #include "netlink.h"
+ #include "core.h"
++#include "msg.h"
+ #include <net/genetlink.h>
+ #define MAX_MEDIA     3
+@@ -59,6 +60,9 @@
+ #define TIPC_MEDIA_TYPE_IB    2
+ #define TIPC_MEDIA_TYPE_UDP   3
++/* minimum bearer MTU */
++#define TIPC_MIN_BEARER_MTU   (MAX_H_SIZE + INT_H_SIZE)
++
+ /**
+  * struct tipc_media_addr - destination address used by TIPC bearers
+  * @value: address info (format defined by media)
+@@ -213,4 +217,13 @@ void tipc_bearer_xmit(struct net *net, u
+ void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id,
+                        struct sk_buff_head *xmitq);
++/* check if device MTU is too low for tipc headers */
++static inline bool tipc_mtu_bad(struct net_device *dev, unsigned int reserve)
++{
++      if (dev->mtu >= TIPC_MIN_BEARER_MTU + reserve)
++              return false;
++      netdev_warn(dev, "MTU too low for tipc bearer\n");
++      return true;
++}
++
+ #endif        /* _TIPC_BEARER_H */
+--- a/net/tipc/udp_media.c
++++ b/net/tipc/udp_media.c
+@@ -372,6 +372,11 @@ static int tipc_udp_enable(struct net *n
+               udp_conf.local_ip.s_addr = htonl(INADDR_ANY);
+               udp_conf.use_udp_checksums = false;
+               ub->ifindex = dev->ifindex;
++              if (tipc_mtu_bad(dev, sizeof(struct iphdr) +
++                                    sizeof(struct udphdr))) {
++                      err = -EINVAL;
++                      goto err;
++              }
+               b->mtu = dev->mtu - sizeof(struct iphdr)
+                       - sizeof(struct udphdr);
+ #if IS_ENABLED(CONFIG_IPV6)
diff --git a/queue-4.8/udplite-call-proper-backlog-handlers.patch b/queue-4.8/udplite-call-proper-backlog-handlers.patch
new file mode 100644 (file)
index 0000000..9f0d9cb
--- /dev/null
@@ -0,0 +1,108 @@
+From foo@baz Thu Dec  8 07:19:12 CET 2016
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 22 Nov 2016 09:06:45 -0800
+Subject: udplite: call proper backlog handlers
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 30c7be26fd3587abcb69587f781098e3ca2d565b ]
+
+In commits 93821778def10 ("udp: Fix rcv socket locking") and
+f7ad74fef3af ("net/ipv6/udp: UDP encapsulation: break backlog_rcv into
+__udpv6_queue_rcv_skb") UDP backlog handlers were renamed, but UDPlite
+was forgotten.
+
+This leads to crashes if UDPlite header is pulled twice, which happens
+starting from commit e6afc8ace6dd ("udp: remove headers from UDP packets
+before queueing")
+
+Bug found by syzkaller team, thanks a lot guys !
+
+Note that backlog use in UDP/UDPlite is scheduled to be removed starting
+from linux-4.10, so this patch is only needed up to linux-4.9
+
+Fixes: 93821778def1 ("udp: Fix rcv socket locking")
+Fixes: f7ad74fef3af ("net/ipv6/udp: UDP encapsulation: break backlog_rcv into __udpv6_queue_rcv_skb")
+Fixes: e6afc8ace6dd ("udp: remove headers from UDP packets before queueing")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Andrey Konovalov <andreyknvl@google.com>
+Cc: Benjamin LaHaise <bcrl@kvack.org>
+Cc: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/udp.c      |    2 +-
+ net/ipv4/udp_impl.h |    2 +-
+ net/ipv4/udplite.c  |    2 +-
+ net/ipv6/udp.c      |    2 +-
+ net/ipv6/udp_impl.h |    2 +-
+ net/ipv6/udplite.c  |    2 +-
+ 6 files changed, 6 insertions(+), 6 deletions(-)
+
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -1451,7 +1451,7 @@ static void udp_v4_rehash(struct sock *s
+       udp_lib_rehash(sk, new_hash);
+ }
+-static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
++int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+ {
+       int rc;
+--- a/net/ipv4/udp_impl.h
++++ b/net/ipv4/udp_impl.h
+@@ -25,7 +25,7 @@ int udp_recvmsg(struct sock *sk, struct
+               int flags, int *addr_len);
+ int udp_sendpage(struct sock *sk, struct page *page, int offset, size_t size,
+                int flags);
+-int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
++int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
+ void udp_destroy_sock(struct sock *sk);
+ #ifdef CONFIG_PROC_FS
+--- a/net/ipv4/udplite.c
++++ b/net/ipv4/udplite.c
+@@ -50,7 +50,7 @@ struct proto         udplite_prot = {
+       .sendmsg           = udp_sendmsg,
+       .recvmsg           = udp_recvmsg,
+       .sendpage          = udp_sendpage,
+-      .backlog_rcv       = udp_queue_rcv_skb,
++      .backlog_rcv       = __udp_queue_rcv_skb,
+       .hash              = udp_lib_hash,
+       .unhash            = udp_lib_unhash,
+       .get_port          = udp_v4_get_port,
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -514,7 +514,7 @@ out:
+       return;
+ }
+-static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
++int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+ {
+       int rc;
+--- a/net/ipv6/udp_impl.h
++++ b/net/ipv6/udp_impl.h
+@@ -26,7 +26,7 @@ int compat_udpv6_getsockopt(struct sock
+ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len);
+ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
+                 int flags, int *addr_len);
+-int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
++int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
+ void udpv6_destroy_sock(struct sock *sk);
+ void udp_v6_clear_sk(struct sock *sk, int size);
+--- a/net/ipv6/udplite.c
++++ b/net/ipv6/udplite.c
+@@ -45,7 +45,7 @@ struct proto udplitev6_prot = {
+       .getsockopt        = udpv6_getsockopt,
+       .sendmsg           = udpv6_sendmsg,
+       .recvmsg           = udpv6_recvmsg,
+-      .backlog_rcv       = udpv6_queue_rcv_skb,
++      .backlog_rcv       = __udpv6_queue_rcv_skb,
+       .hash              = udp_lib_hash,
+       .unhash            = udp_lib_unhash,
+       .get_port          = udp_v6_get_port,
diff --git a/queue-4.8/virtio-net-add-a-missing-synchronize_net.patch b/queue-4.8/virtio-net-add-a-missing-synchronize_net.patch
new file mode 100644 (file)
index 0000000..c2a03cb
--- /dev/null
@@ -0,0 +1,40 @@
+From foo@baz Thu Dec  8 07:19:12 CET 2016
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 15 Nov 2016 22:24:12 -0800
+Subject: virtio-net: add a missing synchronize_net()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 963abe5c8a0273a1cf5913556da1b1189de0e57a ]
+
+It seems many drivers do not respect napi_hash_del() contract.
+
+When napi_hash_del() is used before netif_napi_del(), an RCU grace
+period is needed before freeing NAPI object.
+
+Fixes: 91815639d880 ("virtio-net: rx busy polling support")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Jason Wang <jasowang@redhat.com>
+Cc: Michael S. Tsirkin <mst@redhat.com>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/virtio_net.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -1468,6 +1468,11 @@ static void virtnet_free_queues(struct v
+               netif_napi_del(&vi->rq[i].napi);
+       }
++      /* We called napi_hash_del() before netif_napi_del(),
++       * we need to respect an RCU grace period before freeing vi->rq
++       */
++      synchronize_net();
++
+       kfree(vi->rq);
+       kfree(vi->sq);
+ }