git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.7-stable patches
author: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 21 Sep 2016 08:05:37 +0000 (10:05 +0200)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 21 Sep 2016 08:05:37 +0000 (10:05 +0200)
added patches:
af_unix-split-u-readlock-into-two-iolock-and-bindlock.patch
bnxt_en-fix-tx-push-operation-on-arm64.patch
bonding-fix-bonding-crash.patch
bpf-fix-method-of-ptr_to_packet-reg-id-generation.patch
bpf-fix-write-helpers-with-regards-to-non-linear-parts.patch
bridge-re-introduce-fix-parsing-of-mldv2-reports.patch
ipv4-panic-in-leaf_walk_rcu-due-to-stale-node-pointer.patch
ipv6-addrconf-fix-dev-refcont-leak-when-dad-failed.patch
ipv6-release-dst-in-ping_v6_sendmsg.patch
kcm-fix-a-socket-double-free.patch
net-dsa-bcm_sf2-fix-race-condition-while-unmasking-interrupts.patch
net-ipv6-do-not-keep-ipv6-addresses-when-ipv6-is-disabled.patch
net-irda-handle-iriap_register_lsap-allocation-failure.patch
net-mlx5-added-missing-check-of-msg-length-in-verifying-its-signature.patch
net-mlx5-fix-pci-error-recovery-flow.patch
net-mlx5e-fix-parsing-of-vlan-packets-when-updating-lro-header.patch
net-mlx5e-use-correct-flow-dissector-key-on-flower-offloading.patch
net-sched-fix-encoding-to-use-real-length.patch
net-sctp-always-initialise-sctp_ht_iter-start_fail.patch
net-smc91x-fix-smc-accesses.patch
revert-af_unix-fix-splice-bind-deadlock.patch
revert-phy-irq-cannot-be-shared.patch
sctp-fix-overrun-in-sctp_diag_dump_one.patch
tcp-cwnd-does-not-increase-in-tcp-yeah.patch
tcp-fastopen-avoid-negative-sk_forward_alloc.patch
tcp-fix-use-after-free-in-tcp_xmit_retransmit_queue.patch
tcp-properly-scale-window-in-tcp_v_reqsk_send_ack.patch
tipc-fix-null-pointer-dereference-in-shutdown.patch
tun-fix-transmit-timestamp-support.patch
udp-fix-poll-issue-with-zero-sized-packets.patch
vti-flush-x-netns-xfrm-cache-when-vti-interface-is-removed.patch

32 files changed:
queue-4.7/af_unix-split-u-readlock-into-two-iolock-and-bindlock.patch [new file with mode: 0644]
queue-4.7/bnxt_en-fix-tx-push-operation-on-arm64.patch [new file with mode: 0644]
queue-4.7/bonding-fix-bonding-crash.patch [new file with mode: 0644]
queue-4.7/bpf-fix-method-of-ptr_to_packet-reg-id-generation.patch [new file with mode: 0644]
queue-4.7/bpf-fix-write-helpers-with-regards-to-non-linear-parts.patch [new file with mode: 0644]
queue-4.7/bridge-re-introduce-fix-parsing-of-mldv2-reports.patch [new file with mode: 0644]
queue-4.7/ipv4-panic-in-leaf_walk_rcu-due-to-stale-node-pointer.patch [new file with mode: 0644]
queue-4.7/ipv6-addrconf-fix-dev-refcont-leak-when-dad-failed.patch [new file with mode: 0644]
queue-4.7/ipv6-release-dst-in-ping_v6_sendmsg.patch [new file with mode: 0644]
queue-4.7/kcm-fix-a-socket-double-free.patch [new file with mode: 0644]
queue-4.7/net-dsa-bcm_sf2-fix-race-condition-while-unmasking-interrupts.patch [new file with mode: 0644]
queue-4.7/net-ipv6-do-not-keep-ipv6-addresses-when-ipv6-is-disabled.patch [new file with mode: 0644]
queue-4.7/net-irda-handle-iriap_register_lsap-allocation-failure.patch [new file with mode: 0644]
queue-4.7/net-mlx5-added-missing-check-of-msg-length-in-verifying-its-signature.patch [new file with mode: 0644]
queue-4.7/net-mlx5-fix-pci-error-recovery-flow.patch [new file with mode: 0644]
queue-4.7/net-mlx5e-fix-parsing-of-vlan-packets-when-updating-lro-header.patch [new file with mode: 0644]
queue-4.7/net-mlx5e-use-correct-flow-dissector-key-on-flower-offloading.patch [new file with mode: 0644]
queue-4.7/net-sched-fix-encoding-to-use-real-length.patch [new file with mode: 0644]
queue-4.7/net-sctp-always-initialise-sctp_ht_iter-start_fail.patch [new file with mode: 0644]
queue-4.7/net-smc91x-fix-smc-accesses.patch [new file with mode: 0644]
queue-4.7/revert-af_unix-fix-splice-bind-deadlock.patch [new file with mode: 0644]
queue-4.7/revert-phy-irq-cannot-be-shared.patch [new file with mode: 0644]
queue-4.7/sctp-fix-overrun-in-sctp_diag_dump_one.patch [new file with mode: 0644]
queue-4.7/series
queue-4.7/tcp-cwnd-does-not-increase-in-tcp-yeah.patch [new file with mode: 0644]
queue-4.7/tcp-fastopen-avoid-negative-sk_forward_alloc.patch [new file with mode: 0644]
queue-4.7/tcp-fix-use-after-free-in-tcp_xmit_retransmit_queue.patch [new file with mode: 0644]
queue-4.7/tcp-properly-scale-window-in-tcp_v_reqsk_send_ack.patch [new file with mode: 0644]
queue-4.7/tipc-fix-null-pointer-dereference-in-shutdown.patch [new file with mode: 0644]
queue-4.7/tun-fix-transmit-timestamp-support.patch [new file with mode: 0644]
queue-4.7/udp-fix-poll-issue-with-zero-sized-packets.patch [new file with mode: 0644]
queue-4.7/vti-flush-x-netns-xfrm-cache-when-vti-interface-is-removed.patch [new file with mode: 0644]

diff --git a/queue-4.7/af_unix-split-u-readlock-into-two-iolock-and-bindlock.patch b/queue-4.7/af_unix-split-u-readlock-into-two-iolock-and-bindlock.patch
new file mode 100644 (file)
index 0000000..b7a820f
--- /dev/null
@@ -0,0 +1,232 @@
+From foo@baz Wed Sep 21 10:05:18 CEST 2016
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Thu, 1 Sep 2016 14:43:53 -0700
+Subject: af_unix: split 'u->readlock' into two: 'iolock' and 'bindlock'
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+
+[ Upstream commit 6e1ce3c3451291142a57c4f3f6f999a29fb5b3bc ]
+
+Right now we use the 'readlock' both for protecting some of the af_unix
+IO path and for making the bind be single-threaded.
+
+The two are independent, but using the same lock makes for a nasty
+deadlock due to ordering with regards to filesystem locking.  The bind
+locking would want to nest outside the VFS pathname locking, but the IO
+locking wants to nest inside some of those same locks.
+
+We tried to fix this earlier with commit c845acb324aa ("af_unix: Fix
+splice-bind deadlock") which moved the readlock inside the vfs locks,
+but that caused problems with overlayfs that will then call back into
+filesystem routines that take the lock in the wrong order anyway.
+
+Splitting the locks means that we can go back to having the bind lock be
+the outermost lock, and we don't have any deadlocks with lock ordering.
+
+Acked-by: Rainer Weikusat <rweikusat@cyberadapt.com>
+Acked-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/af_unix.h |    2 +-
+ net/unix/af_unix.c    |   45 +++++++++++++++++++++++----------------------
+ 2 files changed, 24 insertions(+), 23 deletions(-)
+
+--- a/include/net/af_unix.h
++++ b/include/net/af_unix.h
+@@ -52,7 +52,7 @@ struct unix_sock {
+       struct sock             sk;
+       struct unix_address     *addr;
+       struct path             path;
+-      struct mutex            readlock;
++      struct mutex            iolock, bindlock;
+       struct sock             *peer;
+       struct list_head        link;
+       atomic_long_t           inflight;
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -661,11 +661,11 @@ static int unix_set_peek_off(struct sock
+ {
+       struct unix_sock *u = unix_sk(sk);
+-      if (mutex_lock_interruptible(&u->readlock))
++      if (mutex_lock_interruptible(&u->iolock))
+               return -EINTR;
+       sk->sk_peek_off = val;
+-      mutex_unlock(&u->readlock);
++      mutex_unlock(&u->iolock);
+       return 0;
+ }
+@@ -778,7 +778,8 @@ static struct sock *unix_create1(struct
+       spin_lock_init(&u->lock);
+       atomic_long_set(&u->inflight, 0);
+       INIT_LIST_HEAD(&u->link);
+-      mutex_init(&u->readlock); /* single task reading lock */
++      mutex_init(&u->iolock); /* single task reading lock */
++      mutex_init(&u->bindlock); /* single task binding lock */
+       init_waitqueue_head(&u->peer_wait);
+       init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
+       unix_insert_socket(unix_sockets_unbound(sk), sk);
+@@ -847,7 +848,7 @@ static int unix_autobind(struct socket *
+       int err;
+       unsigned int retries = 0;
+-      err = mutex_lock_interruptible(&u->readlock);
++      err = mutex_lock_interruptible(&u->bindlock);
+       if (err)
+               return err;
+@@ -894,7 +895,7 @@ retry:
+       spin_unlock(&unix_table_lock);
+       err = 0;
+-out:  mutex_unlock(&u->readlock);
++out:  mutex_unlock(&u->bindlock);
+       return err;
+ }
+@@ -1008,7 +1009,7 @@ static int unix_bind(struct socket *sock
+               goto out;
+       addr_len = err;
+-      err = mutex_lock_interruptible(&u->readlock);
++      err = mutex_lock_interruptible(&u->bindlock);
+       if (err)
+               goto out;
+@@ -1062,7 +1063,7 @@ static int unix_bind(struct socket *sock
+ out_unlock:
+       spin_unlock(&unix_table_lock);
+ out_up:
+-      mutex_unlock(&u->readlock);
++      mutex_unlock(&u->bindlock);
+ out:
+       return err;
+ }
+@@ -1954,17 +1955,17 @@ static ssize_t unix_stream_sendpage(stru
+       if (false) {
+ alloc_skb:
+               unix_state_unlock(other);
+-              mutex_unlock(&unix_sk(other)->readlock);
++              mutex_unlock(&unix_sk(other)->iolock);
+               newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
+                                             &err, 0);
+               if (!newskb)
+                       goto err;
+       }
+-      /* we must acquire readlock as we modify already present
++      /* we must acquire iolock as we modify already present
+        * skbs in the sk_receive_queue and mess with skb->len
+        */
+-      err = mutex_lock_interruptible(&unix_sk(other)->readlock);
++      err = mutex_lock_interruptible(&unix_sk(other)->iolock);
+       if (err) {
+               err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
+               goto err;
+@@ -2031,7 +2032,7 @@ alloc_skb:
+       }
+       unix_state_unlock(other);
+-      mutex_unlock(&unix_sk(other)->readlock);
++      mutex_unlock(&unix_sk(other)->iolock);
+       other->sk_data_ready(other);
+       scm_destroy(&scm);
+@@ -2040,7 +2041,7 @@ alloc_skb:
+ err_state_unlock:
+       unix_state_unlock(other);
+ err_unlock:
+-      mutex_unlock(&unix_sk(other)->readlock);
++      mutex_unlock(&unix_sk(other)->iolock);
+ err:
+       kfree_skb(newskb);
+       if (send_sigpipe && !(flags & MSG_NOSIGNAL))
+@@ -2108,7 +2109,7 @@ static int unix_dgram_recvmsg(struct soc
+       timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+       do {
+-              mutex_lock(&u->readlock);
++              mutex_lock(&u->iolock);
+               skip = sk_peek_offset(sk, flags);
+               skb = __skb_try_recv_datagram(sk, flags, &peeked, &skip, &err,
+@@ -2116,14 +2117,14 @@ static int unix_dgram_recvmsg(struct soc
+               if (skb)
+                       break;
+-              mutex_unlock(&u->readlock);
++              mutex_unlock(&u->iolock);
+               if (err != -EAGAIN)
+                       break;
+       } while (timeo &&
+                !__skb_wait_for_more_packets(sk, &err, &timeo, last));
+-      if (!skb) { /* implies readlock unlocked */
++      if (!skb) { /* implies iolock unlocked */
+               unix_state_lock(sk);
+               /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
+               if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
+@@ -2188,7 +2189,7 @@ static int unix_dgram_recvmsg(struct soc
+ out_free:
+       skb_free_datagram(sk, skb);
+-      mutex_unlock(&u->readlock);
++      mutex_unlock(&u->iolock);
+ out:
+       return err;
+ }
+@@ -2283,7 +2284,7 @@ static int unix_stream_read_generic(stru
+       /* Lock the socket to prevent queue disordering
+        * while sleeps in memcpy_tomsg
+        */
+-      mutex_lock(&u->readlock);
++      mutex_lock(&u->iolock);
+       if (flags & MSG_PEEK)
+               skip = sk_peek_offset(sk, flags);
+@@ -2325,7 +2326,7 @@ again:
+                               break;
+                       }
+-                      mutex_unlock(&u->readlock);
++                      mutex_unlock(&u->iolock);
+                       timeo = unix_stream_data_wait(sk, timeo, last,
+                                                     last_len);
+@@ -2336,7 +2337,7 @@ again:
+                               goto out;
+                       }
+-                      mutex_lock(&u->readlock);
++                      mutex_lock(&u->iolock);
+                       goto redo;
+ unlock:
+                       unix_state_unlock(sk);
+@@ -2439,7 +2440,7 @@ unlock:
+               }
+       } while (size);
+-      mutex_unlock(&u->readlock);
++      mutex_unlock(&u->iolock);
+       if (state->msg)
+               scm_recv(sock, state->msg, &scm, flags);
+       else
+@@ -2480,9 +2481,9 @@ static ssize_t skb_unix_socket_splice(st
+       int ret;
+       struct unix_sock *u = unix_sk(sk);
+-      mutex_unlock(&u->readlock);
++      mutex_unlock(&u->iolock);
+       ret = splice_to_pipe(pipe, spd);
+-      mutex_lock(&u->readlock);
++      mutex_lock(&u->iolock);
+       return ret;
+ }
diff --git a/queue-4.7/bnxt_en-fix-tx-push-operation-on-arm64.patch b/queue-4.7/bnxt_en-fix-tx-push-operation-on-arm64.patch
new file mode 100644 (file)
index 0000000..c98d8f1
--- /dev/null
@@ -0,0 +1,36 @@
+From foo@baz Wed Sep 21 10:05:18 CEST 2016
+From: Michael Chan <michael.chan@broadcom.com>
+Date: Mon, 5 Sep 2016 01:57:35 -0400
+Subject: bnxt_en: Fix TX push operation on ARM64.
+
+From: Michael Chan <michael.chan@broadcom.com>
+
+
+[ Upstream commit 9d13744bb75078175ab49408f2abb980e4dbccc9 ]
+
+There is a code path where we are calling __iowrite64_copy() on
+an address that is not 64-bit aligned.  This causes an exception on
+some architectures such as arm64.  Fix that code path by using
+__iowrite32_copy().
+
+Reported-by: JD Zheng <jiandong.zheng@broadcom.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -293,8 +293,8 @@ static netdev_tx_t bnxt_start_xmit(struc
+               push_len = (length + sizeof(*tx_push) + 7) / 8;
+               if (push_len > 16) {
+                       __iowrite64_copy(txr->tx_doorbell, tx_push_buf, 16);
+-                      __iowrite64_copy(txr->tx_doorbell + 4, tx_push_buf + 1,
+-                                       push_len - 16);
++                      __iowrite32_copy(txr->tx_doorbell + 4, tx_push_buf + 1,
++                                       (push_len - 16) << 1);
+               } else {
+                       __iowrite64_copy(txr->tx_doorbell, tx_push_buf,
+                                        push_len);
diff --git a/queue-4.7/bonding-fix-bonding-crash.patch b/queue-4.7/bonding-fix-bonding-crash.patch
new file mode 100644 (file)
index 0000000..2ad2982
--- /dev/null
@@ -0,0 +1,93 @@
+From foo@baz Wed Sep 21 10:05:18 CEST 2016
+From: Mahesh Bandewar <maheshb@google.com>
+Date: Thu, 1 Sep 2016 22:18:34 -0700
+Subject: bonding: Fix bonding crash
+
+From: Mahesh Bandewar <maheshb@google.com>
+
+
+[ Upstream commit 24b27fc4cdf9e10c5e79e5923b6b7c2c5c95096c ]
+
+The following few steps will crash the kernel -
+
+  (a) Create bonding master
+      > modprobe bonding miimon=50
+  (b) Create macvlan bridge on eth2
+      > ip link add link eth2 dev mvl0 address aa:0:0:0:0:01 \
+          type macvlan
+  (c) Now try adding eth2 into the bond
+      > echo +eth2 > /sys/class/net/bond0/bonding/slaves
+      <crash>
+
+Bonding does lots of things before checking if the device enslaved is
+busy or not.
+
+In this case when the notifier call-chain sends notifications, the
+bond_netdev_event() assumes that the rx_handler /rx_handler_data is
+registered while the bond_enslave() hasn't progressed far enough to
+register rx_handler for the new slave.
+
+This patch adds a rx_handler check that can be performed right at the
+beginning of the enslave code to avoid getting into this situation.
+
+Signed-off-by: Mahesh Bandewar <maheshb@google.com>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/bonding/bond_main.c |    7 ++++---
+ include/linux/netdevice.h       |    1 +
+ net/core/dev.c                  |   16 ++++++++++++++++
+ 3 files changed, 21 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/bonding/bond_main.c
++++ b/drivers/net/bonding/bond_main.c
+@@ -1341,9 +1341,10 @@ int bond_enslave(struct net_device *bond
+                           slave_dev->name);
+       }
+-      /* already enslaved */
+-      if (slave_dev->flags & IFF_SLAVE) {
+-              netdev_dbg(bond_dev, "Error: Device was already enslaved\n");
++      /* already in-use? */
++      if (netdev_is_rx_handler_busy(slave_dev)) {
++              netdev_err(bond_dev,
++                         "Error: Device is in use and cannot be enslaved\n");
+               return -EBUSY;
+       }
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -3225,6 +3225,7 @@ static inline void napi_free_frags(struc
+       napi->skb = NULL;
+ }
++bool netdev_is_rx_handler_busy(struct net_device *dev);
+ int netdev_rx_handler_register(struct net_device *dev,
+                              rx_handler_func_t *rx_handler,
+                              void *rx_handler_data);
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -3979,6 +3979,22 @@ sch_handle_ingress(struct sk_buff *skb,
+ }
+ /**
++ *    netdev_is_rx_handler_busy - check if receive handler is registered
++ *    @dev: device to check
++ *
++ *    Check if a receive handler is already registered for a given device.
++ *    Return true if there is one.
++ *
++ *    The caller must hold the rtnl_mutex.
++ */
++bool netdev_is_rx_handler_busy(struct net_device *dev)
++{
++      ASSERT_RTNL();
++      return dev && rtnl_dereference(dev->rx_handler);
++}
++EXPORT_SYMBOL_GPL(netdev_is_rx_handler_busy);
++
++/**
+  *    netdev_rx_handler_register - register receive handler
+  *    @dev: device to register a handler for
+  *    @rx_handler: receive handler to register
diff --git a/queue-4.7/bpf-fix-method-of-ptr_to_packet-reg-id-generation.patch b/queue-4.7/bpf-fix-method-of-ptr_to_packet-reg-id-generation.patch
new file mode 100644 (file)
index 0000000..391d571
--- /dev/null
@@ -0,0 +1,74 @@
+From foo@baz Wed Sep 21 10:05:18 CEST 2016
+From: Jakub Kicinski <jakub.kicinski@netronome.com>
+Date: Tue, 2 Aug 2016 16:12:14 +0100
+Subject: bpf: fix method of PTR_TO_PACKET reg id generation
+
+From: Jakub Kicinski <jakub.kicinski@netronome.com>
+
+
+[ Upstream commit 1f415a74b0ca64b5bfacbb12d71ed2ec050a8cfb ]
+
+Using per-register incrementing ID can lead to
+find_good_pkt_pointers() confusing registers which
+have completely different values.  Consider example:
+
+0: (bf) r6 = r1
+1: (61) r8 = *(u32 *)(r6 +76)
+2: (61) r0 = *(u32 *)(r6 +80)
+3: (bf) r7 = r8
+4: (07) r8 += 32
+5: (2d) if r8 > r0 goto pc+9
+ R0=pkt_end R1=ctx R6=ctx R7=pkt(id=0,off=0,r=32) R8=pkt(id=0,off=32,r=32) R10=fp
+6: (bf) r8 = r7
+7: (bf) r9 = r7
+8: (71) r1 = *(u8 *)(r7 +0)
+9: (0f) r8 += r1
+10: (71) r1 = *(u8 *)(r7 +1)
+11: (0f) r9 += r1
+12: (07) r8 += 32
+13: (2d) if r8 > r0 goto pc+1
+ R0=pkt_end R1=inv56 R6=ctx R7=pkt(id=0,off=0,r=32) R8=pkt(id=1,off=32,r=32) R9=pkt(id=1,off=0,r=32) R10=fp
+14: (71) r1 = *(u8 *)(r9 +16)
+15: (b7) r7 = 0
+16: (bf) r0 = r7
+17: (95) exit
+
+We need to get a UNKNOWN_VALUE with imm to force id
+generation so lines 0-5 make r7 a valid packet pointer.
+We then read two different bytes from the packet and
+add them to copies of the constructed packet pointer.
+r8 (line 9) and r9 (line 11) will get the same id of 1,
+independently.  When either of them is validated (line
+13) - find_good_pkt_pointers() will also mark the other
+as safe.  This leads to access on line 14 being mistakenly
+considered safe.
+
+Fixes: 969bf05eb3ce ("bpf: direct packet access")
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Acked-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/verifier.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -194,6 +194,7 @@ struct verifier_env {
+       struct verifier_state_list **explored_states; /* search pruning optimization */
+       struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
+       u32 used_map_cnt;               /* number of used maps */
++      u32 id_gen;                     /* used to generate unique reg IDs */
+       bool allow_ptr_leaks;
+ };
+@@ -1277,7 +1278,7 @@ add_imm:
+               /* dst_reg stays as pkt_ptr type and since some positive
+                * integer value was added to the pointer, increment its 'id'
+                */
+-              dst_reg->id++;
++              dst_reg->id = ++env->id_gen;
+               /* something was added to pkt_ptr, set range and off to zero */
+               dst_reg->off = 0;
diff --git a/queue-4.7/bpf-fix-write-helpers-with-regards-to-non-linear-parts.patch b/queue-4.7/bpf-fix-write-helpers-with-regards-to-non-linear-parts.patch
new file mode 100644 (file)
index 0000000..e79664e
--- /dev/null
@@ -0,0 +1,207 @@
+From foo@baz Wed Sep 21 10:05:18 CEST 2016
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Thu, 11 Aug 2016 21:38:37 +0200
+Subject: bpf: fix write helpers with regards to non-linear parts
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+
+[ Upstream commit 0ed661d5a48fa6df0b50ae64d27fe759a3ce42cf ]
+
+Fix the bpf_try_make_writable() helper and all call sites we have in BPF,
+it's currently defective with regard to skbs when the write_len spans into
+non-linear parts, no matter if cloned or not.
+
+There are multiple issues at once. First, using skb_store_bits() is not
+correct since even if we have a cloned skb, page frags can still be shared.
+To really make them private, we need to pull them in via __pskb_pull_tail()
+first, which also gets us a private head via pskb_expand_head() implicitly.
+
+This is for helpers like bpf_skb_store_bytes(), bpf_l3_csum_replace(),
+bpf_l4_csum_replace(). Really, the only thing reasonable and working here
+is to call skb_ensure_writable() before any write operation. Meaning, via
+pskb_may_pull() it makes sure that parts we want to access are pulled in and
+if not does so plus unclones the skb implicitly. If our write_len still fits
+the headlen and we're cloned and our header of the clone is not writable,
+then we need to make a private copy via pskb_expand_head(). skb_store_bits()
+is a bit misleading and only safe to store into non-linear data in different
+contexts such as 357b40a18b04 ("[IPV6]: IPV6_CHECKSUM socket option can
+corrupt kernel memory").
+
+For above BPF helper functions, it means after fixed bpf_try_make_writable(),
+we've pulled in enough, so that we operate always based on skb->data. Thus,
+the call to skb_header_pointer() and skb_store_bits() becomes superfluous.
+In bpf_skb_store_bytes(), the len check is unnecessary too since it can
+only pass in maximum of BPF stack size, so adding offset is guaranteed to
+never overflow. Also bpf_l3/4_csum_replace() helpers must test for proper
+offset alignment since they use __sum16 pointer for writing resulting csum.
+
+The remaining helpers that change skb data not discussed here yet are
+bpf_skb_vlan_push(), bpf_skb_vlan_pop() and bpf_skb_change_proto(). The
+vlan helpers internally call either skb_ensure_writable() (pop case) and
+skb_cow_head() (push case, for head expansion), respectively. Similarly,
+bpf_skb_proto_xlat() takes care to not mangle page frags.
+
+Fixes: 608cd71a9c7c ("tc: bpf: generalize pedit action")
+Fixes: 91bc4822c3d6 ("tc: bpf: add checksum helpers")
+Fixes: 3697649ff29e ("bpf: try harder on clones when writing into skb")
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/filter.c |   70 +++++++++++++-----------------------------------------
+ 1 file changed, 18 insertions(+), 52 deletions(-)
+
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -1353,54 +1353,33 @@ static inline int bpf_try_make_writable(
+ {
+       int err;
+-      if (!skb_cloned(skb))
+-              return 0;
+-      if (skb_clone_writable(skb, write_len))
+-              return 0;
+-      err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+-      if (!err)
+-              bpf_compute_data_end(skb);
++      err = skb_ensure_writable(skb, write_len);
++      bpf_compute_data_end(skb);
++
+       return err;
+ }
+ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
+ {
+-      struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp);
+       struct sk_buff *skb = (struct sk_buff *) (long) r1;
+-      int offset = (int) r2;
++      unsigned int offset = (unsigned int) r2;
+       void *from = (void *) (long) r3;
+       unsigned int len = (unsigned int) r4;
+       void *ptr;
+       if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH)))
+               return -EINVAL;
+-
+-      /* bpf verifier guarantees that:
+-       * 'from' pointer points to bpf program stack
+-       * 'len' bytes of it were initialized
+-       * 'len' > 0
+-       * 'skb' is a valid pointer to 'struct sk_buff'
+-       *
+-       * so check for invalid 'offset' and too large 'len'
+-       */
+-      if (unlikely((u32) offset > 0xffff || len > sizeof(sp->buff)))
++      if (unlikely(offset > 0xffff))
+               return -EFAULT;
+       if (unlikely(bpf_try_make_writable(skb, offset + len)))
+               return -EFAULT;
+-      ptr = skb_header_pointer(skb, offset, len, sp->buff);
+-      if (unlikely(!ptr))
+-              return -EFAULT;
+-
++      ptr = skb->data + offset;
+       if (flags & BPF_F_RECOMPUTE_CSUM)
+               skb_postpull_rcsum(skb, ptr, len);
+       memcpy(ptr, from, len);
+-      if (ptr == sp->buff)
+-              /* skb_store_bits cannot return -EFAULT here */
+-              skb_store_bits(skb, offset, ptr, len);
+-
+       if (flags & BPF_F_RECOMPUTE_CSUM)
+               skb_postpush_rcsum(skb, ptr, len);
+       if (flags & BPF_F_INVALIDATE_HASH)
+@@ -1423,12 +1402,12 @@ static const struct bpf_func_proto bpf_s
+ static u64 bpf_skb_load_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+ {
+       const struct sk_buff *skb = (const struct sk_buff *)(unsigned long) r1;
+-      int offset = (int) r2;
++      unsigned int offset = (unsigned int) r2;
+       void *to = (void *)(unsigned long) r3;
+       unsigned int len = (unsigned int) r4;
+       void *ptr;
+-      if (unlikely((u32) offset > 0xffff))
++      if (unlikely(offset > 0xffff))
+               goto err_clear;
+       ptr = skb_header_pointer(skb, offset, len, to);
+@@ -1456,20 +1435,17 @@ static const struct bpf_func_proto bpf_s
+ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
+ {
+       struct sk_buff *skb = (struct sk_buff *) (long) r1;
+-      int offset = (int) r2;
+-      __sum16 sum, *ptr;
++      unsigned int offset = (unsigned int) r2;
++      __sum16 *ptr;
+       if (unlikely(flags & ~(BPF_F_HDR_FIELD_MASK)))
+               return -EINVAL;
+-      if (unlikely((u32) offset > 0xffff))
++      if (unlikely(offset > 0xffff || offset & 1))
+               return -EFAULT;
+-      if (unlikely(bpf_try_make_writable(skb, offset + sizeof(sum))))
+-              return -EFAULT;
+-
+-      ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
+-      if (unlikely(!ptr))
++      if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
+               return -EFAULT;
++      ptr = (__sum16 *)(skb->data + offset);
+       switch (flags & BPF_F_HDR_FIELD_MASK) {
+       case 0:
+               if (unlikely(from != 0))
+@@ -1487,10 +1463,6 @@ static u64 bpf_l3_csum_replace(u64 r1, u
+               return -EINVAL;
+       }
+-      if (ptr == &sum)
+-              /* skb_store_bits guaranteed to not return -EFAULT here */
+-              skb_store_bits(skb, offset, ptr, sizeof(sum));
+-
+       return 0;
+ }
+@@ -1510,20 +1482,18 @@ static u64 bpf_l4_csum_replace(u64 r1, u
+       struct sk_buff *skb = (struct sk_buff *) (long) r1;
+       bool is_pseudo = flags & BPF_F_PSEUDO_HDR;
+       bool is_mmzero = flags & BPF_F_MARK_MANGLED_0;
+-      int offset = (int) r2;
+-      __sum16 sum, *ptr;
++      unsigned int offset = (unsigned int) r2;
++      __sum16 *ptr;
+       if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_PSEUDO_HDR |
+                              BPF_F_HDR_FIELD_MASK)))
+               return -EINVAL;
+-      if (unlikely((u32) offset > 0xffff))
++      if (unlikely(offset > 0xffff || offset & 1))
+               return -EFAULT;
+-      if (unlikely(bpf_try_make_writable(skb, offset + sizeof(sum))))
++      if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
+               return -EFAULT;
+-      ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
+-      if (unlikely(!ptr))
+-              return -EFAULT;
++      ptr = (__sum16 *)(skb->data + offset);
+       if (is_mmzero && !*ptr)
+               return 0;
+@@ -1546,10 +1516,6 @@ static u64 bpf_l4_csum_replace(u64 r1, u
+       if (is_mmzero && !*ptr)
+               *ptr = CSUM_MANGLED_0;
+-      if (ptr == &sum)
+-              /* skb_store_bits guaranteed to not return -EFAULT here */
+-              skb_store_bits(skb, offset, ptr, sizeof(sum));
+-
+       return 0;
+ }
diff --git a/queue-4.7/bridge-re-introduce-fix-parsing-of-mldv2-reports.patch b/queue-4.7/bridge-re-introduce-fix-parsing-of-mldv2-reports.patch
new file mode 100644 (file)
index 0000000..8a4edc5
--- /dev/null
@@ -0,0 +1,38 @@
+From foo@baz Wed Sep 21 10:05:18 CEST 2016
+From: Davide Caratti <dcaratti@redhat.com>
+Date: Wed, 31 Aug 2016 14:16:44 +0200
+Subject: bridge: re-introduce 'fix parsing of MLDv2 reports'
+
+From: Davide Caratti <dcaratti@redhat.com>
+
+
+[ Upstream commit 9264251ee2a55bce8fb93826b3f581fb9eb7e2c2 ]
+
+commit bc8c20acaea1 ("bridge: multicast: treat igmpv3 report with
+INCLUDE and no sources as a leave") seems to have accidentally reverted
+commit 47cc84ce0c2f ("bridge: fix parsing of MLDv2 reports"). This
+commit brings back a change to br_ip6_multicast_mld2_report() where
+parsing of MLDv2 reports stops when the first group is successfully
+added to the MDB cache.
+
+Fixes: bc8c20acaea1 ("bridge: multicast: treat igmpv3 report with INCLUDE and no sources as a leave")
+Signed-off-by: Davide Caratti <dcaratti@redhat.com>
+Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Acked-by: Thadeu Lima de Souza Cascardo <cascardo@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/bridge/br_multicast.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/bridge/br_multicast.c
++++ b/net/bridge/br_multicast.c
+@@ -1121,7 +1121,7 @@ static int br_ip6_multicast_mld2_report(
+               } else {
+                       err = br_ip6_multicast_add_group(br, port,
+                                                        &grec->grec_mca, vid);
+-                      if (!err)
++                      if (err)
+                               break;
+               }
+       }
diff --git a/queue-4.7/ipv4-panic-in-leaf_walk_rcu-due-to-stale-node-pointer.patch b/queue-4.7/ipv4-panic-in-leaf_walk_rcu-due-to-stale-node-pointer.patch
new file mode 100644 (file)
index 0000000..8c919f8
--- /dev/null
@@ -0,0 +1,101 @@
+From foo@baz Wed Sep 21 10:05:18 CEST 2016
+From: David Forster <dforster@brocade.com>
+Date: Wed, 3 Aug 2016 15:13:01 +0100
+Subject: ipv4: panic in leaf_walk_rcu due to stale node pointer
+
+From: David Forster <dforster@brocade.com>
+
+
+[ Upstream commit 94d9f1c5906b20053efe375b6d66610bca4b8b64 ]
+
+Panic occurs when issuing "cat /proc/net/route" whilst
+populating FIB with > 1M routes.
+
+Use of cached node pointer in fib_route_get_idx is unsafe.
+
+ BUG: unable to handle kernel paging request at ffffc90001630024
+ IP: [<ffffffff814cf6a0>] leaf_walk_rcu+0x10/0xe0
+ PGD 11b08d067 PUD 11b08e067 PMD dac4b067 PTE 0
+ Oops: 0000 [#1] SMP
+ Modules linked in: nfsd auth_rpcgss oid_registry nfs_acl nfs lockd grace fscac
+ snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep virti
+ acpi_cpufreq button parport_pc ppdev lp parport autofs4 ext4 crc16 mbcache jbd
+tio_ring virtio floppy uhci_hcd ehci_hcd usbcore usb_common libata scsi_mod
+ CPU: 1 PID: 785 Comm: cat Not tainted 4.2.0-rc8+ #4
+ Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007
+ task: ffff8800da1c0bc0 ti: ffff88011a05c000 task.ti: ffff88011a05c000
+ RIP: 0010:[<ffffffff814cf6a0>]  [<ffffffff814cf6a0>] leaf_walk_rcu+0x10/0xe0
+ RSP: 0018:ffff88011a05fda0  EFLAGS: 00010202
+ RAX: ffff8800d8a40c00 RBX: ffff8800da4af940 RCX: ffff88011a05ff20
+ RDX: ffffc90001630020 RSI: 0000000001013531 RDI: ffff8800da4af950
+ RBP: 0000000000000000 R08: ffff8800da1f9a00 R09: 0000000000000000
+ R10: ffff8800db45b7e4 R11: 0000000000000246 R12: ffff8800da4af950
+ R13: ffff8800d97a74c0 R14: 0000000000000000 R15: ffff8800d97a7480
+ FS:  00007fd3970e0700(0000) GS:ffff88011fd00000(0000) knlGS:0000000000000000
+ CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
+ CR2: ffffc90001630024 CR3: 000000011a7e4000 CR4: 00000000000006e0
+ Stack:
+  ffffffff814d00d3 0000000000000000 ffff88011a05ff20 ffff8800da1f9a00
+  ffffffff811dd8b9 0000000000000800 0000000000020000 00007fd396f35000
+  ffffffff811f8714 0000000000003431 ffffffff8138dce0 0000000000000f80
+ Call Trace:
+  [<ffffffff814d00d3>] ? fib_route_seq_start+0x93/0xc0
+  [<ffffffff811dd8b9>] ? seq_read+0x149/0x380
+  [<ffffffff811f8714>] ? fsnotify+0x3b4/0x500
+  [<ffffffff8138dce0>] ? process_echoes+0x70/0x70
+  [<ffffffff8121cfa7>] ? proc_reg_read+0x47/0x70
+  [<ffffffff811bb823>] ? __vfs_read+0x23/0xd0
+  [<ffffffff811bbd42>] ? rw_verify_area+0x52/0xf0
+  [<ffffffff811bbe61>] ? vfs_read+0x81/0x120
+  [<ffffffff811bcbc2>] ? SyS_read+0x42/0xa0
+  [<ffffffff81549ab2>] ? entry_SYSCALL_64_fastpath+0x16/0x75
+ Code: 48 85 c0 75 d8 f3 c3 31 c0 c3 f3 c3 66 66 66 66 66 66 2e 0f 1f 84 00 00
+a 04 89 f0 33 02 44 89 c9 48 d3 e8 0f b6 4a 05 49 89
+ RIP  [<ffffffff814cf6a0>] leaf_walk_rcu+0x10/0xe0
+  RSP <ffff88011a05fda0>
+ CR2: ffffc90001630024
+
+Signed-off-by: Dave Forster <dforster@brocade.com>
+Acked-by: Alexander Duyck <alexander.h.duyck@intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/fib_trie.c |    8 ++------
+ 1 file changed, 2 insertions(+), 6 deletions(-)
+
+--- a/net/ipv4/fib_trie.c
++++ b/net/ipv4/fib_trie.c
+@@ -2452,9 +2452,7 @@ struct fib_route_iter {
+ static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter,
+                                           loff_t pos)
+ {
+-      struct fib_table *tb = iter->main_tb;
+       struct key_vector *l, **tp = &iter->tnode;
+-      struct trie *t;
+       t_key key;
+       /* use cache location of next-to-find key */
+@@ -2462,8 +2460,6 @@ static struct key_vector *fib_route_get_
+               pos -= iter->pos;
+               key = iter->key;
+       } else {
+-              t = (struct trie *)tb->tb_data;
+-              iter->tnode = t->kv;
+               iter->pos = 0;
+               key = 0;
+       }
+@@ -2504,12 +2500,12 @@ static void *fib_route_seq_start(struct
+               return NULL;
+       iter->main_tb = tb;
++      t = (struct trie *)tb->tb_data;
++      iter->tnode = t->kv;
+       if (*pos != 0)
+               return fib_route_get_idx(iter, *pos);
+-      t = (struct trie *)tb->tb_data;
+-      iter->tnode = t->kv;
+       iter->pos = 0;
+       iter->key = 0;
diff --git a/queue-4.7/ipv6-addrconf-fix-dev-refcont-leak-when-dad-failed.patch b/queue-4.7/ipv6-addrconf-fix-dev-refcont-leak-when-dad-failed.patch
new file mode 100644 (file)
index 0000000..54fde3c
--- /dev/null
@@ -0,0 +1,60 @@
+From foo@baz Wed Sep 21 10:05:18 CEST 2016
+From: Wei Yongjun <weiyongjun1@huawei.com>
+Date: Mon, 5 Sep 2016 16:06:31 +0800
+Subject: ipv6: addrconf: fix dev refcont leak when DAD failed
+
+From: Wei Yongjun <weiyongjun1@huawei.com>
+
+
+[ Upstream commit 751eb6b6042a596b0080967c1a529a9fe98dac1d ]
+
+In general, when DAD detects a duplicate IPv6 address, ifp->state
+is set to INET6_IFADDR_STATE_ERRDAD and DAD is stopped by a
+delayed work. The call tree looks like this:
+
+ndisc_recv_ns
+  -> addrconf_dad_failure        <- missing ifp put
+     -> addrconf_mod_dad_work
+       -> schedule addrconf_dad_work()
+         -> addrconf_dad_stop()  <- missing ifp hold before call it
+
+addrconf_dad_failure() is called with an ifp refcount held but does not
+put it. addrconf_dad_work() calls addrconf_dad_stop() without holding an
+extra refcount. This will not cause any issue normally.
+
+But a race between addrconf_dad_failure() and addrconf_dad_work()
+may cause an ifp refcount leak, so the netdevice cannot be unregistered;
+dmesg shows the following messages:
+
+IPv6: eth0: IPv6 duplicate address fe80::XX:XXXX:XXXX:XX detected!
+...
+unregister_netdevice: waiting for eth0 to become free. Usage count = 1
+
+Cc: stable@vger.kernel.org
+Fixes: c15b1ccadb32 ("ipv6: move DAD and addrconf_verify processing
+to workqueue")
+Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/addrconf.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/ipv6/addrconf.c
++++ b/net/ipv6/addrconf.c
+@@ -1906,6 +1906,7 @@ errdad:
+       spin_unlock_bh(&ifp->lock);
+       addrconf_mod_dad_work(ifp, 0);
++      in6_ifa_put(ifp);
+ }
+ /* Join to solicited addr multicast group.
+@@ -3771,6 +3772,7 @@ static void addrconf_dad_work(struct wor
+               addrconf_dad_begin(ifp);
+               goto out;
+       } else if (action == DAD_ABORT) {
++              in6_ifa_hold(ifp);
+               addrconf_dad_stop(ifp, 1);
+               goto out;
+       }
diff --git a/queue-4.7/ipv6-release-dst-in-ping_v6_sendmsg.patch b/queue-4.7/ipv6-release-dst-in-ping_v6_sendmsg.patch
new file mode 100644 (file)
index 0000000..82c681c
--- /dev/null
@@ -0,0 +1,53 @@
+From foo@baz Wed Sep 21 10:05:18 CEST 2016
+From: Dave Jones <davej@codemonkey.org.uk>
+Date: Fri, 2 Sep 2016 14:39:50 -0400
+Subject: ipv6: release dst in ping_v6_sendmsg
+
+From: Dave Jones <davej@codemonkey.org.uk>
+
+
+[ Upstream commit 03c2778a938aaba0893f6d6cdc29511d91a79848 ]
+
+Neither the failure nor the success path of ping_v6_sendmsg releases
+the dst it acquires.  This leads to a flood of warnings from
+"net/core/dst.c:288 dst_release" on older kernels that
+don't have 8bf4ada2e21378816b28205427ee6b0e1ca4c5f1 backported.
+
+That patch optimistically hoped this had been fixed post 3.10, but
+it seems at least one case wasn't, where I've seen this triggered
+a lot from machines doing unprivileged icmp sockets.
+
+Cc: Martin Lau <kafai@fb.com>
+Signed-off-by: Dave Jones <davej@codemonkey.org.uk>
+Acked-by: Martin KaFai Lau <kafai@fb.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ping.c |    9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/net/ipv6/ping.c
++++ b/net/ipv6/ping.c
+@@ -122,8 +122,10 @@ static int ping_v6_sendmsg(struct sock *
+       rt = (struct rt6_info *) dst;
+       np = inet6_sk(sk);
+-      if (!np)
+-              return -EBADF;
++      if (!np) {
++              err = -EBADF;
++              goto dst_err_out;
++      }
+       if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
+               fl6.flowi6_oif = np->mcast_oif;
+@@ -160,6 +162,9 @@ static int ping_v6_sendmsg(struct sock *
+       }
+       release_sock(sk);
++dst_err_out:
++      dst_release(dst);
++
+       if (err)
+               return err;
diff --git a/queue-4.7/kcm-fix-a-socket-double-free.patch b/queue-4.7/kcm-fix-a-socket-double-free.patch
new file mode 100644 (file)
index 0000000..350ff1d
--- /dev/null
@@ -0,0 +1,58 @@
+From foo@baz Wed Sep 21 10:05:18 CEST 2016
+From: WANG Cong <xiyou.wangcong@gmail.com>
+Date: Sun, 28 Aug 2016 21:28:26 -0700
+Subject: kcm: fix a socket double free
+
+From: WANG Cong <xiyou.wangcong@gmail.com>
+
+
+[ Upstream commit c0338aff2260ea6c092806312dbb154cec07a242 ]
+
+Dmitry reported a double free on kcm socket, which could
+be easily reproduced by:
+
+       #include <unistd.h>
+       #include <sys/syscall.h>
+
+       int main()
+       {
+         int fd = syscall(SYS_socket, 0x29ul, 0x5ul, 0x0ul, 0, 0, 0);
+         syscall(SYS_ioctl, fd, 0x89e2ul, 0x20a98000ul, 0, 0, 0);
+         return 0;
+       }
+
+This is because on the error path, after we install
+the new socket file, we call sock_release() to clean
+up the socket, which leaves the fd pointing to a freed
+socket. Fix this by calling sys_close() on that fd
+directly.
+
+Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module")
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Cc: Tom Herbert <tom@herbertland.com>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/kcm/kcmsock.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/kcm/kcmsock.c
++++ b/net/kcm/kcmsock.c
+@@ -13,6 +13,7 @@
+ #include <linux/socket.h>
+ #include <linux/uaccess.h>
+ #include <linux/workqueue.h>
++#include <linux/syscalls.h>
+ #include <net/kcm.h>
+ #include <net/netns/generic.h>
+ #include <net/sock.h>
+@@ -2035,7 +2036,7 @@ static int kcm_ioctl(struct socket *sock
+                       if (copy_to_user((void __user *)arg, &info,
+                                        sizeof(info))) {
+                               err = -EFAULT;
+-                              sock_release(newsock);
++                              sys_close(info.fd);
+                       }
+               }
diff --git a/queue-4.7/net-dsa-bcm_sf2-fix-race-condition-while-unmasking-interrupts.patch b/queue-4.7/net-dsa-bcm_sf2-fix-race-condition-while-unmasking-interrupts.patch
new file mode 100644 (file)
index 0000000..d471433
--- /dev/null
@@ -0,0 +1,47 @@
+From foo@baz Wed Sep 21 10:05:18 CEST 2016
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Wed, 24 Aug 2016 11:01:20 -0700
+Subject: net: dsa: bcm_sf2: Fix race condition while unmasking interrupts
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+
+[ Upstream commit 4f101c47791cdcb831b3ef1f831b1cc51e4fe03c ]
+
+We kept shadow copies of which interrupt sources we have enabled and
+disabled, but due to an order bug in how intrl2_mask_clear was defined,
+we could run into the following scenario:
+
+CPU0                                   CPU1
+intrl2_1_mask_clear(..)
+sets INTRL2_CPU_MASK_CLEAR
+                                       bcm_sf2_switch_1_isr
+                                       read INTRL2_CPU_STATUS and masks with stale
+                                       irq1_mask value
+updates irq1_mask value
+
+Which would make us loop again and again trying to process an interrupt
+we are not clearing since our copy of whether it was enabled before
+still indicates it was not. Fix this by updating the shadow copy first,
+and then unmasking at the HW level.
+
+Fixes: 246d7f773c13 ("net: dsa: add Broadcom SF2 switch driver")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/bcm_sf2.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/dsa/bcm_sf2.h
++++ b/drivers/net/dsa/bcm_sf2.h
+@@ -189,8 +189,8 @@ static inline void name##_writeq(struct
+ static inline void intrl2_##which##_mask_clear(struct bcm_sf2_priv *priv, \
+                                               u32 mask)               \
+ {                                                                     \
+-      intrl2_##which##_writel(priv, mask, INTRL2_CPU_MASK_CLEAR);     \
+       priv->irq##which##_mask &= ~(mask);                             \
++      intrl2_##which##_writel(priv, mask, INTRL2_CPU_MASK_CLEAR);     \
+ }                                                                     \
+ static inline void intrl2_##which##_mask_set(struct bcm_sf2_priv *priv, \
+                                               u32 mask)               \
diff --git a/queue-4.7/net-ipv6-do-not-keep-ipv6-addresses-when-ipv6-is-disabled.patch b/queue-4.7/net-ipv6-do-not-keep-ipv6-addresses-when-ipv6-is-disabled.patch
new file mode 100644 (file)
index 0000000..1d87db6
--- /dev/null
@@ -0,0 +1,48 @@
+From foo@baz Wed Sep 21 10:05:18 CEST 2016
+From: Mike Manning <mmanning@brocade.com>
+Date: Fri, 12 Aug 2016 12:02:38 +0100
+Subject: net: ipv6: Do not keep IPv6 addresses when IPv6 is disabled
+
+From: Mike Manning <mmanning@brocade.com>
+
+
+[ Upstream commit bc561632dddd5af0c4444d919f01cbf6d553aa0a ]
+
+If IPv6 is disabled when the option is set to keep IPv6
+addresses on link down, userspace is unaware of this as
+there is no such indication via netlink. The solution is to
+remove the IPv6 addresses in this case, which results in
+netlink messages indicating removal of addresses in the
+usual manner. This fix also makes the behavior consistent
+with the case of having IPv6 disabled first, which stops
+IPv6 addresses from being added.
+
+Fixes: f1705ec197e7 ("net: ipv6: Make address flushing on ifdown optional")
+Signed-off-by: Mike Manning <mmanning@brocade.com>
+Acked-by: David Ahern <dsa@cumulusnetworks.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/addrconf.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/ipv6/addrconf.c
++++ b/net/ipv6/addrconf.c
+@@ -3469,7 +3469,7 @@ static int addrconf_ifdown(struct net_de
+       /* combine the user config with event to determine if permanent
+        * addresses are to be removed from address hash table
+        */
+-      keep_addr = !(how || _keep_addr <= 0);
++      keep_addr = !(how || _keep_addr <= 0 || idev->cnf.disable_ipv6);
+       /* Step 2: clear hash table */
+       for (i = 0; i < IN6_ADDR_HSIZE; i++) {
+@@ -3525,7 +3525,7 @@ restart:
+       /* re-combine the user config with event to determine if permanent
+        * addresses are to be removed from the interface list
+        */
+-      keep_addr = (!how && _keep_addr > 0);
++      keep_addr = (!how && _keep_addr > 0 && !idev->cnf.disable_ipv6);
+       INIT_LIST_HEAD(&del_list);
+       list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) {
diff --git a/queue-4.7/net-irda-handle-iriap_register_lsap-allocation-failure.patch b/queue-4.7/net-irda-handle-iriap_register_lsap-allocation-failure.patch
new file mode 100644 (file)
index 0000000..83a3a00
--- /dev/null
@@ -0,0 +1,74 @@
+From foo@baz Wed Sep 21 10:05:18 CEST 2016
+From: Vegard Nossum <vegard.nossum@oracle.com>
+Date: Fri, 12 Aug 2016 10:29:13 +0200
+Subject: net/irda: handle iriap_register_lsap() allocation failure
+
+From: Vegard Nossum <vegard.nossum@oracle.com>
+
+
+[ Upstream commit 5ba092efc7ddff040777ae7162f1d195f513571b ]
+
+If iriap_register_lsap() fails to allocate memory, self->lsap is
+set to NULL. However, none of the callers handle the failure and
+irlmp_connect_request() will happily dereference it:
+
+    iriap_register_lsap: Unable to allocated LSAP!
+    ================================================================================
+    UBSAN: Undefined behaviour in net/irda/irlmp.c:378:2
+    member access within null pointer of type 'struct lsap_cb'
+    CPU: 1 PID: 15403 Comm: trinity-c0 Not tainted 4.8.0-rc1+ #81
+    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.3-0-ge2fc41e-prebuilt.qemu-project.org
+    04/01/2014
+     0000000000000000 ffff88010c7e78a8 ffffffff82344f40 0000000041b58ab3
+     ffffffff84f98000 ffffffff82344e94 ffff88010c7e78d0 ffff88010c7e7880
+     ffff88010630ad00 ffffffff84a5fae0 ffffffff84d3f5c0 000000000000017a
+    Call Trace:
+     [<ffffffff82344f40>] dump_stack+0xac/0xfc
+     [<ffffffff8242f5a8>] ubsan_epilogue+0xd/0x8a
+     [<ffffffff824302bf>] __ubsan_handle_type_mismatch+0x157/0x411
+     [<ffffffff83b7bdbc>] irlmp_connect_request+0x7ac/0x970
+     [<ffffffff83b77cc0>] iriap_connect_request+0xa0/0x160
+     [<ffffffff83b77f48>] state_s_disconnect+0x88/0xd0
+     [<ffffffff83b78904>] iriap_do_client_event+0x94/0x120
+     [<ffffffff83b77710>] iriap_getvaluebyclass_request+0x3e0/0x6d0
+     [<ffffffff83ba6ebb>] irda_find_lsap_sel+0x1eb/0x630
+     [<ffffffff83ba90c8>] irda_connect+0x828/0x12d0
+     [<ffffffff833c0dfb>] SYSC_connect+0x22b/0x340
+     [<ffffffff833c7e09>] SyS_connect+0x9/0x10
+     [<ffffffff81007bd3>] do_syscall_64+0x1b3/0x4b0
+     [<ffffffff845f946a>] entry_SYSCALL64_slow_path+0x25/0x25
+    ================================================================================
+
+The bug seems to have been around since forever.
+
+There are more problems with missing error checks in iriap_init() (and
+indeed all of irda_init()), but that's a bigger problem that needs
+very careful review and testing. This patch will fix the most serious
+bug (as it's easily reached from unprivileged userspace).
+
+I have tested my patch with a reproducer.
+
+Signed-off-by: Vegard Nossum <vegard.nossum@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/irda/iriap.c |    8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/net/irda/iriap.c
++++ b/net/irda/iriap.c
+@@ -185,8 +185,12 @@ struct iriap_cb *iriap_open(__u8 slsap_s
+       self->magic = IAS_MAGIC;
+       self->mode = mode;
+-      if (mode == IAS_CLIENT)
+-              iriap_register_lsap(self, slsap_sel, mode);
++      if (mode == IAS_CLIENT) {
++              if (iriap_register_lsap(self, slsap_sel, mode)) {
++                      kfree(self);
++                      return NULL;
++              }
++      }
+       self->confirm = callback;
+       self->priv = priv;
diff --git a/queue-4.7/net-mlx5-added-missing-check-of-msg-length-in-verifying-its-signature.patch b/queue-4.7/net-mlx5-added-missing-check-of-msg-length-in-verifying-its-signature.patch
new file mode 100644 (file)
index 0000000..bc8b3b8
--- /dev/null
@@ -0,0 +1,259 @@
+From foo@baz Wed Sep 21 10:05:18 CEST 2016
+From: Paul Blakey <paulb@mellanox.com>
+Date: Thu, 18 Aug 2016 21:09:05 +0300
+Subject: net/mlx5: Added missing check of msg length in verifying its signature
+
+From: Paul Blakey <paulb@mellanox.com>
+
+
+[ Upstream commit 2c0f8ce1b584a4d7b8ff53140d21dfed99834940 ]
+
+Set and verify signature calculates the signature for each of the
+mailbox nodes, even for those that are unused (from cache). Added
+a missing length check to set and verify only those which are used.
+
+While here, also moved the setting of msg's nodes token to where we
+already go over them. This saves a pass because checksum is disabled,
+and the only useful thing remaining that set signature does is setting
+the token.
+
+Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB
+adapters')
+Signed-off-by: Paul Blakey <paulb@mellanox.com>
+
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/cmd.c |   83 ++++++++++++++++----------
+ 1 file changed, 53 insertions(+), 30 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+@@ -143,13 +143,14 @@ static struct mlx5_cmd_layout *get_inst(
+       return cmd->cmd_buf + (idx << cmd->log_stride);
+ }
+-static u8 xor8_buf(void *buf, int len)
++static u8 xor8_buf(void *buf, size_t offset, int len)
+ {
+       u8 *ptr = buf;
+       u8 sum = 0;
+       int i;
++      int end = len + offset;
+-      for (i = 0; i < len; i++)
++      for (i = offset; i < end; i++)
+               sum ^= ptr[i];
+       return sum;
+@@ -157,41 +158,49 @@ static u8 xor8_buf(void *buf, int len)
+ static int verify_block_sig(struct mlx5_cmd_prot_block *block)
+ {
+-      if (xor8_buf(block->rsvd0, sizeof(*block) - sizeof(block->data) - 1) != 0xff)
++      size_t rsvd0_off = offsetof(struct mlx5_cmd_prot_block, rsvd0);
++      int xor_len = sizeof(*block) - sizeof(block->data) - 1;
++
++      if (xor8_buf(block, rsvd0_off, xor_len) != 0xff)
+               return -EINVAL;
+-      if (xor8_buf(block, sizeof(*block)) != 0xff)
++      if (xor8_buf(block, 0, sizeof(*block)) != 0xff)
+               return -EINVAL;
+       return 0;
+ }
+-static void calc_block_sig(struct mlx5_cmd_prot_block *block, u8 token,
+-                         int csum)
++static void calc_block_sig(struct mlx5_cmd_prot_block *block)
+ {
+-      block->token = token;
+-      if (csum) {
+-              block->ctrl_sig = ~xor8_buf(block->rsvd0, sizeof(*block) -
+-                                          sizeof(block->data) - 2);
+-              block->sig = ~xor8_buf(block, sizeof(*block) - 1);
+-      }
++      int ctrl_xor_len = sizeof(*block) - sizeof(block->data) - 2;
++      size_t rsvd0_off = offsetof(struct mlx5_cmd_prot_block, rsvd0);
++
++      block->ctrl_sig = ~xor8_buf(block, rsvd0_off, ctrl_xor_len);
++      block->sig = ~xor8_buf(block, 0, sizeof(*block) - 1);
+ }
+-static void calc_chain_sig(struct mlx5_cmd_msg *msg, u8 token, int csum)
++static void calc_chain_sig(struct mlx5_cmd_msg *msg)
+ {
+       struct mlx5_cmd_mailbox *next = msg->next;
++      int size = msg->len;
++      int blen = size - min_t(int, sizeof(msg->first.data), size);
++      int n = (blen + MLX5_CMD_DATA_BLOCK_SIZE - 1)
++              / MLX5_CMD_DATA_BLOCK_SIZE;
++      int i = 0;
+-      while (next) {
+-              calc_block_sig(next->buf, token, csum);
++      for (i = 0; i < n && next; i++)  {
++              calc_block_sig(next->buf);
+               next = next->next;
+       }
+ }
+ static void set_signature(struct mlx5_cmd_work_ent *ent, int csum)
+ {
+-      ent->lay->sig = ~xor8_buf(ent->lay, sizeof(*ent->lay));
+-      calc_chain_sig(ent->in, ent->token, csum);
+-      calc_chain_sig(ent->out, ent->token, csum);
++      ent->lay->sig = ~xor8_buf(ent->lay, 0,  sizeof(*ent->lay));
++      if (csum) {
++              calc_chain_sig(ent->in);
++              calc_chain_sig(ent->out);
++      }
+ }
+ static void poll_timeout(struct mlx5_cmd_work_ent *ent)
+@@ -222,12 +231,17 @@ static int verify_signature(struct mlx5_
+       struct mlx5_cmd_mailbox *next = ent->out->next;
+       int err;
+       u8 sig;
++      int size = ent->out->len;
++      int blen = size - min_t(int, sizeof(ent->out->first.data), size);
++      int n = (blen + MLX5_CMD_DATA_BLOCK_SIZE - 1)
++              / MLX5_CMD_DATA_BLOCK_SIZE;
++      int i = 0;
+-      sig = xor8_buf(ent->lay, sizeof(*ent->lay));
++      sig = xor8_buf(ent->lay, 0, sizeof(*ent->lay));
+       if (sig != 0xff)
+               return -EINVAL;
+-      while (next) {
++      for (i = 0; i < n && next; i++) {
+               err = verify_block_sig(next->buf);
+               if (err)
+                       return err;
+@@ -656,7 +670,6 @@ static void cmd_work_handler(struct work
+               spin_unlock_irqrestore(&cmd->alloc_lock, flags);
+       }
+-      ent->token = alloc_token(cmd);
+       cmd->ent_arr[ent->idx] = ent;
+       lay = get_inst(cmd, ent->idx);
+       ent->lay = lay;
+@@ -766,7 +779,8 @@ static u8 *get_status_ptr(struct mlx5_ou
+ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
+                          struct mlx5_cmd_msg *out, void *uout, int uout_size,
+                          mlx5_cmd_cbk_t callback,
+-                         void *context, int page_queue, u8 *status)
++                         void *context, int page_queue, u8 *status,
++                         u8 token)
+ {
+       struct mlx5_cmd *cmd = &dev->cmd;
+       struct mlx5_cmd_work_ent *ent;
+@@ -783,6 +797,8 @@ static int mlx5_cmd_invoke(struct mlx5_c
+       if (IS_ERR(ent))
+               return PTR_ERR(ent);
++      ent->token = token;
++
+       if (!callback)
+               init_completion(&ent->done);
+@@ -854,7 +870,8 @@ static const struct file_operations fops
+       .write  = dbg_write,
+ };
+-static int mlx5_copy_to_msg(struct mlx5_cmd_msg *to, void *from, int size)
++static int mlx5_copy_to_msg(struct mlx5_cmd_msg *to, void *from, int size,
++                          u8 token)
+ {
+       struct mlx5_cmd_prot_block *block;
+       struct mlx5_cmd_mailbox *next;
+@@ -880,6 +897,7 @@ static int mlx5_copy_to_msg(struct mlx5_
+               memcpy(block->data, from, copy);
+               from += copy;
+               size -= copy;
++              block->token = token;
+               next = next->next;
+       }
+@@ -949,7 +967,8 @@ static void free_cmd_box(struct mlx5_cor
+ }
+ static struct mlx5_cmd_msg *mlx5_alloc_cmd_msg(struct mlx5_core_dev *dev,
+-                                             gfp_t flags, int size)
++                                             gfp_t flags, int size,
++                                             u8 token)
+ {
+       struct mlx5_cmd_mailbox *tmp, *head = NULL;
+       struct mlx5_cmd_prot_block *block;
+@@ -978,6 +997,7 @@ static struct mlx5_cmd_msg *mlx5_alloc_c
+               tmp->next = head;
+               block->next = cpu_to_be64(tmp->next ? tmp->next->dma : 0);
+               block->block_num = cpu_to_be32(n - i - 1);
++              block->token = token;
+               head = tmp;
+       }
+       msg->next = head;
+@@ -1352,7 +1372,7 @@ static struct mlx5_cmd_msg *alloc_msg(st
+       }
+       if (IS_ERR(msg))
+-              msg = mlx5_alloc_cmd_msg(dev, gfp, in_size);
++              msg = mlx5_alloc_cmd_msg(dev, gfp, in_size, 0);
+       return msg;
+ }
+@@ -1377,6 +1397,7 @@ static int cmd_exec(struct mlx5_core_dev
+       int err;
+       u8 status = 0;
+       u32 drv_synd;
++      u8 token;
+       if (pci_channel_offline(dev->pdev) ||
+           dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+@@ -1395,20 +1416,22 @@ static int cmd_exec(struct mlx5_core_dev
+               return err;
+       }
+-      err = mlx5_copy_to_msg(inb, in, in_size);
++      token = alloc_token(&dev->cmd);
++
++      err = mlx5_copy_to_msg(inb, in, in_size, token);
+       if (err) {
+               mlx5_core_warn(dev, "err %d\n", err);
+               goto out_in;
+       }
+-      outb = mlx5_alloc_cmd_msg(dev, gfp, out_size);
++      outb = mlx5_alloc_cmd_msg(dev, gfp, out_size, token);
+       if (IS_ERR(outb)) {
+               err = PTR_ERR(outb);
+               goto out_in;
+       }
+       err = mlx5_cmd_invoke(dev, inb, outb, out, out_size, callback, context,
+-                            pages_queue, &status);
++                            pages_queue, &status, token);
+       if (err)
+               goto out_out;
+@@ -1476,7 +1499,7 @@ static int create_msg_cache(struct mlx5_
+       INIT_LIST_HEAD(&cmd->cache.med.head);
+       for (i = 0; i < NUM_LONG_LISTS; i++) {
+-              msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, LONG_LIST_SIZE);
++              msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, LONG_LIST_SIZE, 0);
+               if (IS_ERR(msg)) {
+                       err = PTR_ERR(msg);
+                       goto ex_err;
+@@ -1486,7 +1509,7 @@ static int create_msg_cache(struct mlx5_
+       }
+       for (i = 0; i < NUM_MED_LISTS; i++) {
+-              msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, MED_LIST_SIZE);
++              msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, MED_LIST_SIZE, 0);
+               if (IS_ERR(msg)) {
+                       err = PTR_ERR(msg);
+                       goto ex_err;
diff --git a/queue-4.7/net-mlx5-fix-pci-error-recovery-flow.patch b/queue-4.7/net-mlx5-fix-pci-error-recovery-flow.patch
new file mode 100644 (file)
index 0000000..38d9864
--- /dev/null
@@ -0,0 +1,117 @@
+From foo@baz Wed Sep 21 10:05:18 CEST 2016
+From: Mohamad Haj Yahia <mohamad@mellanox.com>
+Date: Thu, 18 Aug 2016 21:09:04 +0300
+Subject: net/mlx5: Fix pci error recovery flow
+
+From: Mohamad Haj Yahia <mohamad@mellanox.com>
+
+
+[ Upstream commit 1061c90f524963a0a90e7d2f9a6bfa666458af51 ]
+
+When PCI error is detected we should save the state of the pci prior to
+disabling it.
+
+Also when receiving pci slot reset call we need to verify that the
+device is responsive.
+
+Fixes: 89d44f0a6c73 ('net/mlx5_core: Add pci error handlers to mlx5_core
+driver')
+Signed-off-by: Mohamad Haj Yahia <mohamad@mellanox.com>
+
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/main.c |   59 ++++++++++++-------------
+ 1 file changed, 29 insertions(+), 30 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+@@ -1392,36 +1392,12 @@ static pci_ers_result_t mlx5_pci_err_det
+       dev_info(&pdev->dev, "%s was called\n", __func__);
+       mlx5_enter_error_state(dev);
+       mlx5_unload_one(dev, priv);
++      pci_save_state(pdev);
+       mlx5_pci_disable_device(dev);
+       return state == pci_channel_io_perm_failure ?
+               PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
+ }
+-static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev)
+-{
+-      struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+-      int err = 0;
+-
+-      dev_info(&pdev->dev, "%s was called\n", __func__);
+-
+-      err = mlx5_pci_enable_device(dev);
+-      if (err) {
+-              dev_err(&pdev->dev, "%s: mlx5_pci_enable_device failed with error code: %d\n"
+-                      , __func__, err);
+-              return PCI_ERS_RESULT_DISCONNECT;
+-      }
+-      pci_set_master(pdev);
+-      pci_set_power_state(pdev, PCI_D0);
+-      pci_restore_state(pdev);
+-
+-      return err ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
+-}
+-
+-void mlx5_disable_device(struct mlx5_core_dev *dev)
+-{
+-      mlx5_pci_err_detected(dev->pdev, 0);
+-}
+-
+ /* wait for the device to show vital signs by waiting
+  * for the health counter to start counting.
+  */
+@@ -1449,21 +1425,44 @@ static int wait_vital(struct pci_dev *pd
+       return -ETIMEDOUT;
+ }
+-static void mlx5_pci_resume(struct pci_dev *pdev)
++static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev)
+ {
+       struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+-      struct mlx5_priv *priv = &dev->priv;
+       int err;
+       dev_info(&pdev->dev, "%s was called\n", __func__);
+-      pci_save_state(pdev);
+-      err = wait_vital(pdev);
++      err = mlx5_pci_enable_device(dev);
+       if (err) {
++              dev_err(&pdev->dev, "%s: mlx5_pci_enable_device failed with error code: %d\n"
++                      , __func__, err);
++              return PCI_ERS_RESULT_DISCONNECT;
++      }
++
++      pci_set_master(pdev);
++      pci_restore_state(pdev);
++
++      if (wait_vital(pdev)) {
+               dev_err(&pdev->dev, "%s: wait_vital timed out\n", __func__);
+-              return;
++              return PCI_ERS_RESULT_DISCONNECT;
+       }
++      return PCI_ERS_RESULT_RECOVERED;
++}
++
++void mlx5_disable_device(struct mlx5_core_dev *dev)
++{
++      mlx5_pci_err_detected(dev->pdev, 0);
++}
++
++static void mlx5_pci_resume(struct pci_dev *pdev)
++{
++      struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
++      struct mlx5_priv *priv = &dev->priv;
++      int err;
++
++      dev_info(&pdev->dev, "%s was called\n", __func__);
++
+       err = mlx5_load_one(dev, priv);
+       if (err)
+               dev_err(&pdev->dev, "%s: mlx5_load_one failed with error code: %d\n"
diff --git a/queue-4.7/net-mlx5e-fix-parsing-of-vlan-packets-when-updating-lro-header.patch b/queue-4.7/net-mlx5e-fix-parsing-of-vlan-packets-when-updating-lro-header.patch
new file mode 100644 (file)
index 0000000..1b8423d
--- /dev/null
@@ -0,0 +1,68 @@
+From foo@baz Wed Sep 21 10:05:18 CEST 2016
+From: Gal Pressman <galp@mellanox.com>
+Date: Wed, 7 Sep 2016 19:08:01 +0300
+Subject: net/mlx5e: Fix parsing of vlan packets when updating lro header
+
+From: Gal Pressman <galp@mellanox.com>
+
+
+[ Upstream commit cd17d230dd060a12f7451c0caeedb3fd5158eaf9 ]
+
+Currently vlan tagged packets are not parsed correctly
+and are assumed to be regular IPv4/IPv6 packets.
+We should check for 802.1Q/802.1ad tags and update the lro header
+accordingly.
+This fixes the use case where LRO is on and rxvlan is off
+(vlan stripping is off).
+
+Fixes: e586b3b0baee ('net/mlx5: Ethernet Datapath files')
+Signed-off-by: Gal Pressman <galp@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_rx.c |   22 +++++++++++++++-------
+ 1 file changed, 15 insertions(+), 7 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+@@ -648,24 +648,32 @@ bool mlx5e_post_rx_wqes(struct mlx5e_rq
+ static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe,
+                                u32 cqe_bcnt)
+ {
+-      struct ethhdr   *eth    = (struct ethhdr *)(skb->data);
+-      struct iphdr    *ipv4   = (struct iphdr *)(skb->data + ETH_HLEN);
+-      struct ipv6hdr  *ipv6   = (struct ipv6hdr *)(skb->data + ETH_HLEN);
++      struct ethhdr   *eth = (struct ethhdr *)(skb->data);
++      struct iphdr    *ipv4;
++      struct ipv6hdr  *ipv6;
+       struct tcphdr   *tcp;
++      int network_depth = 0;
++      __be16 proto;
++      u16 tot_len;
+       u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe);
+       int tcp_ack = ((CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA  == l4_hdr_type) ||
+                      (CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA == l4_hdr_type));
+-      u16 tot_len = cqe_bcnt - ETH_HLEN;
++      skb->mac_len = ETH_HLEN;
++      proto = __vlan_get_protocol(skb, eth->h_proto, &network_depth);
+-      if (eth->h_proto == htons(ETH_P_IP)) {
+-              tcp = (struct tcphdr *)(skb->data + ETH_HLEN +
++      ipv4 = (struct iphdr *)(skb->data + network_depth);
++      ipv6 = (struct ipv6hdr *)(skb->data + network_depth);
++      tot_len = cqe_bcnt - network_depth;
++
++      if (proto == htons(ETH_P_IP)) {
++              tcp = (struct tcphdr *)(skb->data + network_depth +
+                                       sizeof(struct iphdr));
+               ipv6 = NULL;
+               skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+       } else {
+-              tcp = (struct tcphdr *)(skb->data + ETH_HLEN +
++              tcp = (struct tcphdr *)(skb->data + network_depth +
+                                       sizeof(struct ipv6hdr));
+               ipv4 = NULL;
+               skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
diff --git a/queue-4.7/net-mlx5e-use-correct-flow-dissector-key-on-flower-offloading.patch b/queue-4.7/net-mlx5e-use-correct-flow-dissector-key-on-flower-offloading.patch
new file mode 100644 (file)
index 0000000..fed4e60
--- /dev/null
@@ -0,0 +1,34 @@
+From foo@baz Wed Sep 21 10:05:18 CEST 2016
+From: Hadar Hen Zion <hadarh@mellanox.com>
+Date: Thu, 18 Aug 2016 21:09:07 +0300
+Subject: net/mlx5e: Use correct flow dissector key on flower offloading
+
+From: Hadar Hen Zion <hadarh@mellanox.com>
+
+
+[ Upstream commit 1dbd0d373ac338903d27fab5204b13122cc5accd ]
+
+The wrong key is used when extracting the address type field set by
+the flower offload code. We have to use the control key and not the
+basic key, fix that.
+
+Fixes: e3a2b7ed018e ('net/mlx5e: Support offload cls_flower with drop action')
+Signed-off-by: Hadar Hen Zion <hadarh@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_tc.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+@@ -150,7 +150,7 @@ static int parse_cls_flower(struct mlx5e
+       if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
+               struct flow_dissector_key_control *key =
+                       skb_flow_dissector_target(f->dissector,
+-                                                FLOW_DISSECTOR_KEY_BASIC,
++                                                FLOW_DISSECTOR_KEY_CONTROL,
+                                                 f->key);
+               addr_type = key->addr_type;
+       }
diff --git a/queue-4.7/net-sched-fix-encoding-to-use-real-length.patch b/queue-4.7/net-sched-fix-encoding-to-use-real-length.patch
new file mode 100644 (file)
index 0000000..718a66a
--- /dev/null
@@ -0,0 +1,87 @@
+From foo@baz Wed Sep 21 10:05:18 CEST 2016
+From: Jamal Hadi Salim <jhs@mojatatu.com>
+Date: Mon, 22 Aug 2016 07:10:20 -0400
+Subject: net sched: fix encoding to use real length
+
+From: Jamal Hadi Salim <jhs@mojatatu.com>
+
+
+[ Upstream commit 28a10c426e81afc88514bca8e73affccf850fdf6 ]
+
+Encoding of the metadata was using the padded length as opposed to
+the real length of the data which is a bug per specification.
+This has not been an issue to date because all metadata specified
+so far have been 32 bit, where aligned and data length are the same width.
+This also includes a bug fix for validating the length of a u16 field.
+But since there is no metadata of size u16 yet, we are fine to include it
+here.
+
+While at it get rid of magic numbers.
+
+Fixes: ef6980b6becb ("net sched: introduce IFE action")
+Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/act_ife.c |   18 ++++++++++--------
+ 1 file changed, 10 insertions(+), 8 deletions(-)
+
+--- a/net/sched/act_ife.c
++++ b/net/sched/act_ife.c
+@@ -52,7 +52,7 @@ int ife_tlv_meta_encode(void *skbdata, u
+       u32 *tlv = (u32 *)(skbdata);
+       u16 totlen = nla_total_size(dlen);      /*alignment + hdr */
+       char *dptr = (char *)tlv + NLA_HDRLEN;
+-      u32 htlv = attrtype << 16 | totlen;
++      u32 htlv = attrtype << 16 | dlen;
+       *tlv = htonl(htlv);
+       memset(dptr, 0, totlen - NLA_HDRLEN);
+@@ -134,7 +134,7 @@ EXPORT_SYMBOL_GPL(ife_release_meta_gen);
+ int ife_validate_meta_u32(void *val, int len)
+ {
+-      if (len == 4)
++      if (len == sizeof(u32))
+               return 0;
+       return -EINVAL;
+@@ -143,8 +143,8 @@ EXPORT_SYMBOL_GPL(ife_validate_meta_u32)
+ int ife_validate_meta_u16(void *val, int len)
+ {
+-      /* length will include padding */
+-      if (len == NLA_ALIGN(2))
++      /* length will not include padding */
++      if (len == sizeof(u16))
+               return 0;
+       return -EINVAL;
+@@ -652,12 +652,14 @@ static int tcf_ife_decode(struct sk_buff
+               u8 *tlvdata = (u8 *)tlv;
+               u16 mtype = tlv->type;
+               u16 mlen = tlv->len;
++              u16 alen;
+               mtype = ntohs(mtype);
+               mlen = ntohs(mlen);
++              alen = NLA_ALIGN(mlen);
+-              if (find_decode_metaid(skb, ife, mtype, (mlen - 4),
+-                                     (void *)(tlvdata + 4))) {
++              if (find_decode_metaid(skb, ife, mtype, (mlen - NLA_HDRLEN),
++                                     (void *)(tlvdata + NLA_HDRLEN))) {
+                       /* abuse overlimits to count when we receive metadata
+                        * but dont have an ops for it
+                        */
+@@ -666,8 +668,8 @@ static int tcf_ife_decode(struct sk_buff
+                       ife->tcf_qstats.overlimits++;
+               }
+-              tlvdata += mlen;
+-              ifehdrln -= mlen;
++              tlvdata += alen;
++              ifehdrln -= alen;
+               tlv = (struct meta_tlvhdr *)tlvdata;
+       }
diff --git a/queue-4.7/net-sctp-always-initialise-sctp_ht_iter-start_fail.patch b/queue-4.7/net-sctp-always-initialise-sctp_ht_iter-start_fail.patch
new file mode 100644 (file)
index 0000000..afe9331
--- /dev/null
@@ -0,0 +1,77 @@
+From foo@baz Wed Sep 21 10:05:18 CEST 2016
+From: Vegard Nossum <vegard.nossum@oracle.com>
+Date: Fri, 12 Aug 2016 09:50:51 +0200
+Subject: net/sctp: always initialise sctp_ht_iter::start_fail
+
+From: Vegard Nossum <vegard.nossum@oracle.com>
+
+
+[ Upstream commit 54236ab09e9696a27baaae693c288920a26e8588 ]
+
+sctp_transport_seq_start() does not currently clear iter->start_fail on
+success, but relies on it being zero when it is allocated (by
+seq_open_net()).
+
+This can be a problem in the following sequence:
+
+    open() // allocates iter (and implicitly sets iter->start_fail = 0)
+    read()
+     - iter->start() // fails and sets iter->start_fail = 1
+     - iter->stop() // doesn't call sctp_transport_walk_stop() (correct)
+    read() again
+     - iter->start() // succeeds, but doesn't change iter->start_fail
+     - iter->stop() // doesn't call sctp_transport_walk_stop() (wrong)
+
+We should initialize sctp_ht_iter::start_fail to zero if ->start()
+succeeds, otherwise it's possible that we leave an old value of 1 there,
+which will cause ->stop() to not call sctp_transport_walk_stop(), which
+causes all sorts of problems like not calling rcu_read_unlock() (and
+preempt_enable()), eventually leading to more warnings like this:
+
+    BUG: sleeping function called from invalid context at mm/slab.h:388
+    in_atomic(): 0, irqs_disabled(): 0, pid: 16551, name: trinity-c2
+    Preemption disabled at:[<ffffffff819bceb6>] rhashtable_walk_start+0x46/0x150
+
+     [<ffffffff81149abb>] preempt_count_add+0x1fb/0x280
+     [<ffffffff83295892>] _raw_spin_lock+0x12/0x40
+     [<ffffffff819bceb6>] rhashtable_walk_start+0x46/0x150
+     [<ffffffff82ec665f>] sctp_transport_walk_start+0x2f/0x60
+     [<ffffffff82edda1d>] sctp_transport_seq_start+0x4d/0x150
+     [<ffffffff81439e50>] traverse+0x170/0x850
+     [<ffffffff8143aeec>] seq_read+0x7cc/0x1180
+     [<ffffffff814f996c>] proc_reg_read+0xbc/0x180
+     [<ffffffff813d0384>] do_loop_readv_writev+0x134/0x210
+     [<ffffffff813d2a95>] do_readv_writev+0x565/0x660
+     [<ffffffff813d6857>] vfs_readv+0x67/0xa0
+     [<ffffffff813d6c16>] do_preadv+0x126/0x170
+     [<ffffffff813d710c>] SyS_preadv+0xc/0x10
+     [<ffffffff8100334c>] do_syscall_64+0x19c/0x410
+     [<ffffffff83296225>] return_from_SYSCALL_64+0x0/0x6a
+     [<ffffffffffffffff>] 0xffffffffffffffff
+
+Notice that this is a subtly different stacktrace from the one in commit
+5fc382d875 ("net/sctp: terminate rhashtable walk correctly").
+
+Cc: Xin Long <lucien.xin@gmail.com>
+Cc: Herbert Xu <herbert@gondor.apana.org.au>
+Cc: Eric W. Biederman <ebiederm@xmission.com>
+Cc: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Signed-off-by: Vegard Nossum <vegard.nossum@oracle.com>
+Acked-By: Neil Horman <nhorman@tuxdriver.com>
+Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/proc.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/sctp/proc.c
++++ b/net/sctp/proc.c
+@@ -293,6 +293,7 @@ static void *sctp_transport_seq_start(st
+               return ERR_PTR(err);
+       }
++      iter->start_fail = 0;
+       return sctp_transport_get_idx(seq_file_net(seq), &iter->hti, *pos);
+ }
diff --git a/queue-4.7/net-smc91x-fix-smc-accesses.patch b/queue-4.7/net-smc91x-fix-smc-accesses.patch
new file mode 100644 (file)
index 0000000..0e6dc09
--- /dev/null
@@ -0,0 +1,263 @@
+From foo@baz Wed Sep 21 10:05:18 CEST 2016
+From: Russell King <rmk+kernel@armlinux.org.uk>
+Date: Sat, 27 Aug 2016 17:33:03 +0100
+Subject: net: smc91x: fix SMC accesses
+
+From: Russell King <rmk+kernel@armlinux.org.uk>
+
+
+[ Upstream commit 2fb04fdf30192ff1e2b5834e9b7745889ea8bbcb ]
+
+Commit b70661c70830 ("net: smc91x: use run-time configuration on all ARM
+machines") broke some ARM platforms through several mistakes.  Firstly,
+the access size must correspond to the following rule:
+
+(a) at least one of 16-bit or 8-bit access size must be supported
+(b) 32-bit accesses are optional, and may be enabled in addition to
+    the above.
+
+Secondly, it provides no emulation of 16-bit accesses, instead blindly
+making 16-bit accesses even when the platform specifies that only 8-bit
+is supported.
+
+Reorganise smc91x.h so we can make use of the existing 16-bit access
+emulation already provided - if 16-bit accesses are supported, use
+16-bit accesses directly, otherwise if 8-bit accesses are supported,
+use the provided 16-bit access emulation.  If neither, BUG().  This
+exactly reflects the driver behaviour prior to the commit being fixed.
+
+Since the conversion incorrectly cut down the available access sizes on
+several platforms, we also need to go through every platform and fix up
+the overly-restrictive access size: Arnd assumed that if a platform can
+perform 32-bit, 16-bit and 8-bit accesses, then only a 32-bit access
+size needed to be specified - not so, all available access sizes must
+be specified.
+
+This likely fixes some performance regressions in doing this: if a
+platform does not support 8-bit accesses, 8-bit accesses have been
+emulated by performing a 16-bit read-modify-write access.
+
+Tested on the Intel Assabet/Neponset platform, which supports only 8-bit
+accesses, which was broken by the original commit.
+
+Fixes: b70661c70830 ("net: smc91x: use run-time configuration on all ARM machines")
+Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
+Tested-by: Robert Jarzmik <robert.jarzmik@free.fr>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm/mach-pxa/idp.c                    |    3 -
+ arch/arm/mach-pxa/xcep.c                   |    3 -
+ arch/arm/mach-realview/core.c              |    3 -
+ arch/arm/mach-sa1100/pleb.c                |    2 
+ arch/blackfin/mach-bf561/boards/cm_bf561.c |    3 -
+ arch/blackfin/mach-bf561/boards/ezkit.c    |    3 -
+ drivers/net/ethernet/smsc/smc91x.c         |    7 +++
+ drivers/net/ethernet/smsc/smc91x.h         |   65 ++++++++++++++++++++---------
+ include/linux/smc91x.h                     |   10 ++++
+ 9 files changed, 73 insertions(+), 26 deletions(-)
+
+--- a/arch/arm/mach-pxa/idp.c
++++ b/arch/arm/mach-pxa/idp.c
+@@ -83,7 +83,8 @@ static struct resource smc91x_resources[
+ };
+ static struct smc91x_platdata smc91x_platdata = {
+-      .flags = SMC91X_USE_32BIT | SMC91X_USE_DMA | SMC91X_NOWAIT,
++      .flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT |
++               SMC91X_USE_DMA | SMC91X_NOWAIT,
+ };
+ static struct platform_device smc91x_device = {
+--- a/arch/arm/mach-pxa/xcep.c
++++ b/arch/arm/mach-pxa/xcep.c
+@@ -120,7 +120,8 @@ static struct resource smc91x_resources[
+ };
+ static struct smc91x_platdata xcep_smc91x_info = {
+-      .flags  = SMC91X_USE_32BIT | SMC91X_NOWAIT | SMC91X_USE_DMA,
++      .flags  = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT |
++                SMC91X_NOWAIT | SMC91X_USE_DMA,
+ };
+ static struct platform_device smc91x_device = {
+--- a/arch/arm/mach-realview/core.c
++++ b/arch/arm/mach-realview/core.c
+@@ -93,7 +93,8 @@ static struct smsc911x_platform_config s
+ };
+ static struct smc91x_platdata smc91x_platdata = {
+-      .flags = SMC91X_USE_32BIT | SMC91X_NOWAIT,
++      .flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT |
++               SMC91X_NOWAIT,
+ };
+ static struct platform_device realview_eth_device = {
+--- a/arch/arm/mach-sa1100/pleb.c
++++ b/arch/arm/mach-sa1100/pleb.c
+@@ -45,7 +45,7 @@ static struct resource smc91x_resources[
+ };
+ static struct smc91x_platdata smc91x_platdata = {
+-      .flags = SMC91X_USE_16BIT | SMC91X_NOWAIT,
++      .flags = SMC91X_USE_16BIT | SMC91X_USE_8BIT | SMC91X_NOWAIT,
+ };
+ static struct platform_device smc91x_device = {
+--- a/arch/blackfin/mach-bf561/boards/cm_bf561.c
++++ b/arch/blackfin/mach-bf561/boards/cm_bf561.c
+@@ -146,7 +146,8 @@ static struct platform_device hitachi_fb
+ #include <linux/smc91x.h>
+ static struct smc91x_platdata smc91x_info = {
+-      .flags = SMC91X_USE_32BIT | SMC91X_NOWAIT,
++      .flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT |
++               SMC91X_NOWAIT,
+       .leda = RPC_LED_100_10,
+       .ledb = RPC_LED_TX_RX,
+ };
+--- a/arch/blackfin/mach-bf561/boards/ezkit.c
++++ b/arch/blackfin/mach-bf561/boards/ezkit.c
+@@ -134,7 +134,8 @@ static struct platform_device net2272_bf
+ #include <linux/smc91x.h>
+ static struct smc91x_platdata smc91x_info = {
+-      .flags = SMC91X_USE_32BIT | SMC91X_NOWAIT,
++      .flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT |
++               SMC91X_NOWAIT,
+       .leda = RPC_LED_100_10,
+       .ledb = RPC_LED_TX_RX,
+ };
+--- a/drivers/net/ethernet/smsc/smc91x.c
++++ b/drivers/net/ethernet/smsc/smc91x.c
+@@ -2269,6 +2269,13 @@ static int smc_drv_probe(struct platform
+       if (pd) {
+               memcpy(&lp->cfg, pd, sizeof(lp->cfg));
+               lp->io_shift = SMC91X_IO_SHIFT(lp->cfg.flags);
++
++              if (!SMC_8BIT(lp) && !SMC_16BIT(lp)) {
++                      dev_err(&pdev->dev,
++                              "at least one of 8-bit or 16-bit access support is required.\n");
++                      ret = -ENXIO;
++                      goto out_free_netdev;
++              }
+       }
+ #if IS_BUILTIN(CONFIG_OF)
+--- a/drivers/net/ethernet/smsc/smc91x.h
++++ b/drivers/net/ethernet/smsc/smc91x.h
+@@ -37,6 +37,27 @@
+ #include <linux/smc91x.h>
+ /*
++ * Any 16-bit access is performed with two 8-bit accesses if the hardware
++ * can't do it directly. Most registers are 16-bit so those are mandatory.
++ */
++#define SMC_outw_b(x, a, r)                                           \
++      do {                                                            \
++              unsigned int __val16 = (x);                             \
++              unsigned int __reg = (r);                               \
++              SMC_outb(__val16, a, __reg);                            \
++              SMC_outb(__val16 >> 8, a, __reg + (1 << SMC_IO_SHIFT)); \
++      } while (0)
++
++#define SMC_inw_b(a, r)                                                       \
++      ({                                                              \
++              unsigned int __val16;                                   \
++              unsigned int __reg = r;                                 \
++              __val16  = SMC_inb(a, __reg);                           \
++              __val16 |= SMC_inb(a, __reg + (1 << SMC_IO_SHIFT)) << 8; \
++              __val16;                                                \
++      })
++
++/*
+  * Define your architecture specific bus configuration parameters here.
+  */
+@@ -55,10 +76,30 @@
+ #define SMC_IO_SHIFT          (lp->io_shift)
+ #define SMC_inb(a, r)         readb((a) + (r))
+-#define SMC_inw(a, r)         readw((a) + (r))
++#define SMC_inw(a, r)                                                 \
++      ({                                                              \
++              unsigned int __smc_r = r;                               \
++              SMC_16BIT(lp) ? readw((a) + __smc_r) :                  \
++              SMC_8BIT(lp) ? SMC_inw_b(a, __smc_r) :                  \
++              ({ BUG(); 0; });                                        \
++      })
++
+ #define SMC_inl(a, r)         readl((a) + (r))
+ #define SMC_outb(v, a, r)     writeb(v, (a) + (r))
++#define SMC_outw(v, a, r)                                             \
++      do {                                                            \
++              unsigned int __v = v, __smc_r = r;                      \
++              if (SMC_16BIT(lp))                                      \
++                      __SMC_outw(__v, a, __smc_r);                    \
++              else if (SMC_8BIT(lp))                                  \
++                      SMC_outw_b(__v, a, __smc_r);                    \
++              else                                                    \
++                      BUG();                                          \
++      } while (0)
++
+ #define SMC_outl(v, a, r)     writel(v, (a) + (r))
++#define SMC_insb(a, r, p, l)  readsb((a) + (r), p, l)
++#define SMC_outsb(a, r, p, l) writesb((a) + (r), p, l)
+ #define SMC_insw(a, r, p, l)  readsw((a) + (r), p, l)
+ #define SMC_outsw(a, r, p, l) writesw((a) + (r), p, l)
+ #define SMC_insl(a, r, p, l)  readsl((a) + (r), p, l)
+@@ -66,7 +107,7 @@
+ #define SMC_IRQ_FLAGS         (-1)    /* from resource */
+ /* We actually can't write halfwords properly if not word aligned */
+-static inline void SMC_outw(u16 val, void __iomem *ioaddr, int reg)
++static inline void __SMC_outw(u16 val, void __iomem *ioaddr, int reg)
+ {
+       if ((machine_is_mainstone() || machine_is_stargate2() ||
+            machine_is_pxa_idp()) && reg & 2) {
+@@ -416,24 +457,8 @@ smc_pxa_dma_insw(void __iomem *ioaddr, s
+ #if ! SMC_CAN_USE_16BIT
+-/*
+- * Any 16-bit access is performed with two 8-bit accesses if the hardware
+- * can't do it directly. Most registers are 16-bit so those are mandatory.
+- */
+-#define SMC_outw(x, ioaddr, reg)                                      \
+-      do {                                                            \
+-              unsigned int __val16 = (x);                             \
+-              SMC_outb( __val16, ioaddr, reg );                       \
+-              SMC_outb( __val16 >> 8, ioaddr, reg + (1 << SMC_IO_SHIFT));\
+-      } while (0)
+-#define SMC_inw(ioaddr, reg)                                          \
+-      ({                                                              \
+-              unsigned int __val16;                                   \
+-              __val16 =  SMC_inb( ioaddr, reg );                      \
+-              __val16 |= SMC_inb( ioaddr, reg + (1 << SMC_IO_SHIFT)) << 8; \
+-              __val16;                                                \
+-      })
+-
++#define SMC_outw(x, ioaddr, reg)      SMC_outw_b(x, ioaddr, reg)
++#define SMC_inw(ioaddr, reg)          SMC_inw_b(ioaddr, reg)
+ #define SMC_insw(a, r, p, l)          BUG()
+ #define SMC_outsw(a, r, p, l)         BUG()
+--- a/include/linux/smc91x.h
++++ b/include/linux/smc91x.h
+@@ -1,6 +1,16 @@
+ #ifndef __SMC91X_H__
+ #define __SMC91X_H__
++/*
++ * These bits define which access sizes a platform can support, rather
++ * than the maximal access size.  So, if your platform can do 16-bit
++ * and 32-bit accesses to the SMC91x device, but not 8-bit, set both
++ * SMC91X_USE_16BIT and SMC91X_USE_32BIT.
++ *
++ * The SMC91x driver requires at least one of SMC91X_USE_8BIT or
++ * SMC91X_USE_16BIT to be supported - just setting SMC91X_USE_32BIT is
++ * an invalid configuration.
++ */
+ #define SMC91X_USE_8BIT (1 << 0)
+ #define SMC91X_USE_16BIT (1 << 1)
+ #define SMC91X_USE_32BIT (1 << 2)
diff --git a/queue-4.7/revert-af_unix-fix-splice-bind-deadlock.patch b/queue-4.7/revert-af_unix-fix-splice-bind-deadlock.patch
new file mode 100644 (file)
index 0000000..a5c4c6b
--- /dev/null
@@ -0,0 +1,161 @@
+From foo@baz Wed Sep 21 10:05:18 CEST 2016
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Thu, 1 Sep 2016 14:56:49 -0700
+Subject: Revert "af_unix: Fix splice-bind deadlock"
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+
+[ Upstream commit 38f7bd94a97b542de86a2be9229289717e33a7a4 ]
+
+This reverts commit c845acb324aa85a39650a14e7696982ceea75dc1.
+
+It turns out that it just replaces one deadlock with another one: we can
+still get the wrong lock ordering with the readlock due to overlayfs
+calling back into the filesystem layer and still taking the vfs locks
+after the readlock.
+
+The proper solution ends up being to just split the readlock into two
+pieces: the bind lock (taken *outside* the vfs locks) and the IO lock
+(taken *inside* the filesystem locks).  The two locks are independent
+anyway.
+
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Reviewed-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/unix/af_unix.c |   68 +++++++++++++++++++++--------------------------------
+ 1 file changed, 27 insertions(+), 41 deletions(-)
+
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -953,20 +953,32 @@ fail:
+       return NULL;
+ }
+-static int unix_mknod(struct dentry *dentry, const struct path *path, umode_t mode,
+-                    struct path *res)
++static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
+ {
+-      int err;
+-
+-      err = security_path_mknod(path, dentry, mode, 0);
++      struct dentry *dentry;
++      struct path path;
++      int err = 0;
++      /*
++       * Get the parent directory, calculate the hash for last
++       * component.
++       */
++      dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
++      err = PTR_ERR(dentry);
++      if (IS_ERR(dentry))
++              return err;
++
++      /*
++       * All right, let's create it.
++       */
++      err = security_path_mknod(&path, dentry, mode, 0);
+       if (!err) {
+-              err = vfs_mknod(d_inode(path->dentry), dentry, mode, 0);
++              err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
+               if (!err) {
+-                      res->mnt = mntget(path->mnt);
++                      res->mnt = mntget(path.mnt);
+                       res->dentry = dget(dentry);
+               }
+       }
+-
++      done_path_create(&path, dentry);
+       return err;
+ }
+@@ -977,12 +989,10 @@ static int unix_bind(struct socket *sock
+       struct unix_sock *u = unix_sk(sk);
+       struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
+       char *sun_path = sunaddr->sun_path;
+-      int err, name_err;
++      int err;
+       unsigned int hash;
+       struct unix_address *addr;
+       struct hlist_head *list;
+-      struct path path;
+-      struct dentry *dentry;
+       err = -EINVAL;
+       if (sunaddr->sun_family != AF_UNIX)
+@@ -998,34 +1008,14 @@ static int unix_bind(struct socket *sock
+               goto out;
+       addr_len = err;
+-      name_err = 0;
+-      dentry = NULL;
+-      if (sun_path[0]) {
+-              /* Get the parent directory, calculate the hash for last
+-               * component.
+-               */
+-              dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
+-
+-              if (IS_ERR(dentry)) {
+-                      /* delay report until after 'already bound' check */
+-                      name_err = PTR_ERR(dentry);
+-                      dentry = NULL;
+-              }
+-      }
+-
+       err = mutex_lock_interruptible(&u->readlock);
+       if (err)
+-              goto out_path;
++              goto out;
+       err = -EINVAL;
+       if (u->addr)
+               goto out_up;
+-      if (name_err) {
+-              err = name_err == -EEXIST ? -EADDRINUSE : name_err;
+-              goto out_up;
+-      }
+-
+       err = -ENOMEM;
+       addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
+       if (!addr)
+@@ -1036,11 +1026,11 @@ static int unix_bind(struct socket *sock
+       addr->hash = hash ^ sk->sk_type;
+       atomic_set(&addr->refcnt, 1);
+-      if (dentry) {
+-              struct path u_path;
++      if (sun_path[0]) {
++              struct path path;
+               umode_t mode = S_IFSOCK |
+                      (SOCK_INODE(sock)->i_mode & ~current_umask());
+-              err = unix_mknod(dentry, &path, mode, &u_path);
++              err = unix_mknod(sun_path, mode, &path);
+               if (err) {
+                       if (err == -EEXIST)
+                               err = -EADDRINUSE;
+@@ -1048,9 +1038,9 @@ static int unix_bind(struct socket *sock
+                       goto out_up;
+               }
+               addr->hash = UNIX_HASH_SIZE;
+-              hash = d_real_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1);
++              hash = d_real_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
+               spin_lock(&unix_table_lock);
+-              u->path = u_path;
++              u->path = path;
+               list = &unix_socket_table[hash];
+       } else {
+               spin_lock(&unix_table_lock);
+@@ -1073,10 +1063,6 @@ out_unlock:
+       spin_unlock(&unix_table_lock);
+ out_up:
+       mutex_unlock(&u->readlock);
+-out_path:
+-      if (dentry)
+-              done_path_create(&path, dentry);
+-
+ out:
+       return err;
+ }
diff --git a/queue-4.7/revert-phy-irq-cannot-be-shared.patch b/queue-4.7/revert-phy-irq-cannot-be-shared.patch
new file mode 100644 (file)
index 0000000..f4795a3
--- /dev/null
@@ -0,0 +1,45 @@
+From foo@baz Wed Sep 21 10:05:18 CEST 2016
+From: Xander Huff <xander.huff@ni.com>
+Date: Wed, 24 Aug 2016 16:47:53 -0500
+Subject: Revert "phy: IRQ cannot be shared"
+
+From: Xander Huff <xander.huff@ni.com>
+
+
+[ Upstream commit c3e70edd7c2eed6acd234627a6007627f5c76e8e ]
+
+This reverts:
+  commit 33c133cc7598 ("phy: IRQ cannot be shared")
+
+On hardware with multiple PHY devices hooked up to the same IRQ line, allow
+them to share it.
+
+Sergei Shtylyov says:
+  "I'm not sure now what was the reason I concluded that the IRQ sharing
+  was impossible... most probably I thought that the kernel IRQ handling
+  code exited the loop over the IRQ actions once IRQ_HANDLED was returned
+  -- which is obviously not so in reality..."
+
+Signed-off-by: Xander Huff <xander.huff@ni.com>
+Signed-off-by: Nathan Sullivan <nathan.sullivan@ni.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/phy/phy.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/phy/phy.c
++++ b/drivers/net/phy/phy.c
+@@ -722,8 +722,10 @@ phy_err:
+ int phy_start_interrupts(struct phy_device *phydev)
+ {
+       atomic_set(&phydev->irq_disable, 0);
+-      if (request_irq(phydev->irq, phy_interrupt, 0, "phy_interrupt",
+-                      phydev) < 0) {
++      if (request_irq(phydev->irq, phy_interrupt,
++                              IRQF_SHARED,
++                              "phy_interrupt",
++                              phydev) < 0) {
+               pr_warn("%s: Can't get IRQ %d (PHY)\n",
+                       phydev->mdio.bus->name, phydev->irq);
+               phydev->irq = PHY_POLL;
diff --git a/queue-4.7/sctp-fix-overrun-in-sctp_diag_dump_one.patch b/queue-4.7/sctp-fix-overrun-in-sctp_diag_dump_one.patch
new file mode 100644 (file)
index 0000000..543de17
--- /dev/null
@@ -0,0 +1,43 @@
+From foo@baz Wed Sep 21 10:05:18 CEST 2016
+From: Lance Richardson <lrichard@redhat.com>
+Date: Tue, 23 Aug 2016 11:40:52 -0400
+Subject: sctp: fix overrun in sctp_diag_dump_one()
+
+From: Lance Richardson <lrichard@redhat.com>
+
+
+[ Upstream commit 232cb53a45965f8789fbf0a9a1962f8c67ab1a3c ]
+
+The function sctp_diag_dump_one() currently performs a memcpy()
+of 64 bytes from a 16 byte field into another 16 byte field. Fix
+this by copying the correct size, using sizeof to obtain it instead
+of a hard-coded constant.
+
+Fixes: 8f840e47f190 ("sctp: add the sctp_diag.c file")
+Signed-off-by: Lance Richardson <lrichard@redhat.com>
+Reviewed-by: Xin Long <lucien.xin@gmail.com>
+Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/sctp_diag.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/net/sctp/sctp_diag.c
++++ b/net/sctp/sctp_diag.c
+@@ -418,11 +418,13 @@ static int sctp_diag_dump_one(struct sk_
+               paddr.v4.sin_family = AF_INET;
+       } else {
+               laddr.v6.sin6_port = req->id.idiag_sport;
+-              memcpy(&laddr.v6.sin6_addr, req->id.idiag_src, 64);
++              memcpy(&laddr.v6.sin6_addr, req->id.idiag_src,
++                     sizeof(laddr.v6.sin6_addr));
+               laddr.v6.sin6_family = AF_INET6;
+               paddr.v6.sin6_port = req->id.idiag_dport;
+-              memcpy(&paddr.v6.sin6_addr, req->id.idiag_dst, 64);
++              memcpy(&paddr.v6.sin6_addr, req->id.idiag_dst,
++                     sizeof(paddr.v6.sin6_addr));
+               paddr.v6.sin6_family = AF_INET6;
+       }
index dd097a9fe9e5ed3a2844b72b1e7c16d9ad8e44ea..d7bf1f6b74c3218a2b806df0c74c1d002afa16a9 100644 (file)
@@ -2,3 +2,34 @@ clocksource-drivers-sun4i-clear-interrupts-after-stopping-timer-in-probe-functio
 mips-kvm-check-for-pfn-noslot-case.patch
 fscrypto-require-write-access-to-mount-to-set-encryption-policy.patch
 drm-msm-protect-against-faults-from-copy_from_user-in-submit-ioctl.patch
+bpf-fix-method-of-ptr_to_packet-reg-id-generation.patch
+ipv4-panic-in-leaf_walk_rcu-due-to-stale-node-pointer.patch
+vti-flush-x-netns-xfrm-cache-when-vti-interface-is-removed.patch
+bpf-fix-write-helpers-with-regards-to-non-linear-parts.patch
+net-irda-handle-iriap_register_lsap-allocation-failure.patch
+net-sctp-always-initialise-sctp_ht_iter-start_fail.patch
+net-ipv6-do-not-keep-ipv6-addresses-when-ipv6-is-disabled.patch
+tipc-fix-null-pointer-dereference-in-shutdown.patch
+tcp-fix-use-after-free-in-tcp_xmit_retransmit_queue.patch
+net-mlx5-fix-pci-error-recovery-flow.patch
+net-mlx5-added-missing-check-of-msg-length-in-verifying-its-signature.patch
+net-mlx5e-use-correct-flow-dissector-key-on-flower-offloading.patch
+net-sched-fix-encoding-to-use-real-length.patch
+udp-fix-poll-issue-with-zero-sized-packets.patch
+tcp-properly-scale-window-in-tcp_v_reqsk_send_ack.patch
+sctp-fix-overrun-in-sctp_diag_dump_one.patch
+tun-fix-transmit-timestamp-support.patch
+net-dsa-bcm_sf2-fix-race-condition-while-unmasking-interrupts.patch
+revert-phy-irq-cannot-be-shared.patch
+net-smc91x-fix-smc-accesses.patch
+bridge-re-introduce-fix-parsing-of-mldv2-reports.patch
+kcm-fix-a-socket-double-free.patch
+bonding-fix-bonding-crash.patch
+revert-af_unix-fix-splice-bind-deadlock.patch
+af_unix-split-u-readlock-into-two-iolock-and-bindlock.patch
+ipv6-release-dst-in-ping_v6_sendmsg.patch
+bnxt_en-fix-tx-push-operation-on-arm64.patch
+ipv6-addrconf-fix-dev-refcont-leak-when-dad-failed.patch
+tcp-fastopen-avoid-negative-sk_forward_alloc.patch
+net-mlx5e-fix-parsing-of-vlan-packets-when-updating-lro-header.patch
+tcp-cwnd-does-not-increase-in-tcp-yeah.patch
diff --git a/queue-4.7/tcp-cwnd-does-not-increase-in-tcp-yeah.patch b/queue-4.7/tcp-cwnd-does-not-increase-in-tcp-yeah.patch
new file mode 100644 (file)
index 0000000..c4b189a
--- /dev/null
@@ -0,0 +1,38 @@
+From foo@baz Wed Sep 21 10:05:18 CEST 2016
+From: Artem Germanov <agermanov@anchorfree.com>
+Date: Wed, 7 Sep 2016 10:49:36 -0700
+Subject: tcp: cwnd does not increase in TCP YeAH
+
+From: Artem Germanov <agermanov@anchorfree.com>
+
+
+[ Upstream commit db7196a0d0984b933ccf2cd6a60e26abf466e8a3 ]
+
+Commit 76174004a0f19785a328f40388e87e982bbf69b9
+(tcp: do not slow start when cwnd equals ssthresh )
+introduced regression in TCP YeAH. Using 100ms delay 1% loss virtual
+ethernet link kernel 4.2 shows bandwidth ~500KB/s for single TCP
+connection and kernel 4.3 and above (including 4.8-rc4) shows bandwidth
+~100KB/s.
+   That is caused by stalled cwnd when cwnd equals ssthresh. This patch
+fixes it by properly increasing cwnd in this case.
+
+Signed-off-by: Artem Germanov <agermanov@anchorfree.com>
+Acked-by: Dmitry Adamushko <d.adamushko@anchorfree.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_yeah.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/tcp_yeah.c
++++ b/net/ipv4/tcp_yeah.c
+@@ -76,7 +76,7 @@ static void tcp_yeah_cong_avoid(struct s
+       if (!tcp_is_cwnd_limited(sk))
+               return;
+-      if (tp->snd_cwnd <= tp->snd_ssthresh)
++      if (tcp_in_slow_start(tp))
+               tcp_slow_start(tp, acked);
+       else if (!yeah->doing_reno_now) {
diff --git a/queue-4.7/tcp-fastopen-avoid-negative-sk_forward_alloc.patch b/queue-4.7/tcp-fastopen-avoid-negative-sk_forward_alloc.patch
new file mode 100644 (file)
index 0000000..b32a340
--- /dev/null
@@ -0,0 +1,39 @@
+From foo@baz Wed Sep 21 10:05:18 CEST 2016
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 7 Sep 2016 08:34:11 -0700
+Subject: tcp: fastopen: avoid negative sk_forward_alloc
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 76061f631c2ea4ab9c4d66f3a96ecc5737f5aaf7 ]
+
+When DATA and/or FIN are carried in a SYN/ACK message or SYN message,
+we append an skb in socket receive queue, but we forget to call
+sk_forced_mem_schedule().
+
+Effect is that the socket has a negative sk->sk_forward_alloc as long as
+the message is not read by the application.
+
+Josh Hunt fixed a similar issue in commit d22e15371811 ("tcp: fix tcp
+fin memory accounting")
+
+Fixes: 168a8f58059a ("tcp: TCP Fast Open Server - main code path")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Josh Hunt <johunt@akamai.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_fastopen.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/ipv4/tcp_fastopen.c
++++ b/net/ipv4/tcp_fastopen.c
+@@ -150,6 +150,7 @@ void tcp_fastopen_add_skb(struct sock *s
+       tp->segs_in = 0;
+       tcp_segs_in(tp, skb);
+       __skb_pull(skb, tcp_hdrlen(skb));
++      sk_forced_mem_schedule(sk, skb->truesize);
+       skb_set_owner_r(skb, sk);
+       TCP_SKB_CB(skb)->seq++;
diff --git a/queue-4.7/tcp-fix-use-after-free-in-tcp_xmit_retransmit_queue.patch b/queue-4.7/tcp-fix-use-after-free-in-tcp_xmit_retransmit_queue.patch
new file mode 100644 (file)
index 0000000..61fedff
--- /dev/null
@@ -0,0 +1,53 @@
+From foo@baz Wed Sep 21 10:05:18 CEST 2016
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 17 Aug 2016 05:56:26 -0700
+Subject: tcp: fix use after free in tcp_xmit_retransmit_queue()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit bb1fceca22492109be12640d49f5ea5a544c6bb4 ]
+
+When tcp_sendmsg() allocates a fresh and empty skb, it puts it at the
+tail of the write queue using tcp_add_write_queue_tail()
+
+Then it attempts to copy user data into this fresh skb.
+
+If the copy fails, we undo the work and remove the fresh skb.
+
+Unfortunately, this undo lacks the change done to tp->highest_sack and
+we can leave a dangling pointer (to a freed skb)
+
+Later, tcp_xmit_retransmit_queue() can dereference this pointer and
+access freed memory. For regular kernels where memory is not unmapped,
+this might cause SACK bugs because tcp_highest_sack_seq() is buggy,
+returning garbage instead of tp->snd_nxt, but with various debug
+features like CONFIG_DEBUG_PAGEALLOC, this can crash the kernel.
+
+This bug was found by Marco Grassi thanks to syzkaller.
+
+Fixes: 6859d49475d4 ("[TCP]: Abstract tp->highest_sack accessing & point to next skb")
+Reported-by: Marco Grassi <marco.gra@gmail.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
+Cc: Yuchung Cheng <ycheng@google.com>
+Cc: Neal Cardwell <ncardwell@google.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Reviewed-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/tcp.h |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -1522,6 +1522,8 @@ static inline void tcp_check_send_head(s
+ {
+       if (sk->sk_send_head == skb_unlinked)
+               sk->sk_send_head = NULL;
++      if (tcp_sk(sk)->highest_sack == skb_unlinked)
++              tcp_sk(sk)->highest_sack = NULL;
+ }
+ static inline void tcp_init_send_head(struct sock *sk)
diff --git a/queue-4.7/tcp-properly-scale-window-in-tcp_v_reqsk_send_ack.patch b/queue-4.7/tcp-properly-scale-window-in-tcp_v_reqsk_send_ack.patch
new file mode 100644 (file)
index 0000000..e45b5bd
--- /dev/null
@@ -0,0 +1,79 @@
+From foo@baz Wed Sep 21 10:05:18 CEST 2016
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 22 Aug 2016 11:31:10 -0700
+Subject: tcp: properly scale window in tcp_v[46]_reqsk_send_ack()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 20a2b49fc538540819a0c552877086548cff8d8d ]
+
+When sending an ack in SYN_RECV state, we must scale the offered
+window if wscale option was negotiated and accepted.
+
+Tested:
+ Following packetdrill test demonstrates the issue :
+
+0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+
++0 bind(3, ..., ...) = 0
++0 listen(3, 1) = 0
+
+// Establish a connection.
++0 < S 0:0(0) win 20000 <mss 1000,sackOK,wscale 7, nop, TS val 100 ecr 0>
++0 > S. 0:0(0) ack 1 win 28960 <mss 1460,sackOK, TS val 100 ecr 100, nop, wscale 7>
+
++0 < . 1:11(10) ack 1 win 156 <nop,nop,TS val 99 ecr 100>
+// check that window is properly scaled !
++0 > . 1:1(0) ack 1 win 226 <nop,nop,TS val 200 ecr 100>
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Yuchung Cheng <ycheng@google.com>
+Cc: Neal Cardwell <ncardwell@google.com>
+Acked-by: Yuchung Cheng <ycheng@google.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_ipv4.c |    8 +++++++-
+ net/ipv6/tcp_ipv6.c |    8 +++++++-
+ 2 files changed, 14 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -814,8 +814,14 @@ static void tcp_v4_reqsk_send_ack(const
+       u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
+                                            tcp_sk(sk)->snd_nxt;
++      /* RFC 7323 2.3
++       * The window field (SEG.WND) of every outgoing segment, with the
++       * exception of <SYN> segments, MUST be right-shifted by
++       * Rcv.Wind.Shift bits:
++       */
+       tcp_v4_send_ack(sock_net(sk), skb, seq,
+-                      tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd,
++                      tcp_rsk(req)->rcv_nxt,
++                      req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
+                       tcp_time_stamp,
+                       req->ts_recent,
+                       0,
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -937,9 +937,15 @@ static void tcp_v6_reqsk_send_ack(const
+       /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
+        * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
+        */
++      /* RFC 7323 2.3
++       * The window field (SEG.WND) of every outgoing segment, with the
++       * exception of <SYN> segments, MUST be right-shifted by
++       * Rcv.Wind.Shift bits:
++       */
+       tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
+                       tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
+-                      tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd,
++                      tcp_rsk(req)->rcv_nxt,
++                      req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
+                       tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if,
+                       tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
+                       0, 0);
diff --git a/queue-4.7/tipc-fix-null-pointer-dereference-in-shutdown.patch b/queue-4.7/tipc-fix-null-pointer-dereference-in-shutdown.patch
new file mode 100644 (file)
index 0000000..8c98caf
--- /dev/null
@@ -0,0 +1,68 @@
+From foo@baz Wed Sep 21 10:05:18 CEST 2016
+From: Vegard Nossum <vegard.nossum@oracle.com>
+Date: Sat, 23 Jul 2016 08:15:04 +0200
+Subject: tipc: fix NULL pointer dereference in shutdown()
+
+From: Vegard Nossum <vegard.nossum@oracle.com>
+
+
+[ Upstream commit d2fbdf76b85bcdfe57b8ef2ba09d20e8ada79abd ]
+
+tipc_msg_create() can return a NULL skb and if so, we shouldn't try to
+call tipc_node_xmit_skb() on it.
+
+    general protection fault: 0000 [#1] PREEMPT SMP KASAN
+    CPU: 3 PID: 30298 Comm: trinity-c0 Not tainted 4.7.0-rc7+ #19
+    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
+    task: ffff8800baf09980 ti: ffff8800595b8000 task.ti: ffff8800595b8000
+    RIP: 0010:[<ffffffff830bb46b>]  [<ffffffff830bb46b>] tipc_node_xmit_skb+0x6b/0x140
+    RSP: 0018:ffff8800595bfce8  EFLAGS: 00010246
+    RAX: 0000000000000000 RBX: 0000000000000000 RCX: 000000003023b0e0
+    RDX: 0000000000000000 RSI: dffffc0000000000 RDI: ffffffff83d12580
+    RBP: ffff8800595bfd78 R08: ffffed000b2b7f32 R09: 0000000000000000
+    R10: fffffbfff0759725 R11: 0000000000000000 R12: 1ffff1000b2b7f9f
+    R13: ffff8800595bfd58 R14: ffffffff83d12580 R15: dffffc0000000000
+    FS:  00007fcdde242700(0000) GS:ffff88011af80000(0000) knlGS:0000000000000000
+    CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+    CR2: 00007fcddde1db10 CR3: 000000006874b000 CR4: 00000000000006e0
+    DR0: 00007fcdde248000 DR1: 00007fcddd73d000 DR2: 00007fcdde248000
+    DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000090602
+    Stack:
+     0000000000000018 0000000000000018 0000000041b58ab3 ffffffff83954208
+     ffffffff830bb400 ffff8800595bfd30 ffffffff8309d767 0000000000000018
+     0000000000000018 ffff8800595bfd78 ffffffff8309da1a 00000000810ee611
+    Call Trace:
+     [<ffffffff830c84a3>] tipc_shutdown+0x553/0x880
+     [<ffffffff825b4a3b>] SyS_shutdown+0x14b/0x170
+     [<ffffffff8100334c>] do_syscall_64+0x19c/0x410
+     [<ffffffff83295ca5>] entry_SYSCALL64_slow_path+0x25/0x25
+    Code: 90 00 b4 0b 83 c7 00 f1 f1 f1 f1 4c 8d 6d e0 c7 40 04 00 00 00 f4 c7 40 08 f3 f3 f3 f3 48 89 d8 48 c1 e8 03 c7 45 b4 00 00 00 00 <80> 3c 30 00 75 78 48 8d 7b 08 49 8d 75 c0 48 b8 00 00 00 00 00
+    RIP  [<ffffffff830bb46b>] tipc_node_xmit_skb+0x6b/0x140
+     RSP <ffff8800595bfce8>
+    ---[ end trace 57b0484e351e71f1 ]---
+
+I feel like we should maybe return -ENOMEM or -ENOBUFS, but I'm not sure
+userspace is equipped to handle that. Anyway, this is better than a GPF
+and looks somewhat consistent with other tipc_msg_create() callers.
+
+Signed-off-by: Vegard Nossum <vegard.nossum@oracle.com>
+Acked-by: Ying Xue <ying.xue@windriver.com>
+Acked-by: Jon Maloy <jon.maloy@ericsson.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/tipc/socket.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/tipc/socket.c
++++ b/net/tipc/socket.c
+@@ -2180,7 +2180,8 @@ restart:
+                                             TIPC_CONN_MSG, SHORT_H_SIZE,
+                                             0, dnode, onode, dport, oport,
+                                             TIPC_CONN_SHUTDOWN);
+-                      tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
++                      if (skb)
++                              tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
+               }
+               tsk->connected = 0;
+               sock->state = SS_DISCONNECTING;
diff --git a/queue-4.7/tun-fix-transmit-timestamp-support.patch b/queue-4.7/tun-fix-transmit-timestamp-support.patch
new file mode 100644 (file)
index 0000000..323f23b
--- /dev/null
@@ -0,0 +1,56 @@
+From foo@baz Wed Sep 21 10:05:18 CEST 2016
+From: Soheil Hassas Yeganeh <soheil@google.com>
+Date: Tue, 23 Aug 2016 18:22:33 -0400
+Subject: tun: fix transmit timestamp support
+
+From: Soheil Hassas Yeganeh <soheil@google.com>
+
+
+[ Upstream commit 7b996243fab46092fb3a29c773c54be8152366e4 ]
+
+Instead of using sock_tx_timestamp, use skb_tx_timestamp to record
+software transmit timestamp of a packet.
+
+sock_tx_timestamp resets and overrides the tx_flags of the skb.
+The function is intended to be called from within the protocol
+layer when creating the skb, not from a device driver. This is
+inconsistent with other drivers and will cause issues for TCP.
+
+In TCP, we intend to sample the timestamps for the last byte
+for each sendmsg/sendpage. For that reason, tcp_sendmsg calls
+tcp_tx_timestamp only with the last skb that it generates.
+For example, if a 128KB message is split into two 64KB packets
+we want to sample the SND timestamp of the last packet. The current
+code in the tun driver, however, will result in sampling the SND
+timestamp for both packets.
+
+Also, when the last packet is split into smaller packets for
+retransmission (see tcp_fragment), the tun driver will record
+timestamps for all of the retransmitted packets and not only the
+last packet.
+
+Fixes: eda297729171 (tun: Support software transmit time stamping.)
+Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: Francis Yan <francisyyan@google.com>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/tun.c |    6 +-----
+ 1 file changed, 1 insertion(+), 5 deletions(-)
+
+--- a/drivers/net/tun.c
++++ b/drivers/net/tun.c
+@@ -878,11 +878,7 @@ static netdev_tx_t tun_net_xmit(struct s
+       if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
+               goto drop;
+-      if (skb->sk && sk_fullsock(skb->sk)) {
+-              sock_tx_timestamp(skb->sk, skb->sk->sk_tsflags,
+-                                &skb_shinfo(skb)->tx_flags);
+-              sw_tx_timestamp(skb);
+-      }
++      skb_tx_timestamp(skb);
+       /* Orphan the skb - required as we might hang on to it
+        * for indefinite time.
diff --git a/queue-4.7/udp-fix-poll-issue-with-zero-sized-packets.patch b/queue-4.7/udp-fix-poll-issue-with-zero-sized-packets.patch
new file mode 100644 (file)
index 0000000..4c9e3cc
--- /dev/null
@@ -0,0 +1,75 @@
+From foo@baz Wed Sep 21 10:05:18 CEST 2016
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 23 Aug 2016 13:59:33 -0700
+Subject: udp: fix poll() issue with zero sized packets
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit e83c6744e81abc93a20d0eb3b7f504a176a6126a ]
+
+Laura tracked poll() [and friends] regression caused by commit
+e6afc8ace6dd ("udp: remove headers from UDP packets before queueing")
+
+udp_poll() needs to know if there is a valid packet in receive queue,
+even if its payload length is 0.
+
+Change first_packet_length() to return a signed int, and use -1
+as the indication of an empty queue.
+
+Fixes: e6afc8ace6dd ("udp: remove headers from UDP packets before queueing")
+Reported-by: Laura Abbott <labbott@redhat.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Tested-by: Laura Abbott <labbott@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/udp.c |   12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -1182,13 +1182,13 @@ out:
+  *    @sk: socket
+  *
+  *    Drops all bad checksum frames, until a valid one is found.
+- *    Returns the length of found skb, or 0 if none is found.
++ *    Returns the length of found skb, or -1 if none is found.
+  */
+-static unsigned int first_packet_length(struct sock *sk)
++static int first_packet_length(struct sock *sk)
+ {
+       struct sk_buff_head list_kill, *rcvq = &sk->sk_receive_queue;
+       struct sk_buff *skb;
+-      unsigned int res;
++      int res;
+       __skb_queue_head_init(&list_kill);
+@@ -1203,7 +1203,7 @@ static unsigned int first_packet_length(
+               __skb_unlink(skb, rcvq);
+               __skb_queue_tail(&list_kill, skb);
+       }
+-      res = skb ? skb->len : 0;
++      res = skb ? skb->len : -1;
+       spin_unlock_bh(&rcvq->lock);
+       if (!skb_queue_empty(&list_kill)) {
+@@ -1232,7 +1232,7 @@ int udp_ioctl(struct sock *sk, int cmd,
+       case SIOCINQ:
+       {
+-              unsigned int amount = first_packet_length(sk);
++              int amount = max_t(int, 0, first_packet_length(sk));
+               return put_user(amount, (int __user *)arg);
+       }
+@@ -2184,7 +2184,7 @@ unsigned int udp_poll(struct file *file,
+       /* Check for false positives due to checksum errors */
+       if ((mask & POLLRDNORM) && !(file->f_flags & O_NONBLOCK) &&
+-          !(sk->sk_shutdown & RCV_SHUTDOWN) && !first_packet_length(sk))
++          !(sk->sk_shutdown & RCV_SHUTDOWN) && first_packet_length(sk) == -1)
+               mask &= ~(POLLIN | POLLRDNORM);
+       return mask;
diff --git a/queue-4.7/vti-flush-x-netns-xfrm-cache-when-vti-interface-is-removed.patch b/queue-4.7/vti-flush-x-netns-xfrm-cache-when-vti-interface-is-removed.patch
new file mode 100644 (file)
index 0000000..4469108
--- /dev/null
@@ -0,0 +1,159 @@
+From foo@baz Wed Sep 21 10:05:18 CEST 2016
+From: Lance Richardson <lrichard@redhat.com>
+Date: Tue, 9 Aug 2016 15:29:42 -0400
+Subject: vti: flush x-netns xfrm cache when vti interface is removed
+
+From: Lance Richardson <lrichard@redhat.com>
+
+
+[ Upstream commit a5d0dc810abf3d6b241777467ee1d6efb02575fc ]
+
+When executing the script included below, the netns delete operation
+hangs with the following message (repeated at 10 second intervals):
+
+  kernel:unregister_netdevice: waiting for lo to become free. Usage count = 1
+
+This occurs because a reference to the lo interface in the "secure" netns
+is still held by a dst entry in the xfrm bundle cache in the init netns.
+
+Address this problem by garbage collecting the tunnel netns flow cache
+when a cross-namespace vti interface receives a NETDEV_DOWN notification.
+
+A more detailed description of the problem scenario (referencing commands
+in the script below):
+
+(1) ip link add vti_test type vti local 1.1.1.1 remote 1.1.1.2 key 1
+
+  The vti_test interface is created in the init namespace. vti_tunnel_init()
+  attaches a struct ip_tunnel to the vti interface's netdev_priv(dev),
+  setting the tunnel net to &init_net.
+
+(2) ip link set vti_test netns secure
+
+  The vti_test interface is moved to the "secure" netns. Note that
+  the associated struct ip_tunnel still has tunnel->net set to &init_net.
+
+(3) ip netns exec secure ping -c 4 -i 0.02 -I 192.168.100.1 192.168.200.1
+
+  The first packet sent using the vti device causes xfrm_lookup() to be
+  called as follows:
+
+      dst = xfrm_lookup(tunnel->net, skb_dst(skb), fl, NULL, 0);
+
+  Note that tunnel->net is the init namespace, while skb_dst(skb) references
+  the vti_test interface in the "secure" namespace. The returned dst
+  references an interface in the init namespace.
+
+  Also note that the first parameter to xfrm_lookup() determines which flow
+  cache is used to store the computed xfrm bundle, so after xfrm_lookup()
+  returns there will be a cached bundle in the init namespace flow cache
+  with a dst referencing a device in the "secure" namespace.
+
+(4) ip netns del secure
+
+  Kernel begins to delete the "secure" namespace.  At some point the
+  vti_test interface is deleted, at which point dst_ifdown() changes
+  the dst->dev in the cached xfrm bundle flow from vti_test to lo (still
+  in the "secure" namespace however).
+  Since nothing has happened to cause the init namespace's flow cache
+  to be garbage collected, this dst remains attached to the flow cache,
+  so the kernel loops waiting for the last reference to lo to go away.
+
+<Begin script>
+ip link add br1 type bridge
+ip link set dev br1 up
+ip addr add dev br1 1.1.1.1/8
+
+ip netns add secure
+ip link add vti_test type vti local 1.1.1.1 remote 1.1.1.2 key 1
+ip link set vti_test netns secure
+ip netns exec secure ip link set vti_test up
+ip netns exec secure ip link s lo up
+ip netns exec secure ip addr add dev lo 192.168.100.1/24
+ip netns exec secure ip route add 192.168.200.0/24 dev vti_test
+ip xfrm policy flush
+ip xfrm state flush
+ip xfrm policy add dir out tmpl src 1.1.1.1 dst 1.1.1.2 \
+   proto esp mode tunnel mark 1
+ip xfrm policy add dir in tmpl src 1.1.1.2 dst 1.1.1.1 \
+   proto esp mode tunnel mark 1
+ip xfrm state add src 1.1.1.1 dst 1.1.1.2 proto esp spi 1 \
+   mode tunnel enc des3_ede 0x112233445566778811223344556677881122334455667788
+ip xfrm state add src 1.1.1.2 dst 1.1.1.1 proto esp spi 1 \
+   mode tunnel enc des3_ede 0x112233445566778811223344556677881122334455667788
+
+ip netns exec secure ping -c 4 -i 0.02 -I 192.168.100.1 192.168.200.1
+
+ip netns del secure
+<End script>
+
+Reported-by: Hangbin Liu <haliu@redhat.com>
+Reported-by: Jan Tluka <jtluka@redhat.com>
+Signed-off-by: Lance Richardson <lrichard@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ip_vti.c |   31 +++++++++++++++++++++++++++++++
+ 1 file changed, 31 insertions(+)
+
+--- a/net/ipv4/ip_vti.c
++++ b/net/ipv4/ip_vti.c
+@@ -557,6 +557,33 @@ static struct rtnl_link_ops vti_link_ops
+       .get_link_net   = ip_tunnel_get_link_net,
+ };
++static bool is_vti_tunnel(const struct net_device *dev)
++{
++      return dev->netdev_ops == &vti_netdev_ops;
++}
++
++static int vti_device_event(struct notifier_block *unused,
++                          unsigned long event, void *ptr)
++{
++      struct net_device *dev = netdev_notifier_info_to_dev(ptr);
++      struct ip_tunnel *tunnel = netdev_priv(dev);
++
++      if (!is_vti_tunnel(dev))
++              return NOTIFY_DONE;
++
++      switch (event) {
++      case NETDEV_DOWN:
++              if (!net_eq(tunnel->net, dev_net(dev)))
++                      xfrm_garbage_collect(tunnel->net);
++              break;
++      }
++      return NOTIFY_DONE;
++}
++
++static struct notifier_block vti_notifier_block __read_mostly = {
++      .notifier_call = vti_device_event,
++};
++
+ static int __init vti_init(void)
+ {
+       const char *msg;
+@@ -564,6 +591,8 @@ static int __init vti_init(void)
+       pr_info("IPv4 over IPsec tunneling driver\n");
++      register_netdevice_notifier(&vti_notifier_block);
++
+       msg = "tunnel device";
+       err = register_pernet_device(&vti_net_ops);
+       if (err < 0)
+@@ -596,6 +625,7 @@ xfrm_proto_ah_failed:
+ xfrm_proto_esp_failed:
+       unregister_pernet_device(&vti_net_ops);
+ pernet_dev_failed:
++      unregister_netdevice_notifier(&vti_notifier_block);
+       pr_err("vti init: failed to register %s\n", msg);
+       return err;
+ }
+@@ -607,6 +637,7 @@ static void __exit vti_fini(void)
+       xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
+       xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
+       unregister_pernet_device(&vti_net_ops);
++      unregister_netdevice_notifier(&vti_notifier_block);
+ }
+ module_init(vti_init);