]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.4-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 29 Feb 2016 22:44:52 +0000 (14:44 -0800)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 29 Feb 2016 22:44:52 +0000 (14:44 -0800)
added patches:
af_iucv-validate-socket-address-length-in-iucv_sock_bind.patch
af_unix-don-t-set-err-in-unix_stream_read_generic-unless-there-was-an-error.patch
af_unix-fix-struct-pid-memory-leak.patch
af_unix-guard-against-other-sk-in-unix_dgram_sendmsg.patch
bonding-fix-arp-monitor-validation.patch
bpf-fix-branch-offset-adjustment-on-backjumps-after-patching-ctx-expansion.patch
enic-increment-devcmd2-result-ring-in-case-of-timeout.patch
flow_dissector-fix-unaligned-access-in-__skb_flow_dissector-when-used-by-eth_get_headlen.patch
gro-make-gro-aware-of-lightweight-tunnels.patch
iff_no_queue-fix-for-drivers-not-calling-ether_setup.patch
inet-frag-always-orphan-skbs-inside-ip_defrag.patch
ipv4-fix-memory-leaks-in-ip_cmsg_send-callers.patch
ipv6-addrconf-fix-recursive-spin-lock-call.patch
ipv6-enforce-flowi6_oif-usage-in-ip6_dst_lookup_tail.patch
ipv6-fix-a-lockdep-splat.patch
ipv6-udp-use-sticky-pktinfo-egress-ifindex-on-connect.patch
l2tp-fix-error-creating-l2tp-tunnels.patch
lwt-fix-rx-checksum-setting-for-lwt-devices-tunneling-over-ipv6.patch
net-add-sysctl_max_skb_frags.patch
net-copy-inner-l3-and-l4-headers-as-unaligned-on-gre-teb.patch
net-dp83640-fix-tx-timestamp-overflow-handling.patch
net-dsa-fix-mv88e6xxx-switches.patch
net-mlx4_en-avoid-changing-dev-features-directly-in-run-time.patch
net-mlx4_en-choose-time-stamping-shift-value-according-to-hw-frequency.patch
net-mlx4_en-count-hw-buffer-overrun-only-once.patch
net_sched-fix-reclassification-needs-to-consider-ether-protocol-changes.patch
pppoe-fix-reference-counting-in-pppoe-proxy.patch
pptp-fix-illegal-memory-access-caused-by-multiple-bind-s.patch
qmi_wwan-add-4g-lte-usb-modem-u901.patch
route-check-and-remove-route-cache-when-we-get-route.patch
rtnl-rtm_getnetconf-fix-wrong-return-value.patch
sctp-allow-setting-sctp_sack_immediately-by-the-application.patch
sctp-fix-port-hash-table-size-computation.patch
sctp-translate-network-order-to-host-order-when-users-get-a-hmacid.patch
switchdev-require-rtnl-mutex-to-be-held-when-sending-fdb-notifications.patch
tcp-beware-of-alignments-in-tcp_get_info.patch
tcp-dccp-fix-another-race-at-listener-dismantle.patch
tcp-do-not-drop-syn_recv-on-all-icmp-reports.patch
tcp-fix-null-deref-in-tcp_v4_send_ack.patch
tcp-md5-release-request-socket-instead-of-listener.patch
tg3-fix-for-tg3-transmit-queue-0-timed-out-when-too-many-gso_segs.patch
tipc-fix-connection-abort-during-subscription-cancel.patch
tipc-fix-premature-addition-of-node-to-lookup-table.patch
tipc-unlock-in-error-path.patch
tunnels-allow-ipv6-udp-checksums-to-be-correctly-controlled.patch
unix-correctly-track-in-flight-fds-in-sending-process-user_struct.patch
unix_diag-fix-incorrect-sign-extension-in-unix_lookup_by_ino.patch

48 files changed:
queue-4.4/af_iucv-validate-socket-address-length-in-iucv_sock_bind.patch [new file with mode: 0644]
queue-4.4/af_unix-don-t-set-err-in-unix_stream_read_generic-unless-there-was-an-error.patch [new file with mode: 0644]
queue-4.4/af_unix-fix-struct-pid-memory-leak.patch [new file with mode: 0644]
queue-4.4/af_unix-guard-against-other-sk-in-unix_dgram_sendmsg.patch [new file with mode: 0644]
queue-4.4/bonding-fix-arp-monitor-validation.patch [new file with mode: 0644]
queue-4.4/bpf-fix-branch-offset-adjustment-on-backjumps-after-patching-ctx-expansion.patch [new file with mode: 0644]
queue-4.4/enic-increment-devcmd2-result-ring-in-case-of-timeout.patch [new file with mode: 0644]
queue-4.4/flow_dissector-fix-unaligned-access-in-__skb_flow_dissector-when-used-by-eth_get_headlen.patch [new file with mode: 0644]
queue-4.4/gro-make-gro-aware-of-lightweight-tunnels.patch [new file with mode: 0644]
queue-4.4/iff_no_queue-fix-for-drivers-not-calling-ether_setup.patch [new file with mode: 0644]
queue-4.4/inet-frag-always-orphan-skbs-inside-ip_defrag.patch [new file with mode: 0644]
queue-4.4/ipv4-fix-memory-leaks-in-ip_cmsg_send-callers.patch [new file with mode: 0644]
queue-4.4/ipv6-addrconf-fix-recursive-spin-lock-call.patch [new file with mode: 0644]
queue-4.4/ipv6-enforce-flowi6_oif-usage-in-ip6_dst_lookup_tail.patch [new file with mode: 0644]
queue-4.4/ipv6-fix-a-lockdep-splat.patch [new file with mode: 0644]
queue-4.4/ipv6-udp-use-sticky-pktinfo-egress-ifindex-on-connect.patch [new file with mode: 0644]
queue-4.4/l2tp-fix-error-creating-l2tp-tunnels.patch [new file with mode: 0644]
queue-4.4/lwt-fix-rx-checksum-setting-for-lwt-devices-tunneling-over-ipv6.patch [new file with mode: 0644]
queue-4.4/net-add-sysctl_max_skb_frags.patch [new file with mode: 0644]
queue-4.4/net-copy-inner-l3-and-l4-headers-as-unaligned-on-gre-teb.patch [new file with mode: 0644]
queue-4.4/net-dp83640-fix-tx-timestamp-overflow-handling.patch [new file with mode: 0644]
queue-4.4/net-dsa-fix-mv88e6xxx-switches.patch [new file with mode: 0644]
queue-4.4/net-mlx4_en-avoid-changing-dev-features-directly-in-run-time.patch [new file with mode: 0644]
queue-4.4/net-mlx4_en-choose-time-stamping-shift-value-according-to-hw-frequency.patch [new file with mode: 0644]
queue-4.4/net-mlx4_en-count-hw-buffer-overrun-only-once.patch [new file with mode: 0644]
queue-4.4/net_sched-fix-reclassification-needs-to-consider-ether-protocol-changes.patch [new file with mode: 0644]
queue-4.4/pppoe-fix-reference-counting-in-pppoe-proxy.patch [new file with mode: 0644]
queue-4.4/pptp-fix-illegal-memory-access-caused-by-multiple-bind-s.patch [new file with mode: 0644]
queue-4.4/qmi_wwan-add-4g-lte-usb-modem-u901.patch [new file with mode: 0644]
queue-4.4/route-check-and-remove-route-cache-when-we-get-route.patch [new file with mode: 0644]
queue-4.4/rtnl-rtm_getnetconf-fix-wrong-return-value.patch [new file with mode: 0644]
queue-4.4/sctp-allow-setting-sctp_sack_immediately-by-the-application.patch [new file with mode: 0644]
queue-4.4/sctp-fix-port-hash-table-size-computation.patch [new file with mode: 0644]
queue-4.4/sctp-translate-network-order-to-host-order-when-users-get-a-hmacid.patch [new file with mode: 0644]
queue-4.4/series [new file with mode: 0644]
queue-4.4/switchdev-require-rtnl-mutex-to-be-held-when-sending-fdb-notifications.patch [new file with mode: 0644]
queue-4.4/tcp-beware-of-alignments-in-tcp_get_info.patch [new file with mode: 0644]
queue-4.4/tcp-dccp-fix-another-race-at-listener-dismantle.patch [new file with mode: 0644]
queue-4.4/tcp-do-not-drop-syn_recv-on-all-icmp-reports.patch [new file with mode: 0644]
queue-4.4/tcp-fix-null-deref-in-tcp_v4_send_ack.patch [new file with mode: 0644]
queue-4.4/tcp-md5-release-request-socket-instead-of-listener.patch [new file with mode: 0644]
queue-4.4/tg3-fix-for-tg3-transmit-queue-0-timed-out-when-too-many-gso_segs.patch [new file with mode: 0644]
queue-4.4/tipc-fix-connection-abort-during-subscription-cancel.patch [new file with mode: 0644]
queue-4.4/tipc-fix-premature-addition-of-node-to-lookup-table.patch [new file with mode: 0644]
queue-4.4/tipc-unlock-in-error-path.patch [new file with mode: 0644]
queue-4.4/tunnels-allow-ipv6-udp-checksums-to-be-correctly-controlled.patch [new file with mode: 0644]
queue-4.4/unix-correctly-track-in-flight-fds-in-sending-process-user_struct.patch [new file with mode: 0644]
queue-4.4/unix_diag-fix-incorrect-sign-extension-in-unix_lookup_by_ino.patch [new file with mode: 0644]

diff --git a/queue-4.4/af_iucv-validate-socket-address-length-in-iucv_sock_bind.patch b/queue-4.4/af_iucv-validate-socket-address-length-in-iucv_sock_bind.patch
new file mode 100644 (file)
index 0000000..fd4325a
--- /dev/null
@@ -0,0 +1,30 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Ursula Braun <ursula.braun@de.ibm.com>
+Date: Tue, 19 Jan 2016 10:41:33 +0100
+Subject: af_iucv: Validate socket address length in iucv_sock_bind()
+
+From: Ursula Braun <ursula.braun@de.ibm.com>
+
+[ Upstream commit 52a82e23b9f2a9e1d429c5207f8575784290d008 ]
+
+Signed-off-by: Ursula Braun <ursula.braun@de.ibm.com>
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Reviewed-by: Evgeny Cherkashin <Eugene.Crosser@ru.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/iucv/af_iucv.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/net/iucv/af_iucv.c
++++ b/net/iucv/af_iucv.c
+@@ -708,6 +708,9 @@ static int iucv_sock_bind(struct socket
+       if (!addr || addr->sa_family != AF_IUCV)
+               return -EINVAL;
++      if (addr_len < sizeof(struct sockaddr_iucv))
++              return -EINVAL;
++
+       lock_sock(sk);
+       if (sk->sk_state != IUCV_OPEN) {
+               err = -EBADFD;
diff --git a/queue-4.4/af_unix-don-t-set-err-in-unix_stream_read_generic-unless-there-was-an-error.patch b/queue-4.4/af_unix-don-t-set-err-in-unix_stream_read_generic-unless-there-was-an-error.patch
new file mode 100644 (file)
index 0000000..a3ecfc8
--- /dev/null
@@ -0,0 +1,74 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Rainer Weikusat <rweikusat@mobileactivedefense.com>
+Date: Mon, 8 Feb 2016 18:47:19 +0000
+Subject: af_unix: Don't set err in unix_stream_read_generic unless there was an error
+
+From: Rainer Weikusat <rweikusat@mobileactivedefense.com>
+
+[ Upstream commit 1b92ee3d03af6643df395300ba7748f19ecdb0c5 ]
+
+The present unix_stream_read_generic contains various code sequences of
+the form
+
+err = -EDISASTER;
+if (<test>)
+       goto out;
+
+This has the unfortunate side effect of possibly causing the error code
+to bleed through to the final
+
+out:
+       return copied ? : err;
+
+and then to be wrongly returned if no data was copied because the caller
+didn't supply a data buffer, as demonstrated by the program available at
+
+http://pad.lv/1540731
+
+Change it such that err is only set if an error condition was detected.
+
+Fixes: 3822b5c2fc62 ("af_unix: Revert 'lock_interruptible' in stream receive code")
+Reported-by: Joseph Salisbury <joseph.salisbury@canonical.com>
+Signed-off-by: Rainer Weikusat <rweikusat@mobileactivedefense.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/unix/af_unix.c |   16 ++++++++++------
+ 1 file changed, 10 insertions(+), 6 deletions(-)
+
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -2270,13 +2270,15 @@ static int unix_stream_read_generic(stru
+       size_t size = state->size;
+       unsigned int last_len;
+-      err = -EINVAL;
+-      if (sk->sk_state != TCP_ESTABLISHED)
++      if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
++              err = -EINVAL;
+               goto out;
++      }
+-      err = -EOPNOTSUPP;
+-      if (flags & MSG_OOB)
++      if (unlikely(flags & MSG_OOB)) {
++              err = -EOPNOTSUPP;
+               goto out;
++      }
+       target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
+       timeo = sock_rcvtimeo(sk, noblock);
+@@ -2322,9 +2324,11 @@ again:
+                               goto unlock;
+                       unix_state_unlock(sk);
+-                      err = -EAGAIN;
+-                      if (!timeo)
++                      if (!timeo) {
++                              err = -EAGAIN;
+                               break;
++                      }
++
+                       mutex_unlock(&u->readlock);
+                       timeo = unix_stream_data_wait(sk, timeo, last,
diff --git a/queue-4.4/af_unix-fix-struct-pid-memory-leak.patch b/queue-4.4/af_unix-fix-struct-pid-memory-leak.patch
new file mode 100644 (file)
index 0000000..11c50cf
--- /dev/null
@@ -0,0 +1,34 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Eric Dumazet <edumazet@google.com>
+Date: Sun, 24 Jan 2016 13:53:50 -0800
+Subject: af_unix: fix struct pid memory leak
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit fa0dc04df259ba2df3ce1920e9690c7842f8fa4b ]
+
+Dmitry reported a struct pid leak detected by a syzkaller program.
+
+Bug happens in unix_stream_recvmsg() when we break the loop when a
+signal is pending, without properly releasing scm.
+
+Fixes: b3ca9b02b007 ("net: fix multithreaded signal handling in unix recv routines")
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Rainer Weikusat <rweikusat@mobileactivedefense.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/unix/af_unix.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -2332,6 +2332,7 @@ again:
+                       if (signal_pending(current)) {
+                               err = sock_intr_errno(timeo);
++                              scm_destroy(&scm);
+                               goto out;
+                       }
diff --git a/queue-4.4/af_unix-guard-against-other-sk-in-unix_dgram_sendmsg.patch b/queue-4.4/af_unix-guard-against-other-sk-in-unix_dgram_sendmsg.patch
new file mode 100644 (file)
index 0000000..4fb6a6c
--- /dev/null
@@ -0,0 +1,50 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Rainer Weikusat <rweikusat@mobileactivedefense.com>
+Date: Thu, 11 Feb 2016 19:37:27 +0000
+Subject: af_unix: Guard against other == sk in unix_dgram_sendmsg
+
+From: Rainer Weikusat <rweikusat@mobileactivedefense.com>
+
+[ Upstream commit a5527dda344fff0514b7989ef7a755729769daa1 ]
+
+The unix_dgram_sendmsg routine use the following test
+
+if (unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
+
+to determine if sk and other are in an n:1 association (either
+established via connect or by using sendto to send messages to an
+unrelated socket identified by address). This isn't correct as the
+specified address could have been bound to the sending socket itself or
+because this socket could have been connected to itself by the time of
+the unix_peer_get but disconnected before the unix_state_lock(other). In
+both cases, the if-block would be entered despite other == sk which
+might either block the sender unintentionally or lead to trying to unlock
+the same spin lock twice for a non-blocking send. Add a other != sk
+check to guard against this.
+
+Fixes: 7d267278a9ec ("unix: avoid use-after-free in ep_remove_wait_queue")
+Reported-By: Philipp Hahn <pmhahn@pmhahn.de>
+Signed-off-by: Rainer Weikusat <rweikusat@mobileactivedefense.com>
+Tested-by: Philipp Hahn <pmhahn@pmhahn.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/unix/af_unix.c |    7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -1781,7 +1781,12 @@ restart_locked:
+                       goto out_unlock;
+       }
+-      if (unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
++      /* other == sk && unix_peer(other) != sk if
++       * - unix_peer(sk) == NULL, destination address bound to sk
++       * - unix_peer(sk) == sk by time of get but disconnected before lock
++       */
++      if (other != sk &&
++          unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
+               if (timeo) {
+                       timeo = unix_wait_for_peer(other, timeo);
diff --git a/queue-4.4/bonding-fix-arp-monitor-validation.patch b/queue-4.4/bonding-fix-arp-monitor-validation.patch
new file mode 100644 (file)
index 0000000..17874a3
--- /dev/null
@@ -0,0 +1,122 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Jay Vosburgh <jay.vosburgh@canonical.com>
+Date: Tue, 2 Feb 2016 13:35:56 -0800
+Subject: bonding: Fix ARP monitor validation
+
+From: Jay Vosburgh <jay.vosburgh@canonical.com>
+
+[ Upstream commit 21a75f0915dde8674708b39abfcda113911c49b1 ]
+
+The current logic in bond_arp_rcv will accept an incoming ARP for
+validation if (a) the receiving slave is either "active" (which includes
+the currently active slave, or the current ARP slave) or, (b) there is a
+currently active slave, and it has received an ARP since it became active.
+For case (b), the receiving slave isn't the currently active slave, and is
+receiving the original broadcast ARP request, not an ARP reply from the
+target.
+
+       This logic can fail if there is no currently active slave.  In
+this situation, the ARP probe logic cycles through all slaves, assigning
+each in turn as the "current_arp_slave" for one arp_interval, then setting
+that one as "active," and sending an ARP probe from that slave.  The
+current logic expects the ARP reply to arrive on the sending
+current_arp_slave, however, due to switch FDB updating delays, the reply
+may be directed to another slave.
+
+       This can arise if the bonding slaves and switch are working, but
+the ARP target is not responding.  When the ARP target recovers, a
+condition may result wherein the ARP target host replies faster than the
+switch can update its forwarding table, causing each ARP reply to be sent
+to the previous current_arp_slave.  This will never pass the logic in
+bond_arp_rcv, as neither of the above conditions (a) or (b) are met.
+
+       Some experimentation on a LAN shows ARP reply round trips in the
+200 usec range, but my available switches never update their FDB in less
+than 4000 usec.
+
+       This patch changes the logic in bond_arp_rcv to additionally
+accept an ARP reply for validation on any slave if there is a current ARP
+slave and it sent an ARP probe during the previous arp_interval.
+
+Fixes: aeea64ac717a ("bonding: don't trust arp requests unless active slave really works")
+Cc: Veaceslav Falico <vfalico@gmail.com>
+Cc: Andy Gospodarek <gospo@cumulusnetworks.com>
+Signed-off-by: Jay Vosburgh <jay.vosburgh@canonical.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/bonding/bond_main.c |   39 ++++++++++++++++++++++++++++-----------
+ 1 file changed, 28 insertions(+), 11 deletions(-)
+
+--- a/drivers/net/bonding/bond_main.c
++++ b/drivers/net/bonding/bond_main.c
+@@ -214,6 +214,8 @@ static void bond_uninit(struct net_devic
+ static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev,
+                                               struct rtnl_link_stats64 *stats);
+ static void bond_slave_arr_handler(struct work_struct *work);
++static bool bond_time_in_interval(struct bonding *bond, unsigned long last_act,
++                                int mod);
+ /*---------------------------- General routines -----------------------------*/
+@@ -2418,7 +2420,7 @@ int bond_arp_rcv(const struct sk_buff *s
+                struct slave *slave)
+ {
+       struct arphdr *arp = (struct arphdr *)skb->data;
+-      struct slave *curr_active_slave;
++      struct slave *curr_active_slave, *curr_arp_slave;
+       unsigned char *arp_ptr;
+       __be32 sip, tip;
+       int alen, is_arp = skb->protocol == __cpu_to_be16(ETH_P_ARP);
+@@ -2465,26 +2467,41 @@ int bond_arp_rcv(const struct sk_buff *s
+                    &sip, &tip);
+       curr_active_slave = rcu_dereference(bond->curr_active_slave);
++      curr_arp_slave = rcu_dereference(bond->current_arp_slave);
+-      /* Backup slaves won't see the ARP reply, but do come through
+-       * here for each ARP probe (so we swap the sip/tip to validate
+-       * the probe).  In a "redundant switch, common router" type of
+-       * configuration, the ARP probe will (hopefully) travel from
+-       * the active, through one switch, the router, then the other
+-       * switch before reaching the backup.
++      /* We 'trust' the received ARP enough to validate it if:
+        *
+-       * We 'trust' the arp requests if there is an active slave and
+-       * it received valid arp reply(s) after it became active. This
+-       * is done to avoid endless looping when we can't reach the
++       * (a) the slave receiving the ARP is active (which includes the
++       * current ARP slave, if any), or
++       *
++       * (b) the receiving slave isn't active, but there is a currently
++       * active slave and it received valid arp reply(s) after it became
++       * the currently active slave, or
++       *
++       * (c) there is an ARP slave that sent an ARP during the prior ARP
++       * interval, and we receive an ARP reply on any slave.  We accept
++       * these because switch FDB update delays may deliver the ARP
++       * reply to a slave other than the sender of the ARP request.
++       *
++       * Note: for (b), backup slaves are receiving the broadcast ARP
++       * request, not a reply.  This request passes from the sending
++       * slave through the L2 switch(es) to the receiving slave.  Since
++       * this is checking the request, sip/tip are swapped for
++       * validation.
++       *
++       * This is done to avoid endless looping when we can't reach the
+        * arp_ip_target and fool ourselves with our own arp requests.
+        */
+-
+       if (bond_is_active_slave(slave))
+               bond_validate_arp(bond, slave, sip, tip);
+       else if (curr_active_slave &&
+                time_after(slave_last_rx(bond, curr_active_slave),
+                           curr_active_slave->last_link_up))
+               bond_validate_arp(bond, slave, tip, sip);
++      else if (curr_arp_slave && (arp->ar_op == htons(ARPOP_REPLY)) &&
++               bond_time_in_interval(bond,
++                                     dev_trans_start(curr_arp_slave->dev), 1))
++              bond_validate_arp(bond, slave, sip, tip);
+ out_unlock:
+       if (arp != (struct arphdr *)skb->data)
diff --git a/queue-4.4/bpf-fix-branch-offset-adjustment-on-backjumps-after-patching-ctx-expansion.patch b/queue-4.4/bpf-fix-branch-offset-adjustment-on-backjumps-after-patching-ctx-expansion.patch
new file mode 100644 (file)
index 0000000..3640f9c
--- /dev/null
@@ -0,0 +1,91 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Wed, 10 Feb 2016 16:47:11 +0100
+Subject: bpf: fix branch offset adjustment on backjumps after patching ctx expansion
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+[ Upstream commit a1b14d27ed0965838350f1377ff97c93ee383492 ]
+
+When ctx access is used, the kernel often needs to expand/rewrite
+instructions, so after that patching, branch offsets have to be
+adjusted for both forward and backward jumps in the new eBPF program,
+but for backward jumps it fails to account the delta. Meaning, for
+example, if the expansion happens exactly on the insn that sits at
+the jump target, it doesn't fix up the back jump offset.
+
+Analysis on what the check in adjust_branches() is currently doing:
+
+  /* adjust offset of jmps if necessary */
+  if (i < pos && i + insn->off + 1 > pos)
+    insn->off += delta;
+  else if (i > pos && i + insn->off + 1 < pos)
+    insn->off -= delta;
+
+First condition (forward jumps):
+
+  Before:                         After:
+
+  insns[0]                        insns[0]
+  insns[1] <--- i/insn            insns[1] <--- i/insn
+  insns[2] <--- pos               insns[P] <--- pos
+  insns[3]                        insns[P]  `------| delta
+  insns[4] <--- target_X          insns[P]   `-----|
+  insns[5]                        insns[3]
+                                  insns[4] <--- target_X
+                                  insns[5]
+
+First case is if we cross pos-boundary and the jump instruction was
+before pos. This is handeled correctly. I.e. if i == pos, then this
+would mean our jump that we currently check was the patchlet itself
+that we just injected. Since such patchlets are self-contained and
+have no awareness of any insns before or after the patched one, the
+delta is correctly not adjusted. Also, for the second condition in
+case of i + insn->off + 1 == pos, means we jump to that newly patched
+instruction, so no offset adjustment are needed. That part is correct.
+
+Second condition (backward jumps):
+
+  Before:                         After:
+
+  insns[0]                        insns[0]
+  insns[1] <--- target_X          insns[1] <--- target_X
+  insns[2] <--- pos <-- target_Y  insns[P] <--- pos <-- target_Y
+  insns[3]                        insns[P]  `------| delta
+  insns[4] <--- i/insn            insns[P]   `-----|
+  insns[5]                        insns[3]
+                                  insns[4] <--- i/insn
+                                  insns[5]
+
+Second interesting case is where we cross pos-boundary and the jump
+instruction was after pos. Backward jump with i == pos would be
+impossible and pose a bug somewhere in the patchlet, so the first
+condition checking i > pos is okay only by itself. However, i +
+insn->off + 1 < pos does not always work as intended to trigger the
+adjustment. It works when jump targets would be far off where the
+delta wouldn't matter. But, for example, where the fixed insn->off
+before pointed to pos (target_Y), it now points to pos + delta, so
+that additional room needs to be taken into account for the check.
+This means that i) both tests here need to be adjusted into pos + delta,
+and ii) for the second condition, the test needs to be <= as pos
+itself can be a target in the backjump, too.
+
+Fixes: 9bac3d6d548e ("bpf: allow extended BPF programs access skb fields")
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/verifier.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -2082,7 +2082,7 @@ static void adjust_branches(struct bpf_p
+               /* adjust offset of jmps if necessary */
+               if (i < pos && i + insn->off + 1 > pos)
+                       insn->off += delta;
+-              else if (i > pos && i + insn->off + 1 < pos)
++              else if (i > pos + delta && i + insn->off + 1 <= pos + delta)
+                       insn->off -= delta;
+       }
+ }
diff --git a/queue-4.4/enic-increment-devcmd2-result-ring-in-case-of-timeout.patch b/queue-4.4/enic-increment-devcmd2-result-ring-in-case-of-timeout.patch
new file mode 100644 (file)
index 0000000..7025a95
--- /dev/null
@@ -0,0 +1,73 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Sandeep Pillai <sanpilla@cisco.com>
+Date: Wed, 3 Feb 2016 14:40:44 +0530
+Subject: enic: increment devcmd2 result ring in case of timeout
+
+From: Sandeep Pillai <sanpilla@cisco.com>
+
+[ Upstream commit ca7f41a4957b872577807169bd7464b36aae9b9c ]
+
+Firmware posts the devcmd result in result ring. In case of timeout, driver
+does not increment the current result pointer and firmware could post the
+result after timeout has occurred. During next devcmd, driver would be
+reading the result of previous devcmd.
+
+Fix this by incrementing result even in case of timeout.
+
+Fixes: 373fb0873d43 ("enic: add devcmd2")
+Signed-off-by: Sandeep Pillai <sanpilla@cisco.com>
+Signed-off-by: Govindarajulu Varadarajan <_govind@gmx.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/cisco/enic/enic.h     |    2 +-
+ drivers/net/ethernet/cisco/enic/vnic_dev.c |   19 ++++++++++++-------
+ 2 files changed, 13 insertions(+), 8 deletions(-)
+
+--- a/drivers/net/ethernet/cisco/enic/enic.h
++++ b/drivers/net/ethernet/cisco/enic/enic.h
+@@ -33,7 +33,7 @@
+ #define DRV_NAME              "enic"
+ #define DRV_DESCRIPTION               "Cisco VIC Ethernet NIC Driver"
+-#define DRV_VERSION           "2.3.0.12"
++#define DRV_VERSION           "2.3.0.20"
+ #define DRV_COPYRIGHT         "Copyright 2008-2013 Cisco Systems, Inc"
+ #define ENIC_BARS_MAX         6
+--- a/drivers/net/ethernet/cisco/enic/vnic_dev.c
++++ b/drivers/net/ethernet/cisco/enic/vnic_dev.c
+@@ -298,7 +298,8 @@ static int _vnic_dev_cmd2(struct vnic_de
+                         int wait)
+ {
+       struct devcmd2_controller *dc2c = vdev->devcmd2;
+-      struct devcmd2_result *result = dc2c->result + dc2c->next_result;
++      struct devcmd2_result *result;
++      u8 color;
+       unsigned int i;
+       int delay, err;
+       u32 fetch_index, new_posted;
+@@ -336,13 +337,17 @@ static int _vnic_dev_cmd2(struct vnic_de
+       if (dc2c->cmd_ring[posted].flags & DEVCMD2_FNORESULT)
+               return 0;
++      result = dc2c->result + dc2c->next_result;
++      color = dc2c->color;
++
++      dc2c->next_result++;
++      if (dc2c->next_result == dc2c->result_size) {
++              dc2c->next_result = 0;
++              dc2c->color = dc2c->color ? 0 : 1;
++      }
++
+       for (delay = 0; delay < wait; delay++) {
+-              if (result->color == dc2c->color) {
+-                      dc2c->next_result++;
+-                      if (dc2c->next_result == dc2c->result_size) {
+-                              dc2c->next_result = 0;
+-                              dc2c->color = dc2c->color ? 0 : 1;
+-                      }
++              if (result->color == color) {
+                       if (result->error) {
+                               err = result->error;
+                               if (err != ERR_ECMDUNKNOWN ||
diff --git a/queue-4.4/flow_dissector-fix-unaligned-access-in-__skb_flow_dissector-when-used-by-eth_get_headlen.patch b/queue-4.4/flow_dissector-fix-unaligned-access-in-__skb_flow_dissector-when-used-by-eth_get_headlen.patch
new file mode 100644 (file)
index 0000000..efd42bf
--- /dev/null
@@ -0,0 +1,52 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Alexander Duyck <aduyck@mirantis.com>
+Date: Tue, 9 Feb 2016 02:49:54 -0800
+Subject: flow_dissector: Fix unaligned access in __skb_flow_dissector when used by eth_get_headlen
+
+From: Alexander Duyck <aduyck@mirantis.com>
+
+[ Upstream commit 461547f3158978c180d74484d58e82be9b8e7357 ]
+
+This patch fixes an issue with unaligned accesses when using
+eth_get_headlen on a page that was DMA aligned instead of being IP aligned.
+The fact is when trying to check the length we don't need to be looking at
+the flow label so we can reorder the checks to first check if we are
+supposed to gather the flow label and then make the call to actually get
+it.
+
+v2:  Updated path so that either STOP_AT_FLOW_LABEL or KEY_FLOW_LABEL can
+     cause us to check for the flow label.
+
+Reported-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
+Signed-off-by: Alexander Duyck <aduyck@mirantis.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/flow_dissector.c |    9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+--- a/net/core/flow_dissector.c
++++ b/net/core/flow_dissector.c
+@@ -208,7 +208,6 @@ ip:
+       case htons(ETH_P_IPV6): {
+               const struct ipv6hdr *iph;
+               struct ipv6hdr _iph;
+-              __be32 flow_label;
+ ipv6:
+               iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
+@@ -230,8 +229,12 @@ ipv6:
+                       key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+               }
+-              flow_label = ip6_flowlabel(iph);
+-              if (flow_label) {
++              if ((dissector_uses_key(flow_dissector,
++                                      FLOW_DISSECTOR_KEY_FLOW_LABEL) ||
++                   (flags & FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)) &&
++                  ip6_flowlabel(iph)) {
++                      __be32 flow_label = ip6_flowlabel(iph);
++
+                       if (dissector_uses_key(flow_dissector,
+                                              FLOW_DISSECTOR_KEY_FLOW_LABEL)) {
+                               key_tags = skb_flow_dissector_target(flow_dissector,
diff --git a/queue-4.4/gro-make-gro-aware-of-lightweight-tunnels.patch b/queue-4.4/gro-make-gro-aware-of-lightweight-tunnels.patch
new file mode 100644 (file)
index 0000000..8d50540
--- /dev/null
@@ -0,0 +1,82 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Jesse Gross <jesse@kernel.org>
+Date: Wed, 20 Jan 2016 17:59:49 -0800
+Subject: gro: Make GRO aware of lightweight tunnels.
+
+From: Jesse Gross <jesse@kernel.org>
+
+[ Upstream commit ce87fc6ce3f9f4488546187e3757cf666d9d4a2a ]
+
+GRO is currently not aware of tunnel metadata generated by lightweight
+tunnels and stored in the dst. This leads to two possible problems:
+ * Incorrectly merging two frames that have different metadata.
+ * Leaking of allocated metadata from merged frames.
+
+This avoids those problems by comparing the tunnel information before
+merging, similar to how we handle other metadata (such as vlan tags),
+and releasing any state when we are done.
+
+Reported-by: John <john.phillips5@hpe.com>
+Fixes: 2e15ea39 ("ip_gre: Add support to collect tunnel metadata.")
+Signed-off-by: Jesse Gross <jesse@kernel.org>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Thomas Graf <tgraf@suug.ch>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/dst_metadata.h |   18 ++++++++++++++++++
+ net/core/dev.c             |    7 +++++--
+ 2 files changed, 23 insertions(+), 2 deletions(-)
+
+--- a/include/net/dst_metadata.h
++++ b/include/net/dst_metadata.h
+@@ -44,6 +44,24 @@ static inline bool skb_valid_dst(const s
+       return dst && !(dst->flags & DST_METADATA);
+ }
++static inline int skb_metadata_dst_cmp(const struct sk_buff *skb_a,
++                                     const struct sk_buff *skb_b)
++{
++      const struct metadata_dst *a, *b;
++
++      if (!(skb_a->_skb_refdst | skb_b->_skb_refdst))
++              return 0;
++
++      a = (const struct metadata_dst *) skb_dst(skb_a);
++      b = (const struct metadata_dst *) skb_dst(skb_b);
++
++      if (!a != !b || a->u.tun_info.options_len != b->u.tun_info.options_len)
++              return 1;
++
++      return memcmp(&a->u.tun_info, &b->u.tun_info,
++                    sizeof(a->u.tun_info) + a->u.tun_info.options_len);
++}
++
+ struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags);
+ struct metadata_dst __percpu *metadata_dst_alloc_percpu(u8 optslen, gfp_t flags);
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -4145,6 +4145,7 @@ static void gro_list_prepare(struct napi
+               diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
+               diffs |= p->vlan_tci ^ skb->vlan_tci;
++              diffs |= skb_metadata_dst_cmp(p, skb);
+               if (maclen == ETH_HLEN)
+                       diffs |= compare_ether_header(skb_mac_header(p),
+                                                     skb_mac_header(skb));
+@@ -4342,10 +4343,12 @@ static gro_result_t napi_skb_finish(gro_
+               break;
+       case GRO_MERGED_FREE:
+-              if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
++              if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) {
++                      skb_dst_drop(skb);
+                       kmem_cache_free(skbuff_head_cache, skb);
+-              else
++              } else {
+                       __kfree_skb(skb);
++              }
+               break;
+       case GRO_HELD:
diff --git a/queue-4.4/iff_no_queue-fix-for-drivers-not-calling-ether_setup.patch b/queue-4.4/iff_no_queue-fix-for-drivers-not-calling-ether_setup.patch
new file mode 100644 (file)
index 0000000..950386c
--- /dev/null
@@ -0,0 +1,47 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Phil Sutter <phil@nwl.cc>
+Date: Wed, 17 Feb 2016 15:37:43 +0100
+Subject: IFF_NO_QUEUE: Fix for drivers not calling ether_setup()
+
+From: Phil Sutter <phil@nwl.cc>
+
+[ Upstream commit a813104d923339144078939175faf4e66aca19b4 ]
+
+My implementation around IFF_NO_QUEUE driver flag assumed that leaving
+tx_queue_len untouched (specifically: not setting it to zero) by drivers
+would make it possible to assign a regular qdisc to them without having
+to worry about setting tx_queue_len to a useful value. This was only
+partially true: I overlooked that some drivers don't call ether_setup()
+and therefore not initialize tx_queue_len to the default value of 1000.
+Consequently, removing the workarounds in place for that case in qdisc
+implementations which cared about it (namely, pfifo, bfifo, gred, htb,
+plug and sfb) leads to problems with these specific interface types and
+qdiscs.
+
+Luckily, there's already a sanitization point for drivers setting
+tx_queue_len to zero, which can be reused to assign the fallback value
+most qdisc implementations used, which is 1.
+
+Fixes: 348e3435cbefa ("net: sched: drop all special handling of tx_queue_len == 0")
+Tested-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Signed-off-by: Phil Sutter <phil@nwl.cc>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -7128,8 +7128,10 @@ struct net_device *alloc_netdev_mqs(int
+       dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
+       setup(dev);
+-      if (!dev->tx_queue_len)
++      if (!dev->tx_queue_len) {
+               dev->priv_flags |= IFF_NO_QUEUE;
++              dev->tx_queue_len = 1;
++      }
+       dev->num_tx_queues = txqs;
+       dev->real_num_tx_queues = txqs;
diff --git a/queue-4.4/inet-frag-always-orphan-skbs-inside-ip_defrag.patch b/queue-4.4/inet-frag-always-orphan-skbs-inside-ip_defrag.patch
new file mode 100644 (file)
index 0000000..087c594
--- /dev/null
@@ -0,0 +1,160 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Joe Stringer <joe@ovn.org>
+Date: Fri, 22 Jan 2016 15:49:12 -0800
+Subject: inet: frag: Always orphan skbs inside ip_defrag()
+
+From: Joe Stringer <joe@ovn.org>
+
+[ Upstream commit 8282f27449bf15548cb82c77b6e04ee0ab827bdc ]
+
+Later parts of the stack (including fragmentation) expect that there is
+never a socket attached to frag in a frag_list, however this invariant
+was not enforced on all defrag paths. This could lead to the
+BUG_ON(skb->sk) during ip_do_fragment(), as per the call stack at the
+end of this commit message.
+
+While the call could be added to openvswitch to fix this particular
+error, the head and tail of the frags list are already orphaned
+indirectly inside ip_defrag(), so it seems like the remaining fragments
+should all be orphaned in all circumstances.
+
+kernel BUG at net/ipv4/ip_output.c:586!
+[...]
+Call Trace:
+ <IRQ>
+ [<ffffffffa0205270>] ? do_output.isra.29+0x1b0/0x1b0 [openvswitch]
+ [<ffffffffa02167a7>] ovs_fragment+0xcc/0x214 [openvswitch]
+ [<ffffffff81667830>] ? dst_discard_out+0x20/0x20
+ [<ffffffff81667810>] ? dst_ifdown+0x80/0x80
+ [<ffffffffa0212072>] ? find_bucket.isra.2+0x62/0x70 [openvswitch]
+ [<ffffffff810e0ba5>] ? mod_timer_pending+0x65/0x210
+ [<ffffffff810b732b>] ? __lock_acquire+0x3db/0x1b90
+ [<ffffffffa03205a2>] ? nf_conntrack_in+0x252/0x500 [nf_conntrack]
+ [<ffffffff810b63c4>] ? __lock_is_held+0x54/0x70
+ [<ffffffffa02051a3>] do_output.isra.29+0xe3/0x1b0 [openvswitch]
+ [<ffffffffa0206411>] do_execute_actions+0xe11/0x11f0 [openvswitch]
+ [<ffffffff810b63c4>] ? __lock_is_held+0x54/0x70
+ [<ffffffffa0206822>] ovs_execute_actions+0x32/0xd0 [openvswitch]
+ [<ffffffffa020b505>] ovs_dp_process_packet+0x85/0x140 [openvswitch]
+ [<ffffffff810b63c4>] ? __lock_is_held+0x54/0x70
+ [<ffffffffa02068a2>] ovs_execute_actions+0xb2/0xd0 [openvswitch]
+ [<ffffffffa020b505>] ovs_dp_process_packet+0x85/0x140 [openvswitch]
+ [<ffffffffa0215019>] ? ovs_ct_get_labels+0x49/0x80 [openvswitch]
+ [<ffffffffa0213a1d>] ovs_vport_receive+0x5d/0xa0 [openvswitch]
+ [<ffffffff810b732b>] ? __lock_acquire+0x3db/0x1b90
+ [<ffffffff810b732b>] ? __lock_acquire+0x3db/0x1b90
+ [<ffffffff810b732b>] ? __lock_acquire+0x3db/0x1b90
+ [<ffffffffa0214895>] ? internal_dev_xmit+0x5/0x140 [openvswitch]
+ [<ffffffffa02148fc>] internal_dev_xmit+0x6c/0x140 [openvswitch]
+ [<ffffffffa0214895>] ? internal_dev_xmit+0x5/0x140 [openvswitch]
+ [<ffffffff81660299>] dev_hard_start_xmit+0x2b9/0x5e0
+ [<ffffffff8165fc21>] ? netif_skb_features+0xd1/0x1f0
+ [<ffffffff81660f20>] __dev_queue_xmit+0x800/0x930
+ [<ffffffff81660770>] ? __dev_queue_xmit+0x50/0x930
+ [<ffffffff810b53f1>] ? mark_held_locks+0x71/0x90
+ [<ffffffff81669876>] ? neigh_resolve_output+0x106/0x220
+ [<ffffffff81661060>] dev_queue_xmit+0x10/0x20
+ [<ffffffff816698e8>] neigh_resolve_output+0x178/0x220
+ [<ffffffff816a8e6f>] ? ip_finish_output2+0x1ff/0x590
+ [<ffffffff816a8e6f>] ip_finish_output2+0x1ff/0x590
+ [<ffffffff816a8cee>] ? ip_finish_output2+0x7e/0x590
+ [<ffffffff816a9a31>] ip_do_fragment+0x831/0x8a0
+ [<ffffffff816a8c70>] ? ip_copy_metadata+0x1b0/0x1b0
+ [<ffffffff816a9ae3>] ip_fragment.constprop.49+0x43/0x80
+ [<ffffffff816a9c9c>] ip_finish_output+0x17c/0x340
+ [<ffffffff8169a6f4>] ? nf_hook_slow+0xe4/0x190
+ [<ffffffff816ab4c0>] ip_output+0x70/0x110
+ [<ffffffff816a9b20>] ? ip_fragment.constprop.49+0x80/0x80
+ [<ffffffff816aa9f9>] ip_local_out+0x39/0x70
+ [<ffffffff816abf89>] ip_send_skb+0x19/0x40
+ [<ffffffff816abfe3>] ip_push_pending_frames+0x33/0x40
+ [<ffffffff816df21a>] icmp_push_reply+0xea/0x120
+ [<ffffffff816df93d>] icmp_reply.constprop.23+0x1ed/0x230
+ [<ffffffff816df9ce>] icmp_echo.part.21+0x4e/0x50
+ [<ffffffff810b63c4>] ? __lock_is_held+0x54/0x70
+ [<ffffffff810d5f9e>] ? rcu_read_lock_held+0x5e/0x70
+ [<ffffffff816dfa06>] icmp_echo+0x36/0x70
+ [<ffffffff816e0d11>] icmp_rcv+0x271/0x450
+ [<ffffffff816a4ca7>] ip_local_deliver_finish+0x127/0x3a0
+ [<ffffffff816a4bc1>] ? ip_local_deliver_finish+0x41/0x3a0
+ [<ffffffff816a5160>] ip_local_deliver+0x60/0xd0
+ [<ffffffff816a4b80>] ? ip_rcv_finish+0x560/0x560
+ [<ffffffff816a46fd>] ip_rcv_finish+0xdd/0x560
+ [<ffffffff816a5453>] ip_rcv+0x283/0x3e0
+ [<ffffffff810b6302>] ? match_held_lock+0x192/0x200
+ [<ffffffff816a4620>] ? inet_del_offload+0x40/0x40
+ [<ffffffff8165d062>] __netif_receive_skb_core+0x392/0xae0
+ [<ffffffff8165e68e>] ? process_backlog+0x8e/0x230
+ [<ffffffff810b53f1>] ? mark_held_locks+0x71/0x90
+ [<ffffffff8165d7c8>] __netif_receive_skb+0x18/0x60
+ [<ffffffff8165e678>] process_backlog+0x78/0x230
+ [<ffffffff8165e6dd>] ? process_backlog+0xdd/0x230
+ [<ffffffff8165e355>] net_rx_action+0x155/0x400
+ [<ffffffff8106b48c>] __do_softirq+0xcc/0x420
+ [<ffffffff816a8e87>] ? ip_finish_output2+0x217/0x590
+ [<ffffffff8178e78c>] do_softirq_own_stack+0x1c/0x30
+ <EOI>
+ [<ffffffff8106b88e>] do_softirq+0x4e/0x60
+ [<ffffffff8106b948>] __local_bh_enable_ip+0xa8/0xb0
+ [<ffffffff816a8eb0>] ip_finish_output2+0x240/0x590
+ [<ffffffff816a9a31>] ? ip_do_fragment+0x831/0x8a0
+ [<ffffffff816a9a31>] ip_do_fragment+0x831/0x8a0
+ [<ffffffff816a8c70>] ? ip_copy_metadata+0x1b0/0x1b0
+ [<ffffffff816a9ae3>] ip_fragment.constprop.49+0x43/0x80
+ [<ffffffff816a9c9c>] ip_finish_output+0x17c/0x340
+ [<ffffffff8169a6f4>] ? nf_hook_slow+0xe4/0x190
+ [<ffffffff816ab4c0>] ip_output+0x70/0x110
+ [<ffffffff816a9b20>] ? ip_fragment.constprop.49+0x80/0x80
+ [<ffffffff816aa9f9>] ip_local_out+0x39/0x70
+ [<ffffffff816abf89>] ip_send_skb+0x19/0x40
+ [<ffffffff816abfe3>] ip_push_pending_frames+0x33/0x40
+ [<ffffffff816d55d3>] raw_sendmsg+0x7d3/0xc30
+ [<ffffffff810b732b>] ? __lock_acquire+0x3db/0x1b90
+ [<ffffffff816e7557>] ? inet_sendmsg+0xc7/0x1d0
+ [<ffffffff810b63c4>] ? __lock_is_held+0x54/0x70
+ [<ffffffff816e759a>] inet_sendmsg+0x10a/0x1d0
+ [<ffffffff816e7495>] ? inet_sendmsg+0x5/0x1d0
+ [<ffffffff8163e398>] sock_sendmsg+0x38/0x50
+ [<ffffffff8163ec5f>] ___sys_sendmsg+0x25f/0x270
+ [<ffffffff811aadad>] ? handle_mm_fault+0x8dd/0x1320
+ [<ffffffff8178c147>] ? _raw_spin_unlock+0x27/0x40
+ [<ffffffff810529b2>] ? __do_page_fault+0x1e2/0x460
+ [<ffffffff81204886>] ? __fget_light+0x66/0x90
+ [<ffffffff8163f8e2>] __sys_sendmsg+0x42/0x80
+ [<ffffffff8163f932>] SyS_sendmsg+0x12/0x20
+ [<ffffffff8178cb17>] entry_SYSCALL_64_fastpath+0x12/0x6f
+Code: 00 00 44 89 e0 e9 7c fb ff ff 4c 89 ff e8 e7 e7 ff ff 41 8b 9d 80 00 00 00 2b 5d d4 89 d8 c1 f8 03 0f b7 c0 e9 33 ff ff f
+ 66 66 66 2e 0f 1f 84 00 00 00 00 00 66 66 66 66 90 55 48
+RIP  [<ffffffff816a9a92>] ip_do_fragment+0x892/0x8a0
+ RSP <ffff88006d603170>
+
+Fixes: 7f8a436eaa2c ("openvswitch: Add conntrack action")
+Signed-off-by: Joe Stringer <joe@ovn.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ip_fragment.c              |    1 +
+ net/ipv4/netfilter/nf_defrag_ipv4.c |    2 --
+ 2 files changed, 1 insertion(+), 2 deletions(-)
+
+--- a/net/ipv4/ip_fragment.c
++++ b/net/ipv4/ip_fragment.c
+@@ -661,6 +661,7 @@ int ip_defrag(struct net *net, struct sk
+       struct ipq *qp;
+       IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS);
++      skb_orphan(skb);
+       /* Lookup (or create) queue header */
+       qp = ip_find(net, ip_hdr(skb), user, vif);
+--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
++++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
+@@ -27,8 +27,6 @@ static int nf_ct_ipv4_gather_frags(struc
+ {
+       int err;
+-      skb_orphan(skb);
+-
+       local_bh_disable();
+       err = ip_defrag(net, skb, user);
+       local_bh_enable();
diff --git a/queue-4.4/ipv4-fix-memory-leaks-in-ip_cmsg_send-callers.patch b/queue-4.4/ipv4-fix-memory-leaks-in-ip_cmsg_send-callers.patch
new file mode 100644 (file)
index 0000000..19bc874
--- /dev/null
@@ -0,0 +1,80 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Eric Dumazet <edumazet@google.com>
+Date: Thu, 4 Feb 2016 06:23:28 -0800
+Subject: ipv4: fix memory leaks in ip_cmsg_send() callers
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 919483096bfe75dda338e98d56da91a263746a0a ]
+
+Dmitry reported memory leaks of IP options allocated in
+ip_cmsg_send() when/if this function returns an error.
+
+Callers are responsible for the freeing.
+
+Many thanks to Dmitry for the report and diagnostic.
+
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ip_sockglue.c |    2 ++
+ net/ipv4/ping.c        |    4 +++-
+ net/ipv4/raw.c         |    4 +++-
+ net/ipv4/udp.c         |    4 +++-
+ 4 files changed, 11 insertions(+), 3 deletions(-)
+
+--- a/net/ipv4/ip_sockglue.c
++++ b/net/ipv4/ip_sockglue.c
+@@ -249,6 +249,8 @@ int ip_cmsg_send(struct net *net, struct
+               switch (cmsg->cmsg_type) {
+               case IP_RETOPTS:
+                       err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr));
++
++                      /* Our caller is responsible for freeing ipc->opt */
+                       err = ip_options_get(net, &ipc->opt, CMSG_DATA(cmsg),
+                                            err < 40 ? err : 40);
+                       if (err)
+--- a/net/ipv4/ping.c
++++ b/net/ipv4/ping.c
+@@ -746,8 +746,10 @@ static int ping_v4_sendmsg(struct sock *
+       if (msg->msg_controllen) {
+               err = ip_cmsg_send(sock_net(sk), msg, &ipc, false);
+-              if (err)
++              if (unlikely(err)) {
++                      kfree(ipc.opt);
+                       return err;
++              }
+               if (ipc.opt)
+                       free = 1;
+       }
+--- a/net/ipv4/raw.c
++++ b/net/ipv4/raw.c
+@@ -547,8 +547,10 @@ static int raw_sendmsg(struct sock *sk,
+       if (msg->msg_controllen) {
+               err = ip_cmsg_send(net, msg, &ipc, false);
+-              if (err)
++              if (unlikely(err)) {
++                      kfree(ipc.opt);
+                       goto out;
++              }
+               if (ipc.opt)
+                       free = 1;
+       }
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -966,8 +966,10 @@ int udp_sendmsg(struct sock *sk, struct
+       if (msg->msg_controllen) {
+               err = ip_cmsg_send(sock_net(sk), msg, &ipc,
+                                  sk->sk_family == AF_INET6);
+-              if (err)
++              if (unlikely(err)) {
++                      kfree(ipc.opt);
+                       return err;
++              }
+               if (ipc.opt)
+                       free = 1;
+               connected = 0;
diff --git a/queue-4.4/ipv6-addrconf-fix-recursive-spin-lock-call.patch b/queue-4.4/ipv6-addrconf-fix-recursive-spin-lock-call.patch
new file mode 100644 (file)
index 0000000..60be451
--- /dev/null
@@ -0,0 +1,79 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: "subashab@codeaurora.org" <subashab@codeaurora.org>
+Date: Tue, 2 Feb 2016 02:11:10 +0000
+Subject: ipv6: addrconf: Fix recursive spin lock call
+
+From: "subashab@codeaurora.org" <subashab@codeaurora.org>
+
+[ Upstream commit 16186a82de1fdd868255448274e64ae2616e2640 ]
+
+A rcu stall with the following backtrace was seen on a system with
+forwarding, optimistic_dad and use_optimistic set. To reproduce,
+set these flags and allow ipv6 autoconf.
+
+This occurs because the device write_lock is acquired while already
+holding the read_lock. Back trace below -
+
+INFO: rcu_preempt self-detected stall on CPU { 1}  (t=2100 jiffies
+ g=3992 c=3991 q=4471)
+<6> Task dump for CPU 1:
+<2> kworker/1:0     R  running task    12168    15   2 0x00000002
+<2> Workqueue: ipv6_addrconf addrconf_dad_work
+<6> Call trace:
+<2> [<ffffffc000084da8>] el1_irq+0x68/0xdc
+<2> [<ffffffc000cc4e0c>] _raw_write_lock_bh+0x20/0x30
+<2> [<ffffffc000bc5dd8>] __ipv6_dev_ac_inc+0x64/0x1b4
+<2> [<ffffffc000bcbd2c>] addrconf_join_anycast+0x9c/0xc4
+<2> [<ffffffc000bcf9f0>] __ipv6_ifa_notify+0x160/0x29c
+<2> [<ffffffc000bcfb7c>] ipv6_ifa_notify+0x50/0x70
+<2> [<ffffffc000bd035c>] addrconf_dad_work+0x314/0x334
+<2> [<ffffffc0000b64c8>] process_one_work+0x244/0x3fc
+<2> [<ffffffc0000b7324>] worker_thread+0x2f8/0x418
+<2> [<ffffffc0000bb40c>] kthread+0xe0/0xec
+
+v2: do addrconf_dad_kick inside read lock and then acquire write
+lock for ipv6_ifa_notify as suggested by Eric
+
+Fixes: 7fd2561e4ebdd ("net: ipv6: Add a sysctl to make optimistic
+addresses useful candidates")
+
+Cc: Eric Dumazet <edumazet@google.com>
+Cc: Erik Kline <ek@google.com>
+Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
+Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/addrconf.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/net/ipv6/addrconf.c
++++ b/net/ipv6/addrconf.c
+@@ -3506,6 +3506,7 @@ static void addrconf_dad_begin(struct in
+ {
+       struct inet6_dev *idev = ifp->idev;
+       struct net_device *dev = idev->dev;
++      bool notify = false;
+       addrconf_join_solict(dev, &ifp->addr);
+@@ -3551,7 +3552,7 @@ static void addrconf_dad_begin(struct in
+                       /* Because optimistic nodes can use this address,
+                        * notify listeners. If DAD fails, RTM_DELADDR is sent.
+                        */
+-                      ipv6_ifa_notify(RTM_NEWADDR, ifp);
++                      notify = true;
+               }
+       }
+@@ -3559,6 +3560,8 @@ static void addrconf_dad_begin(struct in
+ out:
+       spin_unlock(&ifp->lock);
+       read_unlock_bh(&idev->lock);
++      if (notify)
++              ipv6_ifa_notify(RTM_NEWADDR, ifp);
+ }
+ static void addrconf_dad_start(struct inet6_ifaddr *ifp)
diff --git a/queue-4.4/ipv6-enforce-flowi6_oif-usage-in-ip6_dst_lookup_tail.patch b/queue-4.4/ipv6-enforce-flowi6_oif-usage-in-ip6_dst_lookup_tail.patch
new file mode 100644 (file)
index 0000000..317912a
--- /dev/null
@@ -0,0 +1,109 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Fri, 29 Jan 2016 12:30:19 +0100
+Subject: ipv6: enforce flowi6_oif usage in ip6_dst_lookup_tail()
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit 6f21c96a78b835259546d8f3fb4edff0f651d478 ]
+
+The current implementation of ip6_dst_lookup_tail basically
+ignore the egress ifindex match: if the saddr is set,
+ip6_route_output() purposefully ignores flowi6_oif, due
+to the commit d46a9d678e4c ("net: ipv6: Dont add RT6_LOOKUP_F_IFACE
+flag if saddr set"), if the saddr is 'any' the first route lookup
+in ip6_dst_lookup_tail fails, but upon failure a second lookup will
+be performed with saddr set, thus ignoring the ifindex constraint.
+
+This commit adds an output route lookup function variant, which
+allows the caller to specify lookup flags, and modify
+ip6_dst_lookup_tail() to enforce the ifindex match on the second
+lookup via said helper.
+
+ip6_route_output() now becomes a static inline function built on
+top of ip6_route_output_flags(); as a side effect, out-of-tree
+modules need now a GPL license to access the output route lookup
+functionality.
+
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Acked-by: David Ahern <dsa@cumulusnetworks.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/ip6_route.h |   12 ++++++++++--
+ net/ipv6/ip6_output.c   |    6 +++++-
+ net/ipv6/route.c        |    7 +++----
+ 3 files changed, 18 insertions(+), 7 deletions(-)
+
+--- a/include/net/ip6_route.h
++++ b/include/net/ip6_route.h
+@@ -64,8 +64,16 @@ static inline bool rt6_need_strict(const
+ void ip6_route_input(struct sk_buff *skb);
+-struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
+-                                 struct flowi6 *fl6);
++struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
++                                       struct flowi6 *fl6, int flags);
++
++static inline struct dst_entry *ip6_route_output(struct net *net,
++                                               const struct sock *sk,
++                                               struct flowi6 *fl6)
++{
++      return ip6_route_output_flags(net, sk, fl6, 0);
++}
++
+ struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
+                                  int flags);
+--- a/net/ipv6/ip6_output.c
++++ b/net/ipv6/ip6_output.c
+@@ -909,6 +909,7 @@ static int ip6_dst_lookup_tail(struct ne
+       struct rt6_info *rt;
+ #endif
+       int err;
++      int flags = 0;
+       /* The correct way to handle this would be to do
+        * ip6_route_get_saddr, and then ip6_route_output; however,
+@@ -940,10 +941,13 @@ static int ip6_dst_lookup_tail(struct ne
+                       dst_release(*dst);
+                       *dst = NULL;
+               }
++
++              if (fl6->flowi6_oif)
++                      flags |= RT6_LOOKUP_F_IFACE;
+       }
+       if (!*dst)
+-              *dst = ip6_route_output(net, sk, fl6);
++              *dst = ip6_route_output_flags(net, sk, fl6, flags);
+       err = (*dst)->error;
+       if (err)
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -1174,11 +1174,10 @@ static struct rt6_info *ip6_pol_route_ou
+       return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
+ }
+-struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
+-                                  struct flowi6 *fl6)
++struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
++                                       struct flowi6 *fl6, int flags)
+ {
+       struct dst_entry *dst;
+-      int flags = 0;
+       bool any_src;
+       dst = l3mdev_rt6_dst_by_oif(net, fl6);
+@@ -1199,7 +1198,7 @@ struct dst_entry *ip6_route_output(struc
+       return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
+ }
+-EXPORT_SYMBOL(ip6_route_output);
++EXPORT_SYMBOL_GPL(ip6_route_output_flags);
+ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
+ {
diff --git a/queue-4.4/ipv6-fix-a-lockdep-splat.patch b/queue-4.4/ipv6-fix-a-lockdep-splat.patch
new file mode 100644 (file)
index 0000000..f85e130
--- /dev/null
@@ -0,0 +1,44 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 2 Feb 2016 17:55:01 -0800
+Subject: ipv6: fix a lockdep splat
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 44c3d0c1c0a880354e9de5d94175742e2c7c9683 ]
+
+Silence lockdep false positive about rcu_dereference() being
+used in the wrong context.
+
+First one should use rcu_dereference_protected() as we own the spinlock.
+
+Second one should be a normal assignation, as no barrier is needed.
+
+Fixes: 18367681a10bd ("ipv6 flowlabel: Convert np->ipv6_fl_list to RCU.")
+Reported-by: Dave Jones <davej@codemonkey.org.uk>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_flowlabel.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/net/ipv6/ip6_flowlabel.c
++++ b/net/ipv6/ip6_flowlabel.c
+@@ -540,12 +540,13 @@ int ipv6_flowlabel_opt(struct sock *sk,
+               }
+               spin_lock_bh(&ip6_sk_fl_lock);
+               for (sflp = &np->ipv6_fl_list;
+-                   (sfl = rcu_dereference(*sflp)) != NULL;
++                   (sfl = rcu_dereference_protected(*sflp,
++                                                    lockdep_is_held(&ip6_sk_fl_lock))) != NULL;
+                    sflp = &sfl->next) {
+                       if (sfl->fl->label == freq.flr_label) {
+                               if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK))
+                                       np->flow_label &= ~IPV6_FLOWLABEL_MASK;
+-                              *sflp = rcu_dereference(sfl->next);
++                              *sflp = sfl->next;
+                               spin_unlock_bh(&ip6_sk_fl_lock);
+                               fl_release(sfl->fl);
+                               kfree_rcu(sfl, rcu);
diff --git a/queue-4.4/ipv6-udp-use-sticky-pktinfo-egress-ifindex-on-connect.patch b/queue-4.4/ipv6-udp-use-sticky-pktinfo-egress-ifindex-on-connect.patch
new file mode 100644 (file)
index 0000000..a99eeb3
--- /dev/null
@@ -0,0 +1,36 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Fri, 29 Jan 2016 12:30:20 +0100
+Subject: ipv6/udp: use sticky pktinfo egress ifindex on connect()
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit 1cdda91871470f15e79375991bd2eddc6e86ddb1 ]
+
+Currently, the egress interface index specified via IPV6_PKTINFO
+is ignored by __ip6_datagram_connect(), so that RFC 3542 section 6.7
+can be subverted when the user space application calls connect()
+before sendmsg().
+Fix it by initializing properly flowi6_oif in connect() before
+performing the route lookup.
+
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/datagram.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/net/ipv6/datagram.c
++++ b/net/ipv6/datagram.c
+@@ -162,6 +162,9 @@ ipv4_connected:
+       fl6.fl6_dport = inet->inet_dport;
+       fl6.fl6_sport = inet->inet_sport;
++      if (!fl6.flowi6_oif)
++              fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
++
+       if (!fl6.flowi6_oif && (addr_type&IPV6_ADDR_MULTICAST))
+               fl6.flowi6_oif = np->mcast_oif;
diff --git a/queue-4.4/l2tp-fix-error-creating-l2tp-tunnels.patch b/queue-4.4/l2tp-fix-error-creating-l2tp-tunnels.patch
new file mode 100644 (file)
index 0000000..40f859a
--- /dev/null
@@ -0,0 +1,59 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Mark Tomlinson <mark.tomlinson@alliedtelesis.co.nz>
+Date: Mon, 15 Feb 2016 16:24:44 +1300
+Subject: l2tp: Fix error creating L2TP tunnels
+
+From: Mark Tomlinson <mark.tomlinson@alliedtelesis.co.nz>
+
+[ Upstream commit 853effc55b0f975abd6d318cca486a9c1b67e10f ]
+
+A previous commit (33f72e6) added notification via netlink for tunnels
+when created/modified/deleted. If the notification returned an error,
+this error was returned from the tunnel function. If there were no
+listeners, the error code ESRCH was returned, even though having no
+listeners is not an error. Other calls to this and other similar
+notification functions either ignore the error code, or filter ESRCH.
+This patch checks for ESRCH and does not flag this as an error.
+
+Reviewed-by: Hamish Martin <hamish.martin@alliedtelesis.co.nz>
+Signed-off-by: Mark Tomlinson <mark.tomlinson@alliedtelesis.co.nz>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/l2tp/l2tp_netlink.c |   18 ++++++++++++++----
+ 1 file changed, 14 insertions(+), 4 deletions(-)
+
+--- a/net/l2tp/l2tp_netlink.c
++++ b/net/l2tp/l2tp_netlink.c
+@@ -124,8 +124,13 @@ static int l2tp_tunnel_notify(struct gen
+       ret = l2tp_nl_tunnel_send(msg, info->snd_portid, info->snd_seq,
+                                 NLM_F_ACK, tunnel, cmd);
+-      if (ret >= 0)
+-              return genlmsg_multicast_allns(family, msg, 0,  0, GFP_ATOMIC);
++      if (ret >= 0) {
++              ret = genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC);
++              /* We don't care if no one is listening */
++              if (ret == -ESRCH)
++                      ret = 0;
++              return ret;
++      }
+       nlmsg_free(msg);
+@@ -147,8 +152,13 @@ static int l2tp_session_notify(struct ge
+       ret = l2tp_nl_session_send(msg, info->snd_portid, info->snd_seq,
+                                  NLM_F_ACK, session, cmd);
+-      if (ret >= 0)
+-              return genlmsg_multicast_allns(family, msg, 0,  0, GFP_ATOMIC);
++      if (ret >= 0) {
++              ret = genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC);
++              /* We don't care if no one is listening */
++              if (ret == -ESRCH)
++                      ret = 0;
++              return ret;
++      }
+       nlmsg_free(msg);
diff --git a/queue-4.4/lwt-fix-rx-checksum-setting-for-lwt-devices-tunneling-over-ipv6.patch b/queue-4.4/lwt-fix-rx-checksum-setting-for-lwt-devices-tunneling-over-ipv6.patch
new file mode 100644 (file)
index 0000000..701bd7d
--- /dev/null
@@ -0,0 +1,37 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Wed, 17 Feb 2016 19:30:01 +0100
+Subject: lwt: fix rx checksum setting for lwt devices tunneling over ipv6
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit c868ee7063bdb53f3ef9eac7bcec84960980b471 ]
+
+the commit 35e2d1152b22 ("tunnels: Allow IPv6 UDP checksums to be
+correctly controlled.") changed the default xmit checksum setting
+for lwt vxlan/geneve ipv6 tunnels, so that now the checksum is not
+set into external UDP header.
+This commit changes the rx checksum setting for both lwt vxlan/geneve
+devices created by openvswitch accordingly, so that lwt over ipv6
+tunnel pairs are again able to communicate with default values.
+
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Acked-by: Jiri Benc <jbenc@redhat.com>
+Acked-by: Jesse Gross <jesse@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/openvswitch/vport-vxlan.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/openvswitch/vport-vxlan.c
++++ b/net/openvswitch/vport-vxlan.c
+@@ -90,7 +90,7 @@ static struct vport *vxlan_tnl_create(co
+       int err;
+       struct vxlan_config conf = {
+               .no_share = true,
+-              .flags = VXLAN_F_COLLECT_METADATA,
++              .flags = VXLAN_F_COLLECT_METADATA | VXLAN_F_UDP_ZERO_CSUM6_RX,
+       };
+       if (!options) {
diff --git a/queue-4.4/net-add-sysctl_max_skb_frags.patch b/queue-4.4/net-add-sysctl_max_skb_frags.patch
new file mode 100644 (file)
index 0000000..c2e30c9
--- /dev/null
@@ -0,0 +1,99 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Hans Westgaard Ry <hans.westgaard.ry@oracle.com>
+Date: Wed, 3 Feb 2016 09:26:57 +0100
+Subject: net:Add sysctl_max_skb_frags
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Hans Westgaard Ry <hans.westgaard.ry@oracle.com>
+
+[ Upstream commit 5f74f82ea34c0da80ea0b49192bb5ea06e063593 ]
+
+Devices may have limits on the number of fragments in an skb they support.
+Current codebase uses a constant as maximum for number of fragments one
+skb can hold and use.
+When enabling scatter/gather and running traffic with many small messages
+the codebase uses the maximum number of fragments and may thereby violate
+the max for certain devices.
+The patch introduces a global variable as max number of fragments.
+
+Signed-off-by: Hans Westgaard Ry <hans.westgaard.ry@oracle.com>
+Reviewed-by: HÃ¥kon Bugge <haakon.bugge@oracle.com>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/skbuff.h     |    1 +
+ net/core/skbuff.c          |    2 ++
+ net/core/sysctl_net_core.c |   10 ++++++++++
+ net/ipv4/tcp.c             |    4 ++--
+ 4 files changed, 15 insertions(+), 2 deletions(-)
+
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -219,6 +219,7 @@ struct sk_buff;
+ #else
+ #define MAX_SKB_FRAGS (65536/PAGE_SIZE + 1)
+ #endif
++extern int sysctl_max_skb_frags;
+ typedef struct skb_frag_struct skb_frag_t;
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -79,6 +79,8 @@
+ struct kmem_cache *skbuff_head_cache __read_mostly;
+ static struct kmem_cache *skbuff_fclone_cache __read_mostly;
++int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
++EXPORT_SYMBOL(sysctl_max_skb_frags);
+ /**
+  *    skb_panic - private function for out-of-line support
+--- a/net/core/sysctl_net_core.c
++++ b/net/core/sysctl_net_core.c
+@@ -26,6 +26,7 @@ static int zero = 0;
+ static int one = 1;
+ static int min_sndbuf = SOCK_MIN_SNDBUF;
+ static int min_rcvbuf = SOCK_MIN_RCVBUF;
++static int max_skb_frags = MAX_SKB_FRAGS;
+ static int net_msg_warn;      /* Unused, but still a sysctl */
+@@ -392,6 +393,15 @@ static struct ctl_table net_core_table[]
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec
+       },
++      {
++              .procname       = "max_skb_frags",
++              .data           = &sysctl_max_skb_frags,
++              .maxlen         = sizeof(int),
++              .mode           = 0644,
++              .proc_handler   = proc_dointvec_minmax,
++              .extra1         = &one,
++              .extra2         = &max_skb_frags,
++      },
+       { }
+ };
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -939,7 +939,7 @@ new_segment:
+               i = skb_shinfo(skb)->nr_frags;
+               can_coalesce = skb_can_coalesce(skb, i, page, offset);
+-              if (!can_coalesce && i >= MAX_SKB_FRAGS) {
++              if (!can_coalesce && i >= sysctl_max_skb_frags) {
+                       tcp_mark_push(tp, skb);
+                       goto new_segment;
+               }
+@@ -1212,7 +1212,7 @@ new_segment:
+                       if (!skb_can_coalesce(skb, i, pfrag->page,
+                                             pfrag->offset)) {
+-                              if (i == MAX_SKB_FRAGS || !sg) {
++                              if (i == sysctl_max_skb_frags || !sg) {
+                                       tcp_mark_push(tp, skb);
+                                       goto new_segment;
+                               }
diff --git a/queue-4.4/net-copy-inner-l3-and-l4-headers-as-unaligned-on-gre-teb.patch b/queue-4.4/net-copy-inner-l3-and-l4-headers-as-unaligned-on-gre-teb.patch
new file mode 100644 (file)
index 0000000..eeb02ce
--- /dev/null
@@ -0,0 +1,38 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Alexander Duyck <aduyck@mirantis.com>
+Date: Tue, 9 Feb 2016 06:14:43 -0800
+Subject: net: Copy inner L3 and L4 headers as unaligned on GRE TEB
+
+From: Alexander Duyck <aduyck@mirantis.com>
+
+[ Upstream commit 78565208d73ca9b654fb9a6b142214d52eeedfd1 ]
+
+This patch corrects the unaligned accesses seen on GRE TEB tunnels when
+generating hash keys.  Specifically what this patch does is make it so that
+we force the use of skb_copy_bits when the GRE inner headers will be
+unaligned due to NET_IP_ALIGNED being a non-zero value.
+
+Signed-off-by: Alexander Duyck <aduyck@mirantis.com>
+Acked-by: Tom Herbert <tom@herbertland.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/flow_dissector.c |    7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/net/core/flow_dissector.c
++++ b/net/core/flow_dissector.c
+@@ -396,6 +396,13 @@ ip_proto_again:
+                               goto out_bad;
+                       proto = eth->h_proto;
+                       nhoff += sizeof(*eth);
++
++                      /* Cap headers that we access via pointers at the
++                       * end of the Ethernet header as our maximum alignment
++                       * at that point is only 2 bytes.
++                       */
++                      if (NET_IP_ALIGN)
++                              hlen = nhoff;
+               }
+               key_control->flags |= FLOW_DIS_ENCAPSULATION;
diff --git a/queue-4.4/net-dp83640-fix-tx-timestamp-overflow-handling.patch b/queue-4.4/net-dp83640-fix-tx-timestamp-overflow-handling.patch
new file mode 100644 (file)
index 0000000..326615e
--- /dev/null
@@ -0,0 +1,68 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Manfred Rudigier <Manfred.Rudigier@omicron.at>
+Date: Wed, 20 Jan 2016 11:22:28 +0100
+Subject: net: dp83640: Fix tx timestamp overflow handling.
+
+From: Manfred Rudigier <Manfred.Rudigier@omicron.at>
+
+[ Upstream commit 81e8f2e930fe76b9814c71b9d87c30760b5eb705 ]
+
+PHY status frames are not reliable, the PHY may not be able to send them
+during heavy receive traffic. This overflow condition is signaled by the
+PHY in the next status frame, but the driver did not make use of it.
+Instead it always reported wrong tx timestamps to user space after an
+overflow happened because it assigned newly received tx timestamps to old
+packets in the queue.
+
+This commit fixes this issue by clearing the tx timestamp queue every time
+an overflow happens, so that no timestamps are delivered for overflow
+packets. This way time stamping will continue correctly after an overflow.
+
+Signed-off-by: Manfred Rudigier <manfred.rudigier@omicron.at>
+Acked-by: Richard Cochran <richardcochran@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/phy/dp83640.c |   17 +++++++++++++++++
+ 1 file changed, 17 insertions(+)
+
+--- a/drivers/net/phy/dp83640.c
++++ b/drivers/net/phy/dp83640.c
+@@ -845,6 +845,11 @@ static void decode_rxts(struct dp83640_p
+       struct skb_shared_hwtstamps *shhwtstamps = NULL;
+       struct sk_buff *skb;
+       unsigned long flags;
++      u8 overflow;
++
++      overflow = (phy_rxts->ns_hi >> 14) & 0x3;
++      if (overflow)
++              pr_debug("rx timestamp queue overflow, count %d\n", overflow);
+       spin_lock_irqsave(&dp83640->rx_lock, flags);
+@@ -887,6 +892,7 @@ static void decode_txts(struct dp83640_p
+       struct skb_shared_hwtstamps shhwtstamps;
+       struct sk_buff *skb;
+       u64 ns;
++      u8 overflow;
+       /* We must already have the skb that triggered this. */
+@@ -896,6 +902,17 @@ static void decode_txts(struct dp83640_p
+               pr_debug("have timestamp but tx_queue empty\n");
+               return;
+       }
++
++      overflow = (phy_txts->ns_hi >> 14) & 0x3;
++      if (overflow) {
++              pr_debug("tx timestamp queue overflow, count %d\n", overflow);
++              while (skb) {
++                      skb_complete_tx_timestamp(skb, NULL);
++                      skb = skb_dequeue(&dp83640->tx_queue);
++              }
++              return;
++      }
++
+       ns = phy2txts(phy_txts);
+       memset(&shhwtstamps, 0, sizeof(shhwtstamps));
+       shhwtstamps.hwtstamp = ns_to_ktime(ns);
diff --git a/queue-4.4/net-dsa-fix-mv88e6xxx-switches.patch b/queue-4.4/net-dsa-fix-mv88e6xxx-switches.patch
new file mode 100644 (file)
index 0000000..3b849c4
--- /dev/null
@@ -0,0 +1,71 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Russell King <rmk+kernel@arm.linux.org.uk>
+Date: Sun, 24 Jan 2016 09:22:05 +0000
+Subject: net: dsa: fix mv88e6xxx switches
+
+From: Russell King <rmk+kernel@arm.linux.org.uk>
+
+[ Upstream commit db0e51afa481088e6396f11e02018d64113a6578 ]
+
+Since commit 76e398a62712 ("net: dsa: use switchdev obj for VLAN add/del
+ops"), the Marvell 88E6xxx switch has been unable to pass traffic
+between ports - any received traffic is discarded by the switch.
+Taking a port out of bridge mode and configuring a vlan on it also the
+port to start passing traffic.
+
+With the debugfs files re-instated to allow debug of this issue by
+comparing the register settings between the working and non-working
+case, the reason becomes clear:
+
+     GLOBAL GLOBAL2 SERDES   0    1    2    3    4    5    6
+- 7:  1111    707f    2001     2    2    2    2    2    0    2
++ 7:  1111    707f    2001     1    1    1    1    1    0    1
+
+Register 7 for the ports is the default vlan tag register, and in the
+non-working setup, it has been set to 2, despite vlan 2 not being
+configured.  This causes the switch to drop all packets coming in to
+these ports.  The working setup has the default vlan tag register set
+to 1, which is the default vlan when none is configured.
+
+Inspection of the code reveals why.  The code prior to this commit
+was:
+
+-              for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) {
+...
+-                      if (!err && vlan->flags & BRIDGE_VLAN_INFO_PVID)
+-                              err = ds->drv->port_pvid_set(ds, p->port, vid);
+
+but the new code is:
+
++      for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) {
+...
++      }
+...
++      if (pvid)
++              err = _mv88e6xxx_port_pvid_set(ds, port, vid);
+
+This causes the new code to always set the default vlan to one higher
+than the old code.
+
+Fix this.
+
+Fixes: 76e398a62712 ("net: dsa: use switchdev obj for VLAN add/del ops")
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/mv88e6xxx.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/dsa/mv88e6xxx.c
++++ b/drivers/net/dsa/mv88e6xxx.c
+@@ -1519,7 +1519,7 @@ int mv88e6xxx_port_vlan_add(struct dsa_s
+       /* no PVID with ranges, otherwise it's a bug */
+       if (pvid)
+-              err = _mv88e6xxx_port_pvid_set(ds, port, vid);
++              err = _mv88e6xxx_port_pvid_set(ds, port, vlan->vid_end);
+ unlock:
+       mutex_unlock(&ps->smi_mutex);
diff --git a/queue-4.4/net-mlx4_en-avoid-changing-dev-features-directly-in-run-time.patch b/queue-4.4/net-mlx4_en-avoid-changing-dev-features-directly-in-run-time.patch
new file mode 100644 (file)
index 0000000..617d0fb
--- /dev/null
@@ -0,0 +1,56 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Eugenia Emantayev <eugenia@mellanox.com>
+Date: Wed, 17 Feb 2016 17:24:27 +0200
+Subject: net/mlx4_en: Avoid changing dev->features directly in run-time
+
+From: Eugenia Emantayev <eugenia@mellanox.com>
+
+[ Upstream commit 925ab1aa9394bbaeac47ee5b65d3fdf0fb8135cf ]
+
+It's forbidden to manually change dev->features in run-time. Currently, this is
+done in the driver to make sure that GSO_UDP_TUNNEL is advertized only when
+VXLAN tunnel is set. However, since the stack actually does features intersection
+with hw_enc_features, we can safely revert to advertizing features early when
+registering the netdevice.
+
+Fixes: f4a1edd56120 ('net/mlx4_en: Advertize encapsulation offloads [...]')
+Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
+Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx4/en_netdev.c |    9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
++++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+@@ -2381,8 +2381,6 @@ out:
+       /* set offloads */
+       priv->dev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
+                                     NETIF_F_TSO | NETIF_F_GSO_UDP_TUNNEL;
+-      priv->dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
+-      priv->dev->features    |= NETIF_F_GSO_UDP_TUNNEL;
+ }
+ static void mlx4_en_del_vxlan_offloads(struct work_struct *work)
+@@ -2393,8 +2391,6 @@ static void mlx4_en_del_vxlan_offloads(s
+       /* unset offloads */
+       priv->dev->hw_enc_features &= ~(NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
+                                     NETIF_F_TSO | NETIF_F_GSO_UDP_TUNNEL);
+-      priv->dev->hw_features &= ~NETIF_F_GSO_UDP_TUNNEL;
+-      priv->dev->features    &= ~NETIF_F_GSO_UDP_TUNNEL;
+       ret = mlx4_SET_PORT_VXLAN(priv->mdev->dev, priv->port,
+                                 VXLAN_STEER_BY_OUTER_MAC, 0);
+@@ -3020,6 +3016,11 @@ int mlx4_en_init_netdev(struct mlx4_en_d
+               priv->rss_hash_fn = ETH_RSS_HASH_TOP;
+       }
++      if (mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) {
++              dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
++              dev->features    |= NETIF_F_GSO_UDP_TUNNEL;
++      }
++
+       mdev->pndev[port] = dev;
+       mdev->upper[port] = NULL;
diff --git a/queue-4.4/net-mlx4_en-choose-time-stamping-shift-value-according-to-hw-frequency.patch b/queue-4.4/net-mlx4_en-choose-time-stamping-shift-value-according-to-hw-frequency.patch
new file mode 100644 (file)
index 0000000..ebb2510
--- /dev/null
@@ -0,0 +1,74 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Eugenia Emantayev <eugenia@mellanox.com>
+Date: Wed, 17 Feb 2016 17:24:23 +0200
+Subject: net/mlx4_en: Choose time-stamping shift value according to HW frequency
+
+From: Eugenia Emantayev <eugenia@mellanox.com>
+
+[ Upstream commit 31c128b66e5b28f468076e4f3ca3025c35342041 ]
+
+Previously, the shift value used for time-stamping was constant and didn't
+depend on the HW chip frequency. Change that to take the frequency into account
+and calculate the maximal value in cycles per wraparound of ten seconds. This
+time slot was chosen since it gives a good accuracy in time synchronization.
+
+Algorithm for shift value calculation:
+ * Round up the maximal value in cycles to nearest power of two
+
+ * Calculate maximal multiplier by division of all 64 bits set
+   to above result
+
+ * Then, invert the function clocksource_khz2mult() to get the shift from
+   maximal mult value
+
+Fixes: ec693d47010e ('net/mlx4_en: Add HW timestamping (TS) support')
+Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
+Reviewed-by: Matan Barak <matanb@mellanox.com>
+Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx4/en_clock.c |   25 +++++++++++++++++++------
+ 1 file changed, 19 insertions(+), 6 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c
++++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c
+@@ -236,6 +236,24 @@ static const struct ptp_clock_info mlx4_
+       .enable         = mlx4_en_phc_enable,
+ };
++#define MLX4_EN_WRAP_AROUND_SEC       10ULL
++
++/* This function calculates the max shift that enables the user range
++ * of MLX4_EN_WRAP_AROUND_SEC values in the cycles register.
++ */
++static u32 freq_to_shift(u16 freq)
++{
++      u32 freq_khz = freq * 1000;
++      u64 max_val_cycles = freq_khz * 1000 * MLX4_EN_WRAP_AROUND_SEC;
++      u64 max_val_cycles_rounded = is_power_of_2(max_val_cycles + 1) ?
++              max_val_cycles : roundup_pow_of_two(max_val_cycles) - 1;
++      /* calculate max possible multiplier in order to fit in 64bit */
++      u64 max_mul = div_u64(0xffffffffffffffffULL, max_val_cycles_rounded);
++
++      /* This comes from the reverse of clocksource_khz2mult */
++      return ilog2(div_u64(max_mul * freq_khz, 1000000));
++}
++
+ void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev)
+ {
+       struct mlx4_dev *dev = mdev->dev;
+@@ -254,12 +272,7 @@ void mlx4_en_init_timestamp(struct mlx4_
+       memset(&mdev->cycles, 0, sizeof(mdev->cycles));
+       mdev->cycles.read = mlx4_en_read_clock;
+       mdev->cycles.mask = CLOCKSOURCE_MASK(48);
+-      /* Using shift to make calculation more accurate. Since current HW
+-       * clock frequency is 427 MHz, and cycles are given using a 48 bits
+-       * register, the biggest shift when calculating using u64, is 14
+-       * (max_cycles * multiplier < 2^64)
+-       */
+-      mdev->cycles.shift = 14;
++      mdev->cycles.shift = freq_to_shift(dev->caps.hca_core_clock);
+       mdev->cycles.mult =
+               clocksource_khz2mult(1000 * dev->caps.hca_core_clock, mdev->cycles.shift);
+       mdev->nominal_c_mult = mdev->cycles.mult;
diff --git a/queue-4.4/net-mlx4_en-count-hw-buffer-overrun-only-once.patch b/queue-4.4/net-mlx4_en-count-hw-buffer-overrun-only-once.patch
new file mode 100644 (file)
index 0000000..b208a18
--- /dev/null
@@ -0,0 +1,44 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Amir Vadai <amir@vadai.me>
+Date: Wed, 17 Feb 2016 17:24:22 +0200
+Subject: net/mlx4_en: Count HW buffer overrun only once
+
+From: Amir Vadai <amir@vadai.me>
+
+[ Upstream commit 281e8b2fdf8e4ef366b899453cae50e09b577ada ]
+
+RdropOvflw counts overrun of HW buffer, therefore should
+be used for rx_fifo_errors only.
+
+Currently RdropOvflw counter is mistakenly also set into
+rx_missed_errors and rx_over_errors too, which makes the
+device total dropped packets accounting to show wrong results.
+
+Fix that. Use it for rx_fifo_errors only.
+
+Fixes: c27a02cd94d6 ('mlx4_en: Add driver for Mellanox ConnectX 10GbE NIC')
+Signed-off-by: Amir Vadai <amir@vadai.me>
+Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
+Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx4/en_port.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx4/en_port.c
++++ b/drivers/net/ethernet/mellanox/mlx4/en_port.c
+@@ -238,11 +238,11 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_e
+       stats->collisions = 0;
+       stats->rx_dropped = be32_to_cpu(mlx4_en_stats->RDROP);
+       stats->rx_length_errors = be32_to_cpu(mlx4_en_stats->RdropLength);
+-      stats->rx_over_errors = be32_to_cpu(mlx4_en_stats->RdropOvflw);
++      stats->rx_over_errors = 0;
+       stats->rx_crc_errors = be32_to_cpu(mlx4_en_stats->RCRC);
+       stats->rx_frame_errors = 0;
+       stats->rx_fifo_errors = be32_to_cpu(mlx4_en_stats->RdropOvflw);
+-      stats->rx_missed_errors = be32_to_cpu(mlx4_en_stats->RdropOvflw);
++      stats->rx_missed_errors = 0;
+       stats->tx_aborted_errors = 0;
+       stats->tx_carrier_errors = 0;
+       stats->tx_fifo_errors = 0;
diff --git a/queue-4.4/net_sched-fix-reclassification-needs-to-consider-ether-protocol-changes.patch b/queue-4.4/net_sched-fix-reclassification-needs-to-consider-ether-protocol-changes.patch
new file mode 100644 (file)
index 0000000..47733dc
--- /dev/null
@@ -0,0 +1,39 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Jamal Hadi Salim <jhs@mojatatu.com>
+Date: Thu, 18 Feb 2016 07:38:04 -0500
+Subject: net_sched fix: reclassification needs to consider ether protocol changes
+
+From: Jamal Hadi Salim <jhs@mojatatu.com>
+
+[ Upstream commit 619fe32640b4b01f370574d50344ae0f62689816 ]
+
+actions could change the etherproto in particular with ethernet
+tunnelled data. Typically such actions, after peeling the outer header,
+will ask for the packet to be  reclassified. We then need to restart
+the classification with the new proto header.
+
+Example setup used to catch this:
+sudo tc qdisc add dev $ETH ingress
+sudo $TC filter add dev $ETH parent ffff: pref 1 protocol 802.1Q \
+u32 match u32 0 0 flowid 1:1 \
+action  vlan pop reclassify
+
+Fixes: 3b3ae880266d ("net: sched: consolidate tc_classify{,_compat}")
+Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Acked-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_api.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/sched/sch_api.c
++++ b/net/sched/sch_api.c
+@@ -1852,6 +1852,7 @@ reset:
+       }
+       tp = old_tp;
++      protocol = tc_skb_protocol(skb);
+       goto reclassify;
+ #endif
+ }
diff --git a/queue-4.4/pppoe-fix-reference-counting-in-pppoe-proxy.patch b/queue-4.4/pppoe-fix-reference-counting-in-pppoe-proxy.patch
new file mode 100644 (file)
index 0000000..34e4e3a
--- /dev/null
@@ -0,0 +1,30 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Guillaume Nault <g.nault@alphalink.fr>
+Date: Mon, 15 Feb 2016 17:01:10 +0100
+Subject: pppoe: fix reference counting in PPPoE proxy
+
+From: Guillaume Nault <g.nault@alphalink.fr>
+
+[ Upstream commit 29e73269aa4d36f92b35610c25f8b01c789b0dc8 ]
+
+Drop reference on the relay_po socket when __pppoe_xmit() succeeds.
+This is already handled correctly in the error path.
+
+Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ppp/pppoe.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/net/ppp/pppoe.c
++++ b/drivers/net/ppp/pppoe.c
+@@ -395,6 +395,8 @@ static int pppoe_rcv_core(struct sock *s
+               if (!__pppoe_xmit(sk_pppox(relay_po), skb))
+                       goto abort_put;
++
++              sock_put(sk_pppox(relay_po));
+       } else {
+               if (sock_queue_rcv_skb(sk, skb))
+                       goto abort_kfree;
diff --git a/queue-4.4/pptp-fix-illegal-memory-access-caused-by-multiple-bind-s.patch b/queue-4.4/pptp-fix-illegal-memory-access-caused-by-multiple-bind-s.patch
new file mode 100644 (file)
index 0000000..a4d6ca0
--- /dev/null
@@ -0,0 +1,114 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Date: Fri, 22 Jan 2016 01:39:43 +0100
+Subject: pptp: fix illegal memory access caused by multiple bind()s
+
+From: Hannes Frederic Sowa <hannes@stressinduktion.org>
+
+[ Upstream commit 9a368aff9cb370298fa02feeffa861f2db497c18 ]
+
+Several times already this has been reported as kasan reports caused by
+syzkaller and trinity and people always looked at RCU races, but it is
+much more simple. :)
+
+In case we bind a pptp socket multiple times, we simply add it to
+the callid_sock list but don't remove the old binding. Thus the old
+socket stays in the bucket with unused call_id indexes and doesn't get
+cleaned up. This causes various forms of kasan reports which were hard
+to pinpoint.
+
+Simply don't allow multiple binds and correct error handling in
+pptp_bind. Also keep sk_state bits in place in pptp_connect.
+
+Fixes: 00959ade36acad ("PPTP: PPP over IPv4 (Point-to-Point Tunneling Protocol)")
+Cc: Dmitry Kozlov <xeb@mail.ru>
+Cc: Sasha Levin <sasha.levin@oracle.com>
+Cc: Dmitry Vyukov <dvyukov@google.com>
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Cc: Dave Jones <davej@codemonkey.org.uk>
+Reported-by: Dave Jones <davej@codemonkey.org.uk>
+Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ppp/pptp.c |   34 ++++++++++++++++++++++++----------
+ 1 file changed, 24 insertions(+), 10 deletions(-)
+
+--- a/drivers/net/ppp/pptp.c
++++ b/drivers/net/ppp/pptp.c
+@@ -129,24 +129,27 @@ static int lookup_chan_dst(u16 call_id,
+       return i < MAX_CALLID;
+ }
+-static int add_chan(struct pppox_sock *sock)
++static int add_chan(struct pppox_sock *sock,
++                  struct pptp_addr *sa)
+ {
+       static int call_id;
+       spin_lock(&chan_lock);
+-      if (!sock->proto.pptp.src_addr.call_id) {
++      if (!sa->call_id)       {
+               call_id = find_next_zero_bit(callid_bitmap, MAX_CALLID, call_id + 1);
+               if (call_id == MAX_CALLID) {
+                       call_id = find_next_zero_bit(callid_bitmap, MAX_CALLID, 1);
+                       if (call_id == MAX_CALLID)
+                               goto out_err;
+               }
+-              sock->proto.pptp.src_addr.call_id = call_id;
+-      } else if (test_bit(sock->proto.pptp.src_addr.call_id, callid_bitmap))
++              sa->call_id = call_id;
++      } else if (test_bit(sa->call_id, callid_bitmap)) {
+               goto out_err;
++      }
+-      set_bit(sock->proto.pptp.src_addr.call_id, callid_bitmap);
+-      rcu_assign_pointer(callid_sock[sock->proto.pptp.src_addr.call_id], sock);
++      sock->proto.pptp.src_addr = *sa;
++      set_bit(sa->call_id, callid_bitmap);
++      rcu_assign_pointer(callid_sock[sa->call_id], sock);
+       spin_unlock(&chan_lock);
+       return 0;
+@@ -416,7 +419,6 @@ static int pptp_bind(struct socket *sock
+       struct sock *sk = sock->sk;
+       struct sockaddr_pppox *sp = (struct sockaddr_pppox *) uservaddr;
+       struct pppox_sock *po = pppox_sk(sk);
+-      struct pptp_opt *opt = &po->proto.pptp;
+       int error = 0;
+       if (sockaddr_len < sizeof(struct sockaddr_pppox))
+@@ -424,10 +426,22 @@ static int pptp_bind(struct socket *sock
+       lock_sock(sk);
+-      opt->src_addr = sp->sa_addr.pptp;
+-      if (add_chan(po))
++      if (sk->sk_state & PPPOX_DEAD) {
++              error = -EALREADY;
++              goto out;
++      }
++
++      if (sk->sk_state & PPPOX_BOUND) {
+               error = -EBUSY;
++              goto out;
++      }
++
++      if (add_chan(po, &sp->sa_addr.pptp))
++              error = -EBUSY;
++      else
++              sk->sk_state |= PPPOX_BOUND;
++out:
+       release_sock(sk);
+       return error;
+ }
+@@ -498,7 +512,7 @@ static int pptp_connect(struct socket *s
+       }
+       opt->dst_addr = sp->sa_addr.pptp;
+-      sk->sk_state = PPPOX_CONNECTED;
++      sk->sk_state |= PPPOX_CONNECTED;
+  end:
+       release_sock(sk);
diff --git a/queue-4.4/qmi_wwan-add-4g-lte-usb-modem-u901.patch b/queue-4.4/qmi_wwan-add-4g-lte-usb-modem-u901.patch
new file mode 100644 (file)
index 0000000..8ca239b
--- /dev/null
@@ -0,0 +1,45 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= <bjorn@mork.no>
+Date: Fri, 12 Feb 2016 16:42:14 +0100
+Subject: qmi_wwan: add "4G LTE usb-modem U901"
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= <bjorn@mork.no>
+
+[ Upstream commit aac8d3c282e024c344c5b86dc1eab7af88bb9716 ]
+
+Thomas reports:
+
+T:  Bus=01 Lev=01 Prnt=01 Port=03 Cnt=01 Dev#=  4 Spd=480 MxCh= 0
+D:  Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs=  1
+P:  Vendor=05c6 ProdID=6001 Rev=00.00
+S:  Manufacturer=USB Modem
+S:  Product=USB Modem
+S:  SerialNumber=1234567890ABCDEF
+C:  #Ifs= 5 Cfg#= 1 Atr=e0 MxPwr=500mA
+I:  If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option
+I:  If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option
+I:  If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option
+I:  If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan
+I:  If#= 4 Alt= 0 #EPs= 2 Cls=08(stor.) Sub=06 Prot=50 Driver=usb-storage
+
+Reported-by: Thomas Schäfer <tschaefer@t-online.de>
+Signed-off-by: Bjørn Mork <bjorn@mork.no>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/usb/qmi_wwan.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/usb/qmi_wwan.c
++++ b/drivers/net/usb/qmi_wwan.c
+@@ -492,6 +492,7 @@ static const struct usb_device_id produc
+       /* 3. Combined interface devices matching on interface number */
+       {QMI_FIXED_INTF(0x0408, 0xea42, 4)},    /* Yota / Megafon M100-1 */
++      {QMI_FIXED_INTF(0x05c6, 0x6001, 3)},    /* 4G LTE usb-modem U901 */
+       {QMI_FIXED_INTF(0x05c6, 0x7000, 0)},
+       {QMI_FIXED_INTF(0x05c6, 0x7001, 1)},
+       {QMI_FIXED_INTF(0x05c6, 0x7002, 1)},
diff --git a/queue-4.4/route-check-and-remove-route-cache-when-we-get-route.patch b/queue-4.4/route-check-and-remove-route-cache-when-we-get-route.patch
new file mode 100644 (file)
index 0000000..9496118
--- /dev/null
@@ -0,0 +1,161 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Xin Long <lucien.xin@gmail.com>
+Date: Thu, 18 Feb 2016 21:21:19 +0800
+Subject: route: check and remove route cache when we get route
+
+From: Xin Long <lucien.xin@gmail.com>
+
+[ Upstream commit deed49df7390d5239024199e249190328f1651e7 ]
+
+Since the gc of ipv4 routes was removed, a cached route has
+no chance to be removed; even after it has timed out it can still
+be used, because no code checks its expiry.
+
+Fix this issue by checking and removing the route cache when we get a route.
+
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/ip_fib.h |    1 
+ net/ipv4/route.c     |   77 +++++++++++++++++++++++++++++++++++++++++----------
+ 2 files changed, 64 insertions(+), 14 deletions(-)
+
+--- a/include/net/ip_fib.h
++++ b/include/net/ip_fib.h
+@@ -61,6 +61,7 @@ struct fib_nh_exception {
+       struct rtable __rcu             *fnhe_rth_input;
+       struct rtable __rcu             *fnhe_rth_output;
+       unsigned long                   fnhe_stamp;
++      struct rcu_head                 rcu;
+ };
+ struct fnhe_hash_bucket {
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -129,6 +129,7 @@ static int ip_rt_mtu_expires __read_most
+ static int ip_rt_min_pmtu __read_mostly               = 512 + 20 + 20;
+ static int ip_rt_min_advmss __read_mostly     = 256;
++static int ip_rt_gc_timeout __read_mostly     = RT_GC_TIMEOUT;
+ /*
+  *    Interface to generic destination cache.
+  */
+@@ -755,7 +756,7 @@ static void __ip_do_redirect(struct rtab
+                               struct fib_nh *nh = &FIB_RES_NH(res);
+                               update_or_create_fnhe(nh, fl4->daddr, new_gw,
+-                                                    0, 0);
++                                              0, jiffies + ip_rt_gc_timeout);
+                       }
+                       if (kill_route)
+                               rt->dst.obsolete = DST_OBSOLETE_KILL;
+@@ -1556,6 +1557,36 @@ static void ip_handle_martian_source(str
+ #endif
+ }
++static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
++{
++      struct fnhe_hash_bucket *hash;
++      struct fib_nh_exception *fnhe, __rcu **fnhe_p;
++      u32 hval = fnhe_hashfun(daddr);
++
++      spin_lock_bh(&fnhe_lock);
++
++      hash = rcu_dereference_protected(nh->nh_exceptions,
++                                       lockdep_is_held(&fnhe_lock));
++      hash += hval;
++
++      fnhe_p = &hash->chain;
++      fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock));
++      while (fnhe) {
++              if (fnhe->fnhe_daddr == daddr) {
++                      rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
++                              fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
++                      fnhe_flush_routes(fnhe);
++                      kfree_rcu(fnhe, rcu);
++                      break;
++              }
++              fnhe_p = &fnhe->fnhe_next;
++              fnhe = rcu_dereference_protected(fnhe->fnhe_next,
++                                               lockdep_is_held(&fnhe_lock));
++      }
++
++      spin_unlock_bh(&fnhe_lock);
++}
++
+ /* called in rcu_read_lock() section */
+ static int __mkroute_input(struct sk_buff *skb,
+                          const struct fib_result *res,
+@@ -1609,11 +1640,20 @@ static int __mkroute_input(struct sk_buf
+       fnhe = find_exception(&FIB_RES_NH(*res), daddr);
+       if (do_cache) {
+-              if (fnhe)
++              if (fnhe) {
+                       rth = rcu_dereference(fnhe->fnhe_rth_input);
+-              else
+-                      rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
++                      if (rth && rth->dst.expires &&
++                          time_after(jiffies, rth->dst.expires)) {
++                              ip_del_fnhe(&FIB_RES_NH(*res), daddr);
++                              fnhe = NULL;
++                      } else {
++                              goto rt_cache;
++                      }
++              }
++
++              rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
++rt_cache:
+               if (rt_cache_valid(rth)) {
+                       skb_dst_set_noref(skb, &rth->dst);
+                       goto out;
+@@ -2014,19 +2054,29 @@ static struct rtable *__mkroute_output(c
+               struct fib_nh *nh = &FIB_RES_NH(*res);
+               fnhe = find_exception(nh, fl4->daddr);
+-              if (fnhe)
++              if (fnhe) {
+                       prth = &fnhe->fnhe_rth_output;
+-              else {
+-                      if (unlikely(fl4->flowi4_flags &
+-                                   FLOWI_FLAG_KNOWN_NH &&
+-                                   !(nh->nh_gw &&
+-                                     nh->nh_scope == RT_SCOPE_LINK))) {
+-                              do_cache = false;
+-                              goto add;
++                      rth = rcu_dereference(*prth);
++                      if (rth && rth->dst.expires &&
++                          time_after(jiffies, rth->dst.expires)) {
++                              ip_del_fnhe(nh, fl4->daddr);
++                              fnhe = NULL;
++                      } else {
++                              goto rt_cache;
+                       }
+-                      prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
+               }
++
++              if (unlikely(fl4->flowi4_flags &
++                           FLOWI_FLAG_KNOWN_NH &&
++                           !(nh->nh_gw &&
++                             nh->nh_scope == RT_SCOPE_LINK))) {
++                      do_cache = false;
++                      goto add;
++              }
++              prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
+               rth = rcu_dereference(*prth);
++
++rt_cache:
+               if (rt_cache_valid(rth)) {
+                       dst_hold(&rth->dst);
+                       return rth;
+@@ -2569,7 +2619,6 @@ void ip_rt_multicast_event(struct in_dev
+ }
+ #ifdef CONFIG_SYSCTL
+-static int ip_rt_gc_timeout __read_mostly     = RT_GC_TIMEOUT;
+ static int ip_rt_gc_interval __read_mostly  = 60 * HZ;
+ static int ip_rt_gc_min_interval __read_mostly        = HZ / 2;
+ static int ip_rt_gc_elasticity __read_mostly  = 8;
diff --git a/queue-4.4/rtnl-rtm_getnetconf-fix-wrong-return-value.patch b/queue-4.4/rtnl-rtm_getnetconf-fix-wrong-return-value.patch
new file mode 100644 (file)
index 0000000..5282f6d
--- /dev/null
@@ -0,0 +1,43 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Anton Protopopov <a.s.protopopov@gmail.com>
+Date: Tue, 16 Feb 2016 21:43:16 -0500
+Subject: rtnl: RTM_GETNETCONF: fix wrong return value
+
+From: Anton Protopopov <a.s.protopopov@gmail.com>
+
+[ Upstream commit a97eb33ff225f34a8124774b3373fd244f0e83ce ]
+
+An error response from a RTM_GETNETCONF request can return the positive
+error value EINVAL in the struct nlmsgerr that can mislead userspace.
+
+Signed-off-by: Anton Protopopov <a.s.protopopov@gmail.com>
+Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/devinet.c  |    2 +-
+ net/ipv6/addrconf.c |    2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/devinet.c
++++ b/net/ipv4/devinet.c
+@@ -1847,7 +1847,7 @@ static int inet_netconf_get_devconf(stru
+       if (err < 0)
+               goto errout;
+-      err = EINVAL;
++      err = -EINVAL;
+       if (!tb[NETCONFA_IFINDEX])
+               goto errout;
+--- a/net/ipv6/addrconf.c
++++ b/net/ipv6/addrconf.c
+@@ -583,7 +583,7 @@ static int inet6_netconf_get_devconf(str
+       if (err < 0)
+               goto errout;
+-      err = EINVAL;
++      err = -EINVAL;
+       if (!tb[NETCONFA_IFINDEX])
+               goto errout;
diff --git a/queue-4.4/sctp-allow-setting-sctp_sack_immediately-by-the-application.patch b/queue-4.4/sctp-allow-setting-sctp_sack_immediately-by-the-application.patch
new file mode 100644 (file)
index 0000000..a2060d9
--- /dev/null
@@ -0,0 +1,46 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Date: Fri, 22 Jan 2016 18:29:49 -0200
+Subject: sctp: allow setting SCTP_SACK_IMMEDIATELY by the application
+
+From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+
+[ Upstream commit 27f7ed2b11d42ab6d796e96533c2076ec220affc ]
+
+This patch extends commit b93d6471748d ("sctp: implement the sender side
+for SACK-IMMEDIATELY extension") as it didn't white list
+SCTP_SACK_IMMEDIATELY on sctp_msghdr_parse(), causing it to be
+understood as an invalid flag and returning -EINVAL to the application.
+
+Note that the actual handling of the flag is already there in
+sctp_datamsg_from_user().
+
+https://tools.ietf.org/html/rfc7053#section-7
+
+Fixes: b93d6471748d ("sctp: implement the sender side for SACK-IMMEDIATELY extension")
+Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Acked-by: Vlad Yasevich <vyasevich@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/socket.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/sctp/socket.c
++++ b/net/sctp/socket.c
+@@ -6640,6 +6640,7 @@ static int sctp_msghdr_parse(const struc
+                       if (cmsgs->srinfo->sinfo_flags &
+                           ~(SCTP_UNORDERED | SCTP_ADDR_OVER |
++                            SCTP_SACK_IMMEDIATELY |
+                             SCTP_ABORT | SCTP_EOF))
+                               return -EINVAL;
+                       break;
+@@ -6663,6 +6664,7 @@ static int sctp_msghdr_parse(const struc
+                       if (cmsgs->sinfo->snd_flags &
+                           ~(SCTP_UNORDERED | SCTP_ADDR_OVER |
++                            SCTP_SACK_IMMEDIATELY |
+                             SCTP_ABORT | SCTP_EOF))
+                               return -EINVAL;
+                       break;
diff --git a/queue-4.4/sctp-fix-port-hash-table-size-computation.patch b/queue-4.4/sctp-fix-port-hash-table-size-computation.patch
new file mode 100644 (file)
index 0000000..7d6f3c2
--- /dev/null
@@ -0,0 +1,131 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Neil Horman <nhorman@tuxdriver.com>
+Date: Thu, 18 Feb 2016 16:10:57 -0500
+Subject: sctp: Fix port hash table size computation
+
+From: Neil Horman <nhorman@tuxdriver.com>
+
+[ Upstream commit d9749fb5942f51555dc9ce1ac0dbb1806960a975 ]
+
+Dmitry Vyukov noted recently that the sctp_port_hashtable had an error in
+its size computation, observing that the current method never guaranteed
+that the hashsize (measured in number of entries) would be a power of two,
+which the input hash function for that table requires.  The root cause of
+the problem is that two values need to be computed (one, the allocation
+order of the storage requires, as passed to __get_free_pages, and two the
+number of entries for the hash table).  Both need to be ^2, but for
+different reasons, and the existing code is simply computing one order
+value, and using it as the basis for both, which is wrong (i.e. it assumes
+that ((1<<order)*PAGE_SIZE)/sizeof(bucket) is still ^2 when its not).
+
+To fix this, we change the logic slightly.  We start by computing a goal
+allocation order (which is limited by the maximum size hash table we want
+to support).  Then we attempt to allocate that size table, decreasing the
+order until a successful allocation is made.  Then, with the resultant
+successful order we compute the number of buckets that hash table supports,
+which we then round down to the nearest power of two, giving us the number
+of entries the table actually supports.
+
+I've tested this locally here, using non-debug and spinlock-debug kernels,
+and the number of entries in the hashtable consistently work out to be
+powers of two in all cases.
+
+Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+CC: Dmitry Vyukov <dvyukov@google.com>
+CC: Vladislav Yasevich <vyasevich@gmail.com>
+CC: "David S. Miller" <davem@davemloft.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/protocol.c |   43 ++++++++++++++++++++++++++++++++++++-------
+ 1 file changed, 36 insertions(+), 7 deletions(-)
+
+--- a/net/sctp/protocol.c
++++ b/net/sctp/protocol.c
+@@ -60,6 +60,8 @@
+ #include <net/inet_common.h>
+ #include <net/inet_ecn.h>
++#define MAX_SCTP_PORT_HASH_ENTRIES (64 * 1024)
++
+ /* Global data structures. */
+ struct sctp_globals sctp_globals __read_mostly;
+@@ -1352,6 +1354,8 @@ static __init int sctp_init(void)
+       unsigned long limit;
+       int max_share;
+       int order;
++      int num_entries;
++      int max_entry_order;
+       sock_skb_cb_check_size(sizeof(struct sctp_ulpevent));
+@@ -1404,14 +1408,24 @@ static __init int sctp_init(void)
+       /* Size and allocate the association hash table.
+        * The methodology is similar to that of the tcp hash tables.
++       * Though not identical.  Start by getting a goal size
+        */
+       if (totalram_pages >= (128 * 1024))
+               goal = totalram_pages >> (22 - PAGE_SHIFT);
+       else
+               goal = totalram_pages >> (24 - PAGE_SHIFT);
+-      for (order = 0; (1UL << order) < goal; order++)
+-              ;
++      /* Then compute the page order for said goal */
++      order = get_order(goal);
++
++      /* Now compute the required page order for the maximum sized table we
++       * want to create
++       */
++      max_entry_order = get_order(MAX_SCTP_PORT_HASH_ENTRIES *
++                                  sizeof(struct sctp_bind_hashbucket));
++
++      /* Limit the page order by that maximum hash table size */
++      order = min(order, max_entry_order);
+       do {
+               sctp_assoc_hashsize = (1UL << order) * PAGE_SIZE /
+@@ -1445,20 +1459,35 @@ static __init int sctp_init(void)
+               INIT_HLIST_HEAD(&sctp_ep_hashtable[i].chain);
+       }
+-      /* Allocate and initialize the SCTP port hash table.  */
++      /* Allocate and initialize the SCTP port hash table.
++       * Note that order is initalized to start at the max sized
++       * table we want to support.  If we can't get that many pages
++       * reduce the order and try again
++       */
+       do {
+-              sctp_port_hashsize = (1UL << order) * PAGE_SIZE /
+-                                      sizeof(struct sctp_bind_hashbucket);
+-              if ((sctp_port_hashsize > (64 * 1024)) && order > 0)
+-                      continue;
+               sctp_port_hashtable = (struct sctp_bind_hashbucket *)
+                       __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, order);
+       } while (!sctp_port_hashtable && --order > 0);
++
+       if (!sctp_port_hashtable) {
+               pr_err("Failed bind hash alloc\n");
+               status = -ENOMEM;
+               goto err_bhash_alloc;
+       }
++
++      /* Now compute the number of entries that will fit in the
++       * port hash space we allocated
++       */
++      num_entries = (1UL << order) * PAGE_SIZE /
++                    sizeof(struct sctp_bind_hashbucket);
++
++      /* And finish by rounding it down to the nearest power of two
++       * this wastes some memory of course, but its needed because
++       * the hash function operates based on the assumption that
++       * that the number of entries is a power of two
++       */
++      sctp_port_hashsize = rounddown_pow_of_two(num_entries);
++
+       for (i = 0; i < sctp_port_hashsize; i++) {
+               spin_lock_init(&sctp_port_hashtable[i].lock);
+               INIT_HLIST_HEAD(&sctp_port_hashtable[i].chain);
diff --git a/queue-4.4/sctp-translate-network-order-to-host-order-when-users-get-a-hmacid.patch b/queue-4.4/sctp-translate-network-order-to-host-order-when-users-get-a-hmacid.patch
new file mode 100644 (file)
index 0000000..cd3ac0d
--- /dev/null
@@ -0,0 +1,50 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Xin Long <lucien.xin@gmail.com>
+Date: Wed, 3 Feb 2016 23:33:30 +0800
+Subject: sctp: translate network order to host order when users get a hmacid
+
+From: Xin Long <lucien.xin@gmail.com>
+
+[ Upstream commit 7a84bd46647ff181eb2659fdc99590e6f16e501d ]
+
+Commit ed5a377d87dc ("sctp: translate host order to network order when
+setting a hmacid") corrected the hmacid byte-order when setting a hmacid.
+but the same issue also exists on getting a hmacid.
+
+We fix it by changing hmacids to host order when users get them with
+getsockopt.
+
+Fixes: Commit ed5a377d87dc ("sctp: translate host order to network order when setting a hmacid")
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/socket.c |    9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/net/sctp/socket.c
++++ b/net/sctp/socket.c
+@@ -5542,6 +5542,7 @@ static int sctp_getsockopt_hmac_ident(st
+       struct sctp_hmac_algo_param *hmacs;
+       __u16 data_len = 0;
+       u32 num_idents;
++      int i;
+       if (!ep->auth_enable)
+               return -EACCES;
+@@ -5559,8 +5560,12 @@ static int sctp_getsockopt_hmac_ident(st
+               return -EFAULT;
+       if (put_user(num_idents, &p->shmac_num_idents))
+               return -EFAULT;
+-      if (copy_to_user(p->shmac_idents, hmacs->hmac_ids, data_len))
+-              return -EFAULT;
++      for (i = 0; i < num_idents; i++) {
++              __u16 hmacid = ntohs(hmacs->hmac_ids[i]);
++
++              if (copy_to_user(&p->shmac_idents[i], &hmacid, sizeof(__u16)))
++                      return -EFAULT;
++      }
+       return 0;
+ }
diff --git a/queue-4.4/series b/queue-4.4/series
new file mode 100644 (file)
index 0000000..8cd6731
--- /dev/null
@@ -0,0 +1,47 @@
+af_iucv-validate-socket-address-length-in-iucv_sock_bind.patch
+gro-make-gro-aware-of-lightweight-tunnels.patch
+net-dp83640-fix-tx-timestamp-overflow-handling.patch
+tunnels-allow-ipv6-udp-checksums-to-be-correctly-controlled.patch
+lwt-fix-rx-checksum-setting-for-lwt-devices-tunneling-over-ipv6.patch
+tcp-fix-null-deref-in-tcp_v4_send_ack.patch
+af_unix-fix-struct-pid-memory-leak.patch
+pptp-fix-illegal-memory-access-caused-by-multiple-bind-s.patch
+sctp-allow-setting-sctp_sack_immediately-by-the-application.patch
+net-dsa-fix-mv88e6xxx-switches.patch
+tipc-fix-connection-abort-during-subscription-cancel.patch
+inet-frag-always-orphan-skbs-inside-ip_defrag.patch
+switchdev-require-rtnl-mutex-to-be-held-when-sending-fdb-notifications.patch
+tcp-beware-of-alignments-in-tcp_get_info.patch
+ipv6-enforce-flowi6_oif-usage-in-ip6_dst_lookup_tail.patch
+ipv6-udp-use-sticky-pktinfo-egress-ifindex-on-connect.patch
+ipv6-addrconf-fix-recursive-spin-lock-call.patch
+ipv6-fix-a-lockdep-splat.patch
+unix-correctly-track-in-flight-fds-in-sending-process-user_struct.patch
+tcp-do-not-drop-syn_recv-on-all-icmp-reports.patch
+net-add-sysctl_max_skb_frags.patch
+tg3-fix-for-tg3-transmit-queue-0-timed-out-when-too-many-gso_segs.patch
+enic-increment-devcmd2-result-ring-in-case-of-timeout.patch
+sctp-translate-network-order-to-host-order-when-users-get-a-hmacid.patch
+net-copy-inner-l3-and-l4-headers-as-unaligned-on-gre-teb.patch
+flow_dissector-fix-unaligned-access-in-__skb_flow_dissector-when-used-by-eth_get_headlen.patch
+bpf-fix-branch-offset-adjustment-on-backjumps-after-patching-ctx-expansion.patch
+bonding-fix-arp-monitor-validation.patch
+ipv4-fix-memory-leaks-in-ip_cmsg_send-callers.patch
+af_unix-don-t-set-err-in-unix_stream_read_generic-unless-there-was-an-error.patch
+af_unix-guard-against-other-sk-in-unix_dgram_sendmsg.patch
+tipc-fix-premature-addition-of-node-to-lookup-table.patch
+tcp-md5-release-request-socket-instead-of-listener.patch
+qmi_wwan-add-4g-lte-usb-modem-u901.patch
+net-mlx4_en-count-hw-buffer-overrun-only-once.patch
+net-mlx4_en-choose-time-stamping-shift-value-according-to-hw-frequency.patch
+net-mlx4_en-avoid-changing-dev-features-directly-in-run-time.patch
+l2tp-fix-error-creating-l2tp-tunnels.patch
+pppoe-fix-reference-counting-in-pppoe-proxy.patch
+net_sched-fix-reclassification-needs-to-consider-ether-protocol-changes.patch
+route-check-and-remove-route-cache-when-we-get-route.patch
+tcp-dccp-fix-another-race-at-listener-dismantle.patch
+iff_no_queue-fix-for-drivers-not-calling-ether_setup.patch
+rtnl-rtm_getnetconf-fix-wrong-return-value.patch
+tipc-unlock-in-error-path.patch
+unix_diag-fix-incorrect-sign-extension-in-unix_lookup_by_ino.patch
+sctp-fix-port-hash-table-size-computation.patch
diff --git a/queue-4.4/switchdev-require-rtnl-mutex-to-be-held-when-sending-fdb-notifications.patch b/queue-4.4/switchdev-require-rtnl-mutex-to-be-held-when-sending-fdb-notifications.patch
new file mode 100644 (file)
index 0000000..cd859bb
--- /dev/null
@@ -0,0 +1,167 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Ido Schimmel <idosch@mellanox.com>
+Date: Wed, 27 Jan 2016 15:16:43 +0100
+Subject: switchdev: Require RTNL mutex to be held when sending FDB notifications
+
+From: Ido Schimmel <idosch@mellanox.com>
+
+[ Upstream commit 4f2c6ae5c64c353fb1b0425e4747e5603feadba1 ]
+
+When switchdev drivers process FDB notifications from the underlying
+device they resolve the netdev to which the entry points to and notify
+the bridge using the switchdev notifier.
+
+However, since the RTNL mutex is not held there is nothing preventing
+the netdev from disappearing in the middle, which will cause
+br_switchdev_event() to dereference a non-existing netdev.
+
+Make switchdev drivers hold the lock at the beginning of the
+notification processing session and release it once it ends, after
+notifying the bridge.
+
+Also, remove switchdev_mutex and fdb_lock, as they are no longer needed
+when RTNL mutex is held.
+
+Fixes: 03bf0c281234 ("switchdev: introduce switchdev notifier")
+Signed-off-by: Ido Schimmel <idosch@mellanox.com>
+Signed-off-by: Jiri Pirko <jiri@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c |    3 +++
+ drivers/net/ethernet/rocker/rocker.c                     |    2 ++
+ net/bridge/br.c                                          |    3 +--
+ net/switchdev/switchdev.c                                |   15 ++++++++-------
+ 4 files changed, 14 insertions(+), 9 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+@@ -45,6 +45,7 @@
+ #include <linux/if_bridge.h>
+ #include <linux/workqueue.h>
+ #include <linux/jiffies.h>
++#include <linux/rtnetlink.h>
+ #include <net/switchdev.h>
+ #include "spectrum.h"
+@@ -812,6 +813,7 @@ static void mlxsw_sp_fdb_notify_work(str
+       mlxsw_sp = container_of(work, struct mlxsw_sp, fdb_notify.dw.work);
++      rtnl_lock();
+       do {
+               mlxsw_reg_sfn_pack(sfn_pl);
+               err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(sfn), sfn_pl);
+@@ -824,6 +826,7 @@ static void mlxsw_sp_fdb_notify_work(str
+                       mlxsw_sp_fdb_notify_rec_process(mlxsw_sp, sfn_pl, i);
+       } while (num_rec);
++      rtnl_unlock();
+       kfree(sfn_pl);
+       mlxsw_sp_fdb_notify_work_schedule(mlxsw_sp);
+--- a/drivers/net/ethernet/rocker/rocker.c
++++ b/drivers/net/ethernet/rocker/rocker.c
+@@ -3531,12 +3531,14 @@ static void rocker_port_fdb_learn_work(s
+       info.addr = lw->addr;
+       info.vid = lw->vid;
++      rtnl_lock();
+       if (learned && removing)
+               call_switchdev_notifiers(SWITCHDEV_FDB_DEL,
+                                        lw->rocker_port->dev, &info.info);
+       else if (learned && !removing)
+               call_switchdev_notifiers(SWITCHDEV_FDB_ADD,
+                                        lw->rocker_port->dev, &info.info);
++      rtnl_unlock();
+       rocker_port_kfree(lw->trans, work);
+ }
+--- a/net/bridge/br.c
++++ b/net/bridge/br.c
+@@ -121,6 +121,7 @@ static struct notifier_block br_device_n
+       .notifier_call = br_device_event
+ };
++/* called with RTNL */
+ static int br_switchdev_event(struct notifier_block *unused,
+                             unsigned long event, void *ptr)
+ {
+@@ -130,7 +131,6 @@ static int br_switchdev_event(struct not
+       struct switchdev_notifier_fdb_info *fdb_info;
+       int err = NOTIFY_DONE;
+-      rtnl_lock();
+       p = br_port_get_rtnl(dev);
+       if (!p)
+               goto out;
+@@ -155,7 +155,6 @@ static int br_switchdev_event(struct not
+       }
+ out:
+-      rtnl_unlock();
+       return err;
+ }
+--- a/net/switchdev/switchdev.c
++++ b/net/switchdev/switchdev.c
+@@ -20,6 +20,7 @@
+ #include <linux/list.h>
+ #include <linux/workqueue.h>
+ #include <linux/if_vlan.h>
++#include <linux/rtnetlink.h>
+ #include <net/ip_fib.h>
+ #include <net/switchdev.h>
+@@ -565,7 +566,6 @@ int switchdev_port_obj_dump(struct net_d
+ }
+ EXPORT_SYMBOL_GPL(switchdev_port_obj_dump);
+-static DEFINE_MUTEX(switchdev_mutex);
+ static RAW_NOTIFIER_HEAD(switchdev_notif_chain);
+ /**
+@@ -580,9 +580,9 @@ int register_switchdev_notifier(struct n
+ {
+       int err;
+-      mutex_lock(&switchdev_mutex);
++      rtnl_lock();
+       err = raw_notifier_chain_register(&switchdev_notif_chain, nb);
+-      mutex_unlock(&switchdev_mutex);
++      rtnl_unlock();
+       return err;
+ }
+ EXPORT_SYMBOL_GPL(register_switchdev_notifier);
+@@ -598,9 +598,9 @@ int unregister_switchdev_notifier(struct
+ {
+       int err;
+-      mutex_lock(&switchdev_mutex);
++      rtnl_lock();
+       err = raw_notifier_chain_unregister(&switchdev_notif_chain, nb);
+-      mutex_unlock(&switchdev_mutex);
++      rtnl_unlock();
+       return err;
+ }
+ EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);
+@@ -614,16 +614,17 @@ EXPORT_SYMBOL_GPL(unregister_switchdev_n
+  *    Call all network notifier blocks. This should be called by driver
+  *    when it needs to propagate hardware event.
+  *    Return values are same as for atomic_notifier_call_chain().
++ *    rtnl_lock must be held.
+  */
+ int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
+                            struct switchdev_notifier_info *info)
+ {
+       int err;
++      ASSERT_RTNL();
++
+       info->dev = dev;
+-      mutex_lock(&switchdev_mutex);
+       err = raw_notifier_call_chain(&switchdev_notif_chain, val, info);
+-      mutex_unlock(&switchdev_mutex);
+       return err;
+ }
+ EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
diff --git a/queue-4.4/tcp-beware-of-alignments-in-tcp_get_info.patch b/queue-4.4/tcp-beware-of-alignments-in-tcp_get_info.patch
new file mode 100644 (file)
index 0000000..d2fc6f4
--- /dev/null
@@ -0,0 +1,67 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 27 Jan 2016 10:52:43 -0800
+Subject: tcp: beware of alignments in tcp_get_info()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit ff5d749772018602c47509bdc0093ff72acd82ec ]
+
+With some combinations of user provided flags in netlink command,
+it is possible to call tcp_get_info() with a buffer that is not 8-bytes
+aligned.
+
+It does matter on some arches, so we need to use put_unaligned() to
+store the u64 fields.
+
+Current iproute2 package does not trigger this particular issue.
+
+Fixes: 0df48c26d841 ("tcp: add tcpi_bytes_acked to tcp_info")
+Fixes: 977cb0ecf82e ("tcp: add pacing_rate information into tcp_info")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp.c |   12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -279,6 +279,7 @@
+ #include <asm/uaccess.h>
+ #include <asm/ioctls.h>
++#include <asm/unaligned.h>
+ #include <net/busy_poll.h>
+ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
+@@ -2637,6 +2638,7 @@ void tcp_get_info(struct sock *sk, struc
+       const struct inet_connection_sock *icsk = inet_csk(sk);
+       u32 now = tcp_time_stamp;
+       unsigned int start;
++      u64 rate64;
+       u32 rate;
+       memset(info, 0, sizeof(*info));
+@@ -2702,15 +2704,17 @@ void tcp_get_info(struct sock *sk, struc
+       info->tcpi_total_retrans = tp->total_retrans;
+       rate = READ_ONCE(sk->sk_pacing_rate);
+-      info->tcpi_pacing_rate = rate != ~0U ? rate : ~0ULL;
++      rate64 = rate != ~0U ? rate : ~0ULL;
++      put_unaligned(rate64, &info->tcpi_pacing_rate);
+       rate = READ_ONCE(sk->sk_max_pacing_rate);
+-      info->tcpi_max_pacing_rate = rate != ~0U ? rate : ~0ULL;
++      rate64 = rate != ~0U ? rate : ~0ULL;
++      put_unaligned(rate64, &info->tcpi_max_pacing_rate);
+       do {
+               start = u64_stats_fetch_begin_irq(&tp->syncp);
+-              info->tcpi_bytes_acked = tp->bytes_acked;
+-              info->tcpi_bytes_received = tp->bytes_received;
++              put_unaligned(tp->bytes_acked, &info->tcpi_bytes_acked);
++              put_unaligned(tp->bytes_received, &info->tcpi_bytes_received);
+       } while (u64_stats_fetch_retry_irq(&tp->syncp, start));
+       info->tcpi_segs_out = tp->segs_out;
+       info->tcpi_segs_in = tp->segs_in;
diff --git a/queue-4.4/tcp-dccp-fix-another-race-at-listener-dismantle.patch b/queue-4.4/tcp-dccp-fix-another-race-at-listener-dismantle.patch
new file mode 100644 (file)
index 0000000..a1a4a10
--- /dev/null
@@ -0,0 +1,263 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Eric Dumazet <edumazet@google.com>
+Date: Thu, 18 Feb 2016 05:39:18 -0800
+Subject: tcp/dccp: fix another race at listener dismantle
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 7716682cc58e305e22207d5bb315f26af6b1e243 ]
+
+Ilya reported following lockdep splat:
+
+kernel: =========================
+kernel: [ BUG: held lock freed! ]
+kernel: 4.5.0-rc1-ceph-00026-g5e0a311 #1 Not tainted
+kernel: -------------------------
+kernel: swapper/5/0 is freeing memory
+ffff880035c9d200-ffff880035c9dbff, with a lock still held there!
+kernel: (&(&queue->rskq_lock)->rlock){+.-...}, at:
+[<ffffffff816f6a88>] inet_csk_reqsk_queue_add+0x28/0xa0
+kernel: 4 locks held by swapper/5/0:
+kernel: #0:  (rcu_read_lock){......}, at: [<ffffffff8169ef6b>]
+netif_receive_skb_internal+0x4b/0x1f0
+kernel: #1:  (rcu_read_lock){......}, at: [<ffffffff816e977f>]
+ip_local_deliver_finish+0x3f/0x380
+kernel: #2:  (slock-AF_INET){+.-...}, at: [<ffffffff81685ffb>]
+sk_clone_lock+0x19b/0x440
+kernel: #3:  (&(&queue->rskq_lock)->rlock){+.-...}, at:
+[<ffffffff816f6a88>] inet_csk_reqsk_queue_add+0x28/0xa0
+
+To properly fix this issue, inet_csk_reqsk_queue_add() needs
+to return to its callers if the child as been queued
+into accept queue.
+
+We also need to make sure listener is still there before
+calling sk->sk_data_ready(), by holding a reference on it,
+since the reference carried by the child can disappear as
+soon as the child is put on accept queue.
+
+Reported-by: Ilya Dryomov <idryomov@gmail.com>
+Fixes: ebb516af60e1 ("tcp/dccp: fix race at listener dismantle phase")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/inet_connection_sock.h |    5 +++--
+ net/dccp/ipv4.c                    |   14 +++++++-------
+ net/dccp/ipv6.c                    |   14 +++++++-------
+ net/ipv4/inet_connection_sock.c    |   14 +++++++-------
+ net/ipv4/tcp_ipv4.c                |   14 +++++++-------
+ net/ipv6/tcp_ipv6.c                |   14 +++++++-------
+ 6 files changed, 38 insertions(+), 37 deletions(-)
+
+--- a/include/net/inet_connection_sock.h
++++ b/include/net/inet_connection_sock.h
+@@ -270,8 +270,9 @@ struct dst_entry *inet_csk_route_child_s
+                                           struct sock *newsk,
+                                           const struct request_sock *req);
+-void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req,
+-                            struct sock *child);
++struct sock *inet_csk_reqsk_queue_add(struct sock *sk,
++                                    struct request_sock *req,
++                                    struct sock *child);
+ void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
+                                  unsigned long timeout);
+ struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child,
+--- a/net/dccp/ipv4.c
++++ b/net/dccp/ipv4.c
+@@ -824,26 +824,26 @@ lookup:
+       if (sk->sk_state == DCCP_NEW_SYN_RECV) {
+               struct request_sock *req = inet_reqsk(sk);
+-              struct sock *nsk = NULL;
++              struct sock *nsk;
+               sk = req->rsk_listener;
+-              if (likely(sk->sk_state == DCCP_LISTEN)) {
+-                      nsk = dccp_check_req(sk, skb, req);
+-              } else {
++              if (unlikely(sk->sk_state != DCCP_LISTEN)) {
+                       inet_csk_reqsk_queue_drop_and_put(sk, req);
+                       goto lookup;
+               }
++              sock_hold(sk);
++              nsk = dccp_check_req(sk, skb, req);
+               if (!nsk) {
+                       reqsk_put(req);
+-                      goto discard_it;
++                      goto discard_and_relse;
+               }
+               if (nsk == sk) {
+-                      sock_hold(sk);
+                       reqsk_put(req);
+               } else if (dccp_child_process(sk, nsk, skb)) {
+                       dccp_v4_ctl_send_reset(sk, skb);
+-                      goto discard_it;
++                      goto discard_and_relse;
+               } else {
++                      sock_put(sk);
+                       return 0;
+               }
+       }
+--- a/net/dccp/ipv6.c
++++ b/net/dccp/ipv6.c
+@@ -691,26 +691,26 @@ lookup:
+       if (sk->sk_state == DCCP_NEW_SYN_RECV) {
+               struct request_sock *req = inet_reqsk(sk);
+-              struct sock *nsk = NULL;
++              struct sock *nsk;
+               sk = req->rsk_listener;
+-              if (likely(sk->sk_state == DCCP_LISTEN)) {
+-                      nsk = dccp_check_req(sk, skb, req);
+-              } else {
++              if (unlikely(sk->sk_state != DCCP_LISTEN)) {
+                       inet_csk_reqsk_queue_drop_and_put(sk, req);
+                       goto lookup;
+               }
++              sock_hold(sk);
++              nsk = dccp_check_req(sk, skb, req);
+               if (!nsk) {
+                       reqsk_put(req);
+-                      goto discard_it;
++                      goto discard_and_relse;
+               }
+               if (nsk == sk) {
+-                      sock_hold(sk);
+                       reqsk_put(req);
+               } else if (dccp_child_process(sk, nsk, skb)) {
+                       dccp_v6_ctl_send_reset(sk, skb);
+-                      goto discard_it;
++                      goto discard_and_relse;
+               } else {
++                      sock_put(sk);
+                       return 0;
+               }
+       }
+--- a/net/ipv4/inet_connection_sock.c
++++ b/net/ipv4/inet_connection_sock.c
+@@ -789,14 +789,16 @@ static void inet_child_forget(struct soc
+       reqsk_put(req);
+ }
+-void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req,
+-                            struct sock *child)
++struct sock *inet_csk_reqsk_queue_add(struct sock *sk,
++                                    struct request_sock *req,
++                                    struct sock *child)
+ {
+       struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
+       spin_lock(&queue->rskq_lock);
+       if (unlikely(sk->sk_state != TCP_LISTEN)) {
+               inet_child_forget(sk, req, child);
++              child = NULL;
+       } else {
+               req->sk = child;
+               req->dl_next = NULL;
+@@ -808,6 +810,7 @@ void inet_csk_reqsk_queue_add(struct soc
+               sk_acceptq_added(sk);
+       }
+       spin_unlock(&queue->rskq_lock);
++      return child;
+ }
+ EXPORT_SYMBOL(inet_csk_reqsk_queue_add);
+@@ -817,11 +820,8 @@ struct sock *inet_csk_complete_hashdance
+       if (own_req) {
+               inet_csk_reqsk_queue_drop(sk, req);
+               reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
+-              inet_csk_reqsk_queue_add(sk, req, child);
+-              /* Warning: caller must not call reqsk_put(req);
+-               * child stole last reference on it.
+-               */
+-              return child;
++              if (inet_csk_reqsk_queue_add(sk, req, child))
++                      return child;
+       }
+       /* Too bad, another child took ownership of the request, undo. */
+       bh_unlock_sock(child);
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -1594,30 +1594,30 @@ process:
+       if (sk->sk_state == TCP_NEW_SYN_RECV) {
+               struct request_sock *req = inet_reqsk(sk);
+-              struct sock *nsk = NULL;
++              struct sock *nsk;
+               sk = req->rsk_listener;
+               if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
+                       reqsk_put(req);
+                       goto discard_it;
+               }
+-              if (likely(sk->sk_state == TCP_LISTEN)) {
+-                      nsk = tcp_check_req(sk, skb, req, false);
+-              } else {
++              if (unlikely(sk->sk_state != TCP_LISTEN)) {
+                       inet_csk_reqsk_queue_drop_and_put(sk, req);
+                       goto lookup;
+               }
++              sock_hold(sk);
++              nsk = tcp_check_req(sk, skb, req, false);
+               if (!nsk) {
+                       reqsk_put(req);
+-                      goto discard_it;
++                      goto discard_and_relse;
+               }
+               if (nsk == sk) {
+-                      sock_hold(sk);
+                       reqsk_put(req);
+               } else if (tcp_child_process(sk, nsk, skb)) {
+                       tcp_v4_send_reset(nsk, skb);
+-                      goto discard_it;
++                      goto discard_and_relse;
+               } else {
++                      sock_put(sk);
+                       return 0;
+               }
+       }
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -1388,7 +1388,7 @@ process:
+       if (sk->sk_state == TCP_NEW_SYN_RECV) {
+               struct request_sock *req = inet_reqsk(sk);
+-              struct sock *nsk = NULL;
++              struct sock *nsk;
+               sk = req->rsk_listener;
+               tcp_v6_fill_cb(skb, hdr, th);
+@@ -1396,24 +1396,24 @@ process:
+                       reqsk_put(req);
+                       goto discard_it;
+               }
+-              if (likely(sk->sk_state == TCP_LISTEN)) {
+-                      nsk = tcp_check_req(sk, skb, req, false);
+-              } else {
++              if (unlikely(sk->sk_state != TCP_LISTEN)) {
+                       inet_csk_reqsk_queue_drop_and_put(sk, req);
+                       goto lookup;
+               }
++              sock_hold(sk);
++              nsk = tcp_check_req(sk, skb, req, false);
+               if (!nsk) {
+                       reqsk_put(req);
+-                      goto discard_it;
++                      goto discard_and_relse;
+               }
+               if (nsk == sk) {
+-                      sock_hold(sk);
+                       reqsk_put(req);
+                       tcp_v6_restore_cb(skb);
+               } else if (tcp_child_process(sk, nsk, skb)) {
+                       tcp_v6_send_reset(nsk, skb);
+-                      goto discard_it;
++                      goto discard_and_relse;
+               } else {
++                      sock_put(sk);
+                       return 0;
+               }
+       }
diff --git a/queue-4.4/tcp-do-not-drop-syn_recv-on-all-icmp-reports.patch b/queue-4.4/tcp-do-not-drop-syn_recv-on-all-icmp-reports.patch
new file mode 100644 (file)
index 0000000..55a018a
--- /dev/null
@@ -0,0 +1,105 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 2 Feb 2016 19:31:12 -0800
+Subject: tcp: do not drop syn_recv on all icmp reports
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 9cf7490360bf2c46a16b7525f899e4970c5fc144 ]
+
+Petr Novopashenniy reported that ICMP redirects on SYN_RECV sockets
+were leading to RST.
+
+This is of course incorrect.
+
+A specific list of ICMP messages should be able to drop a SYN_RECV.
+
+For instance, a REDIRECT on SYN_RECV shall be ignored, as we do
+not hold a dst per SYN_RECV pseudo request.
+
+Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=111751
+Fixes: 079096f103fa ("tcp/dccp: install syn_recv requests into ehash table")
+Reported-by: Petr Novopashenniy <pety@rusnet.ru>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/tcp.h   |    2 +-
+ net/ipv4/tcp_ipv4.c |   11 ++++++++---
+ net/ipv6/tcp_ipv6.c |    5 +++--
+ 3 files changed, 12 insertions(+), 6 deletions(-)
+
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -449,7 +449,7 @@ const u8 *tcp_parse_md5sig_option(const
+ void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb);
+ void tcp_v4_mtu_reduced(struct sock *sk);
+-void tcp_req_err(struct sock *sk, u32 seq);
++void tcp_req_err(struct sock *sk, u32 seq, bool abort);
+ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
+ struct sock *tcp_create_openreq_child(const struct sock *sk,
+                                     struct request_sock *req,
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -312,7 +312,7 @@ static void do_redirect(struct sk_buff *
+ /* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */
+-void tcp_req_err(struct sock *sk, u32 seq)
++void tcp_req_err(struct sock *sk, u32 seq, bool abort)
+ {
+       struct request_sock *req = inet_reqsk(sk);
+       struct net *net = sock_net(sk);
+@@ -324,7 +324,7 @@ void tcp_req_err(struct sock *sk, u32 se
+       if (seq != tcp_rsk(req)->snt_isn) {
+               NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
+-      } else {
++      } else if (abort) {
+               /*
+                * Still in SYN_RECV, just remove it silently.
+                * There is no good way to pass the error to the newly
+@@ -384,7 +384,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb
+       }
+       seq = ntohl(th->seq);
+       if (sk->sk_state == TCP_NEW_SYN_RECV)
+-              return tcp_req_err(sk, seq);
++              return tcp_req_err(sk, seq,
++                                type == ICMP_PARAMETERPROB ||
++                                type == ICMP_TIME_EXCEEDED ||
++                                (type == ICMP_DEST_UNREACH &&
++                                 (code == ICMP_NET_UNREACH ||
++                                  code == ICMP_HOST_UNREACH)));
+       bh_lock_sock(sk);
+       /* If too many ICMPs get dropped on busy
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -328,6 +328,7 @@ static void tcp_v6_err(struct sk_buff *s
+       struct tcp_sock *tp;
+       __u32 seq, snd_una;
+       struct sock *sk;
++      bool fatal;
+       int err;
+       sk = __inet6_lookup_established(net, &tcp_hashinfo,
+@@ -346,8 +347,9 @@ static void tcp_v6_err(struct sk_buff *s
+               return;
+       }
+       seq = ntohl(th->seq);
++      fatal = icmpv6_err_convert(type, code, &err);
+       if (sk->sk_state == TCP_NEW_SYN_RECV)
+-              return tcp_req_err(sk, seq);
++              return tcp_req_err(sk, seq, fatal);
+       bh_lock_sock(sk);
+       if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
+@@ -401,7 +403,6 @@ static void tcp_v6_err(struct sk_buff *s
+               goto out;
+       }
+-      icmpv6_err_convert(type, code, &err);
+       /* Might be for an request_sock */
+       switch (sk->sk_state) {
diff --git a/queue-4.4/tcp-fix-null-deref-in-tcp_v4_send_ack.patch b/queue-4.4/tcp-fix-null-deref-in-tcp_v4_send_ack.patch
new file mode 100644 (file)
index 0000000..ebdf295
--- /dev/null
@@ -0,0 +1,95 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Eric Dumazet <edumazet@google.com>
+Date: Thu, 21 Jan 2016 08:02:54 -0800
+Subject: tcp: fix NULL deref in tcp_v4_send_ack()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit e62a123b8ef7c5dc4db2c16383d506860ad21b47 ]
+
+Neal reported crashes with this stack trace :
+
+ RIP: 0010:[<ffffffff8c57231b>] tcp_v4_send_ack+0x41/0x20f
+...
+ CR2: 0000000000000018 CR3: 000000044005c000 CR4: 00000000001427e0
+...
+  [<ffffffff8c57258e>] tcp_v4_reqsk_send_ack+0xa5/0xb4
+  [<ffffffff8c1a7caa>] tcp_check_req+0x2ea/0x3e0
+  [<ffffffff8c19e420>] tcp_rcv_state_process+0x850/0x2500
+  [<ffffffff8c1a6d21>] tcp_v4_do_rcv+0x141/0x330
+  [<ffffffff8c56cdb2>] sk_backlog_rcv+0x21/0x30
+  [<ffffffff8c098bbd>] tcp_recvmsg+0x75d/0xf90
+  [<ffffffff8c0a8700>] inet_recvmsg+0x80/0xa0
+  [<ffffffff8c17623e>] sock_aio_read+0xee/0x110
+  [<ffffffff8c066fcf>] do_sync_read+0x6f/0xa0
+  [<ffffffff8c0673a1>] SyS_read+0x1e1/0x290
+  [<ffffffff8c5ca262>] system_call_fastpath+0x16/0x1b
+
+The problem here is the skb we provide to tcp_v4_send_ack() had to
+be parked in the backlog of a new TCP fastopen child because this child
+was owned by the user at the time an out of window packet arrived.
+
+Before queuing a packet, TCP has to set skb->dev to NULL as the device
+could disappear before packet is removed from the queue.
+
+Fix this issue by using the net pointer provided by the socket (being a
+timewait or a request socket).
+
+IPv6 is immune to the bug : tcp_v6_send_response() already gets the net
+pointer from the socket if provided.
+
+Fixes: 168a8f58059a ("tcp: TCP Fast Open Server - main code path")
+Reported-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Jerry Chu <hkchu@google.com>
+Cc: Yuchung Cheng <ycheng@google.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_ipv4.c |   13 ++++++++-----
+ 1 file changed, 8 insertions(+), 5 deletions(-)
+
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -705,7 +705,8 @@ release_sk1:
+    outside socket context is ugly, certainly. What can I do?
+  */
+-static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
++static void tcp_v4_send_ack(struct net *net,
++                          struct sk_buff *skb, u32 seq, u32 ack,
+                           u32 win, u32 tsval, u32 tsecr, int oif,
+                           struct tcp_md5sig_key *key,
+                           int reply_flags, u8 tos)
+@@ -720,7 +721,6 @@ static void tcp_v4_send_ack(struct sk_bu
+                       ];
+       } rep;
+       struct ip_reply_arg arg;
+-      struct net *net = dev_net(skb_dst(skb)->dev);
+       memset(&rep.th, 0, sizeof(struct tcphdr));
+       memset(&arg, 0, sizeof(arg));
+@@ -782,7 +782,8 @@ static void tcp_v4_timewait_ack(struct s
+       struct inet_timewait_sock *tw = inet_twsk(sk);
+       struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
+-      tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
++      tcp_v4_send_ack(sock_net(sk), skb,
++                      tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
+                       tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
+                       tcp_time_stamp + tcptw->tw_ts_offset,
+                       tcptw->tw_ts_recent,
+@@ -801,8 +802,10 @@ static void tcp_v4_reqsk_send_ack(const
+       /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
+        * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
+        */
+-      tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
+-                      tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
++      u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
++                                           tcp_sk(sk)->snd_nxt;
++
++      tcp_v4_send_ack(sock_net(sk), skb, seq,
+                       tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd,
+                       tcp_time_stamp,
+                       req->ts_recent,
diff --git a/queue-4.4/tcp-md5-release-request-socket-instead-of-listener.patch b/queue-4.4/tcp-md5-release-request-socket-instead-of-listener.patch
new file mode 100644 (file)
index 0000000..b916bc6
--- /dev/null
@@ -0,0 +1,37 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Eric Dumazet <edumazet@google.com>
+Date: Thu, 11 Feb 2016 22:50:29 -0800
+Subject: tcp: md5: release request socket instead of listener
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 729235554d805c63e5e274fcc6a98e71015dd847 ]
+
+If tcp_v4_inbound_md5_hash() returns an error, we must release
+the refcount on the request socket, not on the listener.
+
+The bug was added for IPv4 only.
+
+Fixes: 079096f103fac ("tcp/dccp: install syn_recv requests into ehash table")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_ipv4.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -1597,8 +1597,10 @@ process:
+               struct sock *nsk = NULL;
+               sk = req->rsk_listener;
+-              if (tcp_v4_inbound_md5_hash(sk, skb))
+-                      goto discard_and_relse;
++              if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
++                      reqsk_put(req);
++                      goto discard_it;
++              }
+               if (likely(sk->sk_state == TCP_LISTEN)) {
+                       nsk = tcp_check_req(sk, skb, req, false);
+               } else {
diff --git a/queue-4.4/tg3-fix-for-tg3-transmit-queue-0-timed-out-when-too-many-gso_segs.patch b/queue-4.4/tg3-fix-for-tg3-transmit-queue-0-timed-out-when-too-many-gso_segs.patch
new file mode 100644 (file)
index 0000000..ba5f07f
--- /dev/null
@@ -0,0 +1,86 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Siva Reddy Kallam <siva.kallam@broadcom.com>
+Date: Wed, 3 Feb 2016 14:09:38 +0530
+Subject: tg3: Fix for tg3 transmit queue 0 timed out when too many gso_segs
+
+From: Siva Reddy Kallam <siva.kallam@broadcom.com>
+
+[ Upstream commit b7d987295c74500b733a0ba07f9a9bcc4074fa83 ]
+
+tg3_tso_bug() can hit a condition where the entire tx ring is not big
+enough to segment the GSO packet. For example, if MSS is very small,
+gso_segs can exceed the tx ring size. When we hit the condition, it
+will cause tx timeout.
+
+tg3_tso_bug() is called to handle TSO and DMA hardware bugs.
+For TSO bugs, if tg3_tso_bug() cannot succeed, we have to drop the packet.
+For DMA bugs, we can still fall back to linearize the SKB and let the
+hardware transmit the TSO packet.
+
+This patch adds a function tg3_tso_bug_gso_check() to check if there
+are enough tx descriptors for GSO before calling tg3_tso_bug().
+The caller will then handle the error appropriately - drop or
+lineraize the SKB.
+
+v2: Corrected patch description to avoid confusion.
+
+Signed-off-by: Siva Reddy Kallam <siva.kallam@broadcom.com>
+Signed-off-by: Michael Chan <mchan@broadcom.com>
+Acked-by: Prashant Sreedharan <prashant@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/tg3.c |   25 +++++++++++++++++++------
+ 1 file changed, 19 insertions(+), 6 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/tg3.c
++++ b/drivers/net/ethernet/broadcom/tg3.c
+@@ -7833,6 +7833,14 @@ static int tigon3_dma_hwbug_workaround(s
+       return ret;
+ }
++static bool tg3_tso_bug_gso_check(struct tg3_napi *tnapi, struct sk_buff *skb)
++{
++      /* Check if we will never have enough descriptors,
++       * as gso_segs can be more than current ring size
++       */
++      return skb_shinfo(skb)->gso_segs < tnapi->tx_pending / 3;
++}
++
+ static netdev_tx_t tg3_start_xmit(struct sk_buff *, struct net_device *);
+ /* Use GSO to workaround all TSO packets that meet HW bug conditions
+@@ -7936,14 +7944,19 @@ static netdev_tx_t tg3_start_xmit(struct
+                * vlan encapsulated.
+                */
+               if (skb->protocol == htons(ETH_P_8021Q) ||
+-                  skb->protocol == htons(ETH_P_8021AD))
+-                      return tg3_tso_bug(tp, tnapi, txq, skb);
++                  skb->protocol == htons(ETH_P_8021AD)) {
++                      if (tg3_tso_bug_gso_check(tnapi, skb))
++                              return tg3_tso_bug(tp, tnapi, txq, skb);
++                      goto drop;
++              }
+               if (!skb_is_gso_v6(skb)) {
+                       if (unlikely((ETH_HLEN + hdr_len) > 80) &&
+-                          tg3_flag(tp, TSO_BUG))
+-                              return tg3_tso_bug(tp, tnapi, txq, skb);
+-
++                          tg3_flag(tp, TSO_BUG)) {
++                              if (tg3_tso_bug_gso_check(tnapi, skb))
++                                      return tg3_tso_bug(tp, tnapi, txq, skb);
++                              goto drop;
++                      }
+                       ip_csum = iph->check;
+                       ip_tot_len = iph->tot_len;
+                       iph->check = 0;
+@@ -8075,7 +8088,7 @@ static netdev_tx_t tg3_start_xmit(struct
+       if (would_hit_hwbug) {
+               tg3_tx_skb_unmap(tnapi, tnapi->tx_prod, i);
+-              if (mss) {
++              if (mss && tg3_tso_bug_gso_check(tnapi, skb)) {
+                       /* If it's a TSO packet, do GSO instead of
+                        * allocating and copying to a large linear SKB
+                        */
diff --git a/queue-4.4/tipc-fix-connection-abort-during-subscription-cancel.patch b/queue-4.4/tipc-fix-connection-abort-during-subscription-cancel.patch
new file mode 100644 (file)
index 0000000..abffa35
--- /dev/null
@@ -0,0 +1,57 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com>
+Date: Wed, 27 Jan 2016 11:35:59 +0100
+Subject: tipc: fix connection abort during subscription cancel
+
+From: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com>
+
+[ Upstream commit 4d5cfcba2f6ec494d8810b9e3c0a7b06255c8067 ]
+
+In 'commit 7fe8097cef5f ("tipc: fix nullpointer bug when subscribing
+to events")', we terminate the connection if the subscription
+creation fails.
+In the same commit, the subscription creation result was based on
+the value of the subscription pointer (set in the function) instead
+of the return code.
+
+Unfortunately, the same function tipc_subscrp_create() handles
+subscription cancel request. For a subscription cancellation request,
+the subscription pointer cannot be set. Thus if a subscriber has
+several subscriptions and cancels any of them, the connection is
+terminated.
+
+In this commit, we terminate the connection based on the return value
+of tipc_subscrp_create().
+Fixes: commit 7fe8097cef5f ("tipc: fix nullpointer bug when subscribing to events")
+
+Reviewed-by:  Jon Maloy <jon.maloy@ericsson.com>
+Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/tipc/subscr.c |   11 +++++------
+ 1 file changed, 5 insertions(+), 6 deletions(-)
+
+--- a/net/tipc/subscr.c
++++ b/net/tipc/subscr.c
+@@ -289,15 +289,14 @@ static void tipc_subscrb_rcv_cb(struct n
+                               struct sockaddr_tipc *addr, void *usr_data,
+                               void *buf, size_t len)
+ {
+-      struct tipc_subscriber *subscriber = usr_data;
++      struct tipc_subscriber *subscrb = usr_data;
+       struct tipc_subscription *sub = NULL;
+       struct tipc_net *tn = net_generic(net, tipc_net_id);
+-      tipc_subscrp_create(net, (struct tipc_subscr *)buf, subscriber, &sub);
+-      if (sub)
+-              tipc_nametbl_subscribe(sub);
+-      else
+-              tipc_conn_terminate(tn->topsrv, subscriber->conid);
++      if (tipc_subscrp_create(net, (struct tipc_subscr *)buf, subscrb, &sub))
++              return tipc_conn_terminate(tn->topsrv, subscrb->conid);
++
++      tipc_nametbl_subscribe(sub);
+ }
+ /* Handle one request to establish a new subscriber */
diff --git a/queue-4.4/tipc-fix-premature-addition-of-node-to-lookup-table.patch b/queue-4.4/tipc-fix-premature-addition-of-node-to-lookup-table.patch
new file mode 100644 (file)
index 0000000..63855d7
--- /dev/null
@@ -0,0 +1,59 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Jon Paul Maloy <jon.maloy@ericsson.com>
+Date: Wed, 10 Feb 2016 16:14:57 -0500
+Subject: tipc: fix premature addition of node to lookup table
+
+From: Jon Paul Maloy <jon.maloy@ericsson.com>
+
+[ Upstream commit d5c91fb72f1652ea3026925240a0998a42ddb16b ]
+
+In commit 5266698661401a ("tipc: let broadcast packet reception
+use new link receive function") we introduced a new per-node
+broadcast reception link instance. This link is created at the
+moment the node itself is created. Unfortunately, the allocation
+is done after the node instance has already been added to the node
+lookup hash table. This creates a potential race condition, where
+arriving broadcast packets are able to find and access the node
+before it has been fully initialized, and before the above mentioned
+link has been created. The result is occasional crashes in the function
+tipc_bcast_rcv(), which is trying to access the not-yet existing link.
+
+We fix this by deferring the addition of the node instance until after
+it has been fully initialized in the function tipc_node_create().
+
+Acked-by: Ying Xue <ying.xue@windriver.com>
+Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/tipc/node.c |   12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+--- a/net/tipc/node.c
++++ b/net/tipc/node.c
+@@ -168,12 +168,6 @@ struct tipc_node *tipc_node_create(struc
+       skb_queue_head_init(&n_ptr->bc_entry.inputq1);
+       __skb_queue_head_init(&n_ptr->bc_entry.arrvq);
+       skb_queue_head_init(&n_ptr->bc_entry.inputq2);
+-      hlist_add_head_rcu(&n_ptr->hash, &tn->node_htable[tipc_hashfn(addr)]);
+-      list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
+-              if (n_ptr->addr < temp_node->addr)
+-                      break;
+-      }
+-      list_add_tail_rcu(&n_ptr->list, &temp_node->list);
+       n_ptr->state = SELF_DOWN_PEER_LEAVING;
+       n_ptr->signature = INVALID_NODE_SIG;
+       n_ptr->active_links[0] = INVALID_BEARER_ID;
+@@ -193,6 +187,12 @@ struct tipc_node *tipc_node_create(struc
+       tipc_node_get(n_ptr);
+       setup_timer(&n_ptr->timer, tipc_node_timeout, (unsigned long)n_ptr);
+       n_ptr->keepalive_intv = U32_MAX;
++      hlist_add_head_rcu(&n_ptr->hash, &tn->node_htable[tipc_hashfn(addr)]);
++      list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
++              if (n_ptr->addr < temp_node->addr)
++                      break;
++      }
++      list_add_tail_rcu(&n_ptr->list, &temp_node->list);
+ exit:
+       spin_unlock_bh(&tn->node_list_lock);
+       return n_ptr;
diff --git a/queue-4.4/tipc-unlock-in-error-path.patch b/queue-4.4/tipc-unlock-in-error-path.patch
new file mode 100644 (file)
index 0000000..8d6956f
--- /dev/null
@@ -0,0 +1,32 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Insu Yun <wuninsu@gmail.com>
+Date: Wed, 17 Feb 2016 11:47:35 -0500
+Subject: tipc: unlock in error path
+
+From: Insu Yun <wuninsu@gmail.com>
+
+[ Upstream commit b53ce3e7d407aa4196877a48b8601181162ab158 ]
+
+tipc_bcast_unlock need to be unlocked in error path.
+
+Signed-off-by: Insu Yun <wuninsu@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/tipc/bcast.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/tipc/bcast.c
++++ b/net/tipc/bcast.c
+@@ -399,8 +399,10 @@ int tipc_nl_add_bc_link(struct net *net,
+       hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family,
+                         NLM_F_MULTI, TIPC_NL_LINK_GET);
+-      if (!hdr)
++      if (!hdr) {
++              tipc_bcast_unlock(net);
+               return -EMSGSIZE;
++      }
+       attrs = nla_nest_start(msg->skb, TIPC_NLA_LINK);
+       if (!attrs)
diff --git a/queue-4.4/tunnels-allow-ipv6-udp-checksums-to-be-correctly-controlled.patch b/queue-4.4/tunnels-allow-ipv6-udp-checksums-to-be-correctly-controlled.patch
new file mode 100644 (file)
index 0000000..7e78df5
--- /dev/null
@@ -0,0 +1,71 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Jesse Gross <jesse@kernel.org>
+Date: Wed, 20 Jan 2016 16:22:47 -0800
+Subject: tunnels: Allow IPv6 UDP checksums to be correctly controlled.
+
+From: Jesse Gross <jesse@kernel.org>
+
+[ Upstream commit 35e2d1152b22eae99c961affbe85374bef05a775 ]
+
+When configuring checksums on UDP tunnels, the flags are different
+for IPv4 vs. IPv6 (and reversed). However, when lightweight tunnels
+are enabled the flags used are always the IPv4 versions, which are
+ignored in the IPv6 code paths. This uses the correct IPv6 flags, so
+checksums can be controlled appropriately.
+
+Fixes: a725e514 ("vxlan: metadata based tunneling for IPv6")
+Fixes: abe492b4 ("geneve: UDP checksum configuration via netlink")
+Signed-off-by: Jesse Gross <jesse@kernel.org>
+Acked-by: Jiri Benc <jbenc@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/vxlan.c |   23 ++++++++++++++++-------
+ 1 file changed, 16 insertions(+), 7 deletions(-)
+
+--- a/drivers/net/vxlan.c
++++ b/drivers/net/vxlan.c
+@@ -1984,11 +1984,6 @@ static void vxlan_xmit_one(struct sk_buf
+                                    vxlan->cfg.port_max, true);
+       if (info) {
+-              if (info->key.tun_flags & TUNNEL_CSUM)
+-                      flags |= VXLAN_F_UDP_CSUM;
+-              else
+-                      flags &= ~VXLAN_F_UDP_CSUM;
+-
+               ttl = info->key.ttl;
+               tos = info->key.tos;
+@@ -2003,8 +1998,15 @@ static void vxlan_xmit_one(struct sk_buf
+                       goto drop;
+               sk = vxlan->vn4_sock->sock->sk;
+-              if (info && (info->key.tun_flags & TUNNEL_DONT_FRAGMENT))
+-                      df = htons(IP_DF);
++              if (info) {
++                      if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT)
++                              df = htons(IP_DF);
++
++                      if (info->key.tun_flags & TUNNEL_CSUM)
++                              flags |= VXLAN_F_UDP_CSUM;
++                      else
++                              flags &= ~VXLAN_F_UDP_CSUM;
++              }
+               memset(&fl4, 0, sizeof(fl4));
+               fl4.flowi4_oif = rdst ? rdst->remote_ifindex : 0;
+@@ -2102,6 +2104,13 @@ static void vxlan_xmit_one(struct sk_buf
+                       return;
+               }
++              if (info) {
++                      if (info->key.tun_flags & TUNNEL_CSUM)
++                              flags &= ~VXLAN_F_UDP_ZERO_CSUM6_TX;
++                      else
++                              flags |= VXLAN_F_UDP_ZERO_CSUM6_TX;
++              }
++
+               ttl = ttl ? : ip6_dst_hoplimit(ndst);
+               err = vxlan6_xmit_skb(ndst, sk, skb, dev, &saddr, &dst->sin6.sin6_addr,
+                                     0, ttl, src_port, dst_port, htonl(vni << 8), md,
diff --git a/queue-4.4/unix-correctly-track-in-flight-fds-in-sending-process-user_struct.patch b/queue-4.4/unix-correctly-track-in-flight-fds-in-sending-process-user_struct.patch
new file mode 100644 (file)
index 0000000..904f803
--- /dev/null
@@ -0,0 +1,150 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Date: Wed, 3 Feb 2016 02:11:03 +0100
+Subject: unix: correctly track in-flight fds in sending process user_struct
+
+From: Hannes Frederic Sowa <hannes@stressinduktion.org>
+
+[ Upstream commit 415e3d3e90ce9e18727e8843ae343eda5a58fad6 ]
+
+The commit referenced in the Fixes tag incorrectly accounted the number
+of in-flight fds over a unix domain socket to the original opener
+of the file-descriptor. This allows another process to arbitrary
+deplete the original file-openers resource limit for the maximum of
+open files. Instead the sending processes and its struct cred should
+be credited.
+
+To do so, we add a reference counted struct user_struct pointer to the
+scm_fp_list and use it to account for the number of inflight unix fds.
+
+Fixes: 712f4aad406bb1 ("unix: properly account for FDs passed over unix sockets")
+Reported-by: David Herrmann <dh.herrmann@gmail.com>
+Cc: David Herrmann <dh.herrmann@gmail.com>
+Cc: Willy Tarreau <w@1wt.eu>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/af_unix.h |    4 ++--
+ include/net/scm.h     |    1 +
+ net/core/scm.c        |    7 +++++++
+ net/unix/af_unix.c    |    4 ++--
+ net/unix/garbage.c    |    8 ++++----
+ 5 files changed, 16 insertions(+), 8 deletions(-)
+
+--- a/include/net/af_unix.h
++++ b/include/net/af_unix.h
+@@ -6,8 +6,8 @@
+ #include <linux/mutex.h>
+ #include <net/sock.h>
+-void unix_inflight(struct file *fp);
+-void unix_notinflight(struct file *fp);
++void unix_inflight(struct user_struct *user, struct file *fp);
++void unix_notinflight(struct user_struct *user, struct file *fp);
+ void unix_gc(void);
+ void wait_for_unix_gc(void);
+ struct sock *unix_get_socket(struct file *filp);
+--- a/include/net/scm.h
++++ b/include/net/scm.h
+@@ -21,6 +21,7 @@ struct scm_creds {
+ struct scm_fp_list {
+       short                   count;
+       short                   max;
++      struct user_struct      *user;
+       struct file             *fp[SCM_MAX_FD];
+ };
+--- a/net/core/scm.c
++++ b/net/core/scm.c
+@@ -87,6 +87,7 @@ static int scm_fp_copy(struct cmsghdr *c
+               *fplp = fpl;
+               fpl->count = 0;
+               fpl->max = SCM_MAX_FD;
++              fpl->user = NULL;
+       }
+       fpp = &fpl->fp[fpl->count];
+@@ -107,6 +108,10 @@ static int scm_fp_copy(struct cmsghdr *c
+               *fpp++ = file;
+               fpl->count++;
+       }
++
++      if (!fpl->user)
++              fpl->user = get_uid(current_user());
++
+       return num;
+ }
+@@ -119,6 +124,7 @@ void __scm_destroy(struct scm_cookie *sc
+               scm->fp = NULL;
+               for (i=fpl->count-1; i>=0; i--)
+                       fput(fpl->fp[i]);
++              free_uid(fpl->user);
+               kfree(fpl);
+       }
+ }
+@@ -336,6 +342,7 @@ struct scm_fp_list *scm_fp_dup(struct sc
+               for (i = 0; i < fpl->count; i++)
+                       get_file(fpl->fp[i]);
+               new_fpl->max = new_fpl->count;
++              new_fpl->user = get_uid(fpl->user);
+       }
+       return new_fpl;
+ }
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -1496,7 +1496,7 @@ static void unix_detach_fds(struct scm_c
+       UNIXCB(skb).fp = NULL;
+       for (i = scm->fp->count-1; i >= 0; i--)
+-              unix_notinflight(scm->fp->fp[i]);
++              unix_notinflight(scm->fp->user, scm->fp->fp[i]);
+ }
+ static void unix_destruct_scm(struct sk_buff *skb)
+@@ -1561,7 +1561,7 @@ static int unix_attach_fds(struct scm_co
+               return -ENOMEM;
+       for (i = scm->fp->count - 1; i >= 0; i--)
+-              unix_inflight(scm->fp->fp[i]);
++              unix_inflight(scm->fp->user, scm->fp->fp[i]);
+       return max_level;
+ }
+--- a/net/unix/garbage.c
++++ b/net/unix/garbage.c
+@@ -116,7 +116,7 @@ struct sock *unix_get_socket(struct file
+  * descriptor if it is for an AF_UNIX socket.
+  */
+-void unix_inflight(struct file *fp)
++void unix_inflight(struct user_struct *user, struct file *fp)
+ {
+       struct sock *s = unix_get_socket(fp);
+@@ -133,11 +133,11 @@ void unix_inflight(struct file *fp)
+               }
+               unix_tot_inflight++;
+       }
+-      fp->f_cred->user->unix_inflight++;
++      user->unix_inflight++;
+       spin_unlock(&unix_gc_lock);
+ }
+-void unix_notinflight(struct file *fp)
++void unix_notinflight(struct user_struct *user, struct file *fp)
+ {
+       struct sock *s = unix_get_socket(fp);
+@@ -152,7 +152,7 @@ void unix_notinflight(struct file *fp)
+                       list_del_init(&u->link);
+               unix_tot_inflight--;
+       }
+-      fp->f_cred->user->unix_inflight--;
++      user->unix_inflight--;
+       spin_unlock(&unix_gc_lock);
+ }
diff --git a/queue-4.4/unix_diag-fix-incorrect-sign-extension-in-unix_lookup_by_ino.patch b/queue-4.4/unix_diag-fix-incorrect-sign-extension-in-unix_lookup_by_ino.patch
new file mode 100644 (file)
index 0000000..f476e23
--- /dev/null
@@ -0,0 +1,39 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: "Dmitry V. Levin" <ldv@altlinux.org>
+Date: Fri, 19 Feb 2016 04:27:48 +0300
+Subject: unix_diag: fix incorrect sign extension in unix_lookup_by_ino
+
+From: "Dmitry V. Levin" <ldv@altlinux.org>
+
+[ Upstream commit b5f0549231ffb025337be5a625b0ff9f52b016f0 ]
+
+The value passed by unix_diag_get_exact to unix_lookup_by_ino has type
+__u32, but unix_lookup_by_ino's argument ino has type int, which is not
+a problem yet.
+However, when ino is compared with sock_i_ino return value of type
+unsigned long, ino is sign extended to signed long, and this results
+to incorrect comparison on 64-bit architectures for inode numbers
+greater than INT_MAX.
+
+This bug was found by strace test suite.
+
+Fixes: 5d3cae8bc39d ("unix_diag: Dumping exact socket core")
+Signed-off-by: Dmitry V. Levin <ldv@altlinux.org>
+Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/unix/diag.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/unix/diag.c
++++ b/net/unix/diag.c
+@@ -220,7 +220,7 @@ done:
+       return skb->len;
+ }
+-static struct sock *unix_lookup_by_ino(int ino)
++static struct sock *unix_lookup_by_ino(unsigned int ino)
+ {
+       int i;
+       struct sock *sk;