4.8-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 18 Nov 2016 10:36:31 +0000 (11:36 +0100)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 18 Nov 2016 10:36:31 +0000 (11:36 +0100)

diff --git a/queue-4.8/bgmac-stop-clearing-dma-receive-control-register-right-after-it-is-set.patch b/queue-4.8/bgmac-stop-clearing-dma-receive-control-register-right-after-it-is-set.patch

new file mode 100644 (file)

index 0000000..0d9b406
--- /dev/null
+++ b/queue-4.8/bgmac-stop-clearing-dma-receive-control-register-right-after-it-is-set.patch
@@ -0,0 +1,59 @@
+From foo@baz Fri Nov 18 11:35:46 CET 2016
+From: Andy Gospodarek <gospo@broadcom.com>
+Date: Mon, 31 Oct 2016 13:32:03 -0400
+Subject: bgmac: stop clearing DMA receive control register right after it is set
+
+From: Andy Gospodarek <gospo@broadcom.com>
+
+
+[ Upstream commit fcdefccac976ee51dd6071832b842d8fb41c479c ]
+
+Current bgmac code initializes some DMA settings in the receive control
+register for some hardware and then immediately clears those settings.
+Not clearing those settings results in ~420Mbps *improvement* in
+throughput; this system can now receive frames at line-rate on Broadcom
+5871x hardware compared to ~520Mbps today.  I also tested a few other
+values but found there to be no discernible difference in CPU
+utilization even if burst size and prefetching values are different.
+
+On the hardware tested there was no need to keep the code that cleared
+all but bits 16-17, but since there is a wide variety of hardware that
+uses this driver (I did not look at all hardware docs for hardware using
+this IP block), I find it wise to move this call up and clear bits just
+after reading the default value from the hardware rather than completely
+removing it.
+
+This is a good candidate for -stable >=3.14 since that is when the code
+that was supposed to improve performance (but did not) was introduced.
+
+Signed-off-by: Andy Gospodarek <gospo@broadcom.com>
+Fixes: 56ceecde1f29 ("bgmac: initialize the DMA controller of core...")
+Cc: Hauke Mehrtens <hauke@hauke-m.de>
+Acked-by: Hauke Mehrtens <hauke@hauke-m.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/bgmac.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/broadcom/bgmac.c
++++ b/drivers/net/ethernet/broadcom/bgmac.c
+@@ -307,6 +307,10 @@ static void bgmac_dma_rx_enable(struct b
+       u32 ctl;
+ 
+       ctl = bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_RX_CTL);
++
++      /* preserve ONLY bits 16-17 from current hardware value */
++      ctl &= BGMAC_DMA_RX_ADDREXT_MASK;
++
+       if (bgmac->feature_flags & BGMAC_FEAT_RX_MASK_SETUP) {
+               ctl &= ~BGMAC_DMA_RX_BL_MASK;
+               ctl |= BGMAC_DMA_RX_BL_128 << BGMAC_DMA_RX_BL_SHIFT;
+@@ -317,7 +321,6 @@ static void bgmac_dma_rx_enable(struct b
+               ctl &= ~BGMAC_DMA_RX_PT_MASK;
+               ctl |= BGMAC_DMA_RX_PT_1 << BGMAC_DMA_RX_PT_SHIFT;
+       }
+-      ctl &= BGMAC_DMA_RX_ADDREXT_MASK;
+       ctl |= BGMAC_DMA_RX_ENABLE;
+       ctl |= BGMAC_DMA_RX_PARITY_DISABLE;
+       ctl |= BGMAC_DMA_RX_OVERFLOW_CONT;
diff --git a/queue-4.8/bnx2-wait-for-in-flight-dma-to-complete-at-probe-stage.patch b/queue-4.8/bnx2-wait-for-in-flight-dma-to-complete-at-probe-stage.patch

new file mode 100644 (file)

index 0000000..86e17ce
--- /dev/null
+++ b/queue-4.8/bnx2-wait-for-in-flight-dma-to-complete-at-probe-stage.patch
@@ -0,0 +1,95 @@
+From foo@baz Fri Nov 18 11:35:46 CET 2016
+From: Baoquan He <bhe@redhat.com>
+Date: Sun, 13 Nov 2016 13:01:33 +0800
+Subject: bnx2: Wait for in-flight DMA to complete at probe stage
+
+From: Baoquan He <bhe@redhat.com>
+
+
+[ Upstream commit 6df77862f63f389df3b1ad879738e04440d7385d ]
+
+In-flight DMA from the 1st kernel could continue in the kdump kernel.
+A new io-page table has been created before bnx2 does its reset at open stage.
+We have to wait for the in-flight DMA to complete at probe stage, so that it
+does not look up the newly created io-page table.
+
+Suggested-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: Baoquan He <bhe@redhat.com>
+Acked-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/bnx2.c |   38 +++++++++++++++++++++++++++++------
+ 1 file changed, 32 insertions(+), 6 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/bnx2.c
++++ b/drivers/net/ethernet/broadcom/bnx2.c
+@@ -49,6 +49,7 @@
+ #include <linux/firmware.h>
+ #include <linux/log2.h>
+ #include <linux/aer.h>
++#include <linux/crash_dump.h>
+ 
+ #if defined(CONFIG_CNIC) || defined(CONFIG_CNIC_MODULE)
+ #define BCM_CNIC 1
+@@ -4759,15 +4760,16 @@ bnx2_setup_msix_tbl(struct bnx2 *bp)
+       BNX2_WR(bp, BNX2_PCI_GRC_WINDOW3_ADDR, BNX2_MSIX_PBA_ADDR);
+ }
+ 
+-static int
+-bnx2_reset_chip(struct bnx2 *bp, u32 reset_code)
++static void
++bnx2_wait_dma_complete(struct bnx2 *bp)
+ {
+       u32 val;
+-      int i, rc = 0;
+-      u8 old_port;
++      int i;
+ 
+-      /* Wait for the current PCI transaction to complete before
+-       * issuing a reset. */
++      /*
++       * Wait for the current PCI transaction to complete before
++       * issuing a reset.
++       */
+       if ((BNX2_CHIP(bp) == BNX2_CHIP_5706) ||
+           (BNX2_CHIP(bp) == BNX2_CHIP_5708)) {
+               BNX2_WR(bp, BNX2_MISC_ENABLE_CLR_BITS,
+@@ -4791,6 +4793,21 @@ bnx2_reset_chip(struct bnx2 *bp, u32 res
+               }
+       }
+ 
++      return;
++}
++
++
++static int
++bnx2_reset_chip(struct bnx2 *bp, u32 reset_code)
++{
++      u32 val;
++      int i, rc = 0;
++      u8 old_port;
++
++      /* Wait for the current PCI transaction to complete before
++       * issuing a reset. */
++      bnx2_wait_dma_complete(bp);
++
+       /* Wait for the firmware to tell us it is ok to issue a reset. */
+       bnx2_fw_sync(bp, BNX2_DRV_MSG_DATA_WAIT0 | reset_code, 1, 1);
+ 
+@@ -8575,6 +8592,15 @@ bnx2_init_one(struct pci_dev *pdev, cons
+ 
+       pci_set_drvdata(pdev, dev);
+ 
++      /*
++       * In-flight DMA from 1st kernel could continue going in kdump kernel.
++       * New io-page table has been created before bnx2 does reset at open stage.
++       * We have to wait for the in-flight DMA to complete to avoid it look up
++       * into the newly created io-page table.
++       */
++      if (is_kdump_kernel())
++              bnx2_wait_dma_complete(bp);
++
+       memcpy(dev->dev_addr, bp->mac_addr, ETH_ALEN);
+ 
+       dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG |
diff --git a/queue-4.8/bpf-fix-htab-map-destruction-when-extra-reserve-is-in-use.patch b/queue-4.8/bpf-fix-htab-map-destruction-when-extra-reserve-is-in-use.patch

new file mode 100644 (file)

index 0000000..1878b61
--- /dev/null
+++ b/queue-4.8/bpf-fix-htab-map-destruction-when-extra-reserve-is-in-use.patch
@@ -0,0 +1,42 @@
+From foo@baz Fri Nov 18 11:35:46 CET 2016
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Fri, 4 Nov 2016 00:01:19 +0100
+Subject: bpf: fix htab map destruction when extra reserve is in use
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+
+[ Upstream commit 483bed2b0ddd12ec33fc9407e0c6e1088e77a97c ]
+
+Commit a6ed3ea65d98 ("bpf: restore behavior of bpf_map_update_elem")
+added an extra per-cpu reserve to the hash table map to restore old
+behaviour from pre prealloc times. When non-prealloc is in use for a
+map, the problem is that once a hash table extra element has been
+linked into the hash-table, and the hash table is destroyed due to
+refcount dropping to zero, then htab_map_free() -> delete_all_elements()
+will walk the whole hash table and drop all elements via htab_elem_free().
+The problem is that the element from the extra reserve is first fed
+to the wrong backend allocator and eventually freed twice.
+
+Fixes: a6ed3ea65d98 ("bpf: restore behavior of bpf_map_update_elem")
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/hashtab.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/kernel/bpf/hashtab.c
++++ b/kernel/bpf/hashtab.c
+@@ -687,7 +687,8 @@ static void delete_all_elements(struct b
+ 
+               hlist_for_each_entry_safe(l, n, head, hash_node) {
+                       hlist_del_rcu(&l->hash_node);
+-                      htab_elem_free(htab, l);
++                      if (l->state != HTAB_EXTRA_ELEM_USED)
++                              htab_elem_free(htab, l);
+               }
+       }
+ }
diff --git a/queue-4.8/dccp-do-not-release-listeners-too-soon.patch b/queue-4.8/dccp-do-not-release-listeners-too-soon.patch

new file mode 100644 (file)

index 0000000..eb0963b
--- /dev/null
+++ b/queue-4.8/dccp-do-not-release-listeners-too-soon.patch
@@ -0,0 +1,142 @@
+From foo@baz Fri Nov 18 11:35:46 CET 2016
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 2 Nov 2016 17:14:41 -0700
+Subject: dccp: do not release listeners too soon
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit c3f24cfb3e508c70c26ee8569d537c8ca67a36c6 ]
+
+Andrey Konovalov reported the following error while fuzzing with syzkaller:
+
+IPv4: Attempt to release alive inet socket ffff880068e98940
+kasan: CONFIG_KASAN_INLINE enabled
+kasan: GPF could be caused by NULL-ptr deref or user memory access
+general protection fault: 0000 [#1] SMP KASAN
+Modules linked in:
+CPU: 1 PID: 3905 Comm: a.out Not tainted 4.9.0-rc3+ #333
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
+task: ffff88006b9e0000 task.stack: ffff880068770000
+RIP: 0010:[<ffffffff819ead5f>]  [<ffffffff819ead5f>]
+selinux_socket_sock_rcv_skb+0xff/0x6a0 security/selinux/hooks.c:4639
+RSP: 0018:ffff8800687771c8  EFLAGS: 00010202
+RAX: ffff88006b9e0000 RBX: 1ffff1000d0eee3f RCX: 1ffff1000d1d312a
+RDX: 1ffff1000d1d31a6 RSI: dffffc0000000000 RDI: 0000000000000010
+RBP: ffff880068777360 R08: 0000000000000000 R09: 0000000000000002
+R10: dffffc0000000000 R11: 0000000000000006 R12: ffff880068e98940
+R13: 0000000000000002 R14: ffff880068777338 R15: 0000000000000000
+FS:  00007f00ff760700(0000) GS:ffff88006cd00000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 0000000020008000 CR3: 000000006a308000 CR4: 00000000000006e0
+Stack:
+ ffff8800687771e0 ffffffff812508a5 ffff8800686f3168 0000000000000007
+ ffff88006ac8cdfc ffff8800665ea500 0000000041b58ab3 ffffffff847b5480
+ ffffffff819eac60 ffff88006b9e0860 ffff88006b9e0868 ffff88006b9e07f0
+Call Trace:
+ [<ffffffff819c8dd5>] security_sock_rcv_skb+0x75/0xb0 security/security.c:1317
+ [<ffffffff82c2a9e7>] sk_filter_trim_cap+0x67/0x10e0 net/core/filter.c:81
+ [<ffffffff82b81e60>] __sk_receive_skb+0x30/0xa00 net/core/sock.c:460
+ [<ffffffff838bbf12>] dccp_v4_rcv+0xdb2/0x1910 net/dccp/ipv4.c:873
+ [<ffffffff83069d22>] ip_local_deliver_finish+0x332/0xad0
+net/ipv4/ip_input.c:216
+ [<     inline     >] NF_HOOK_THRESH ./include/linux/netfilter.h:232
+ [<     inline     >] NF_HOOK ./include/linux/netfilter.h:255
+ [<ffffffff8306abd2>] ip_local_deliver+0x1c2/0x4b0 net/ipv4/ip_input.c:257
+ [<     inline     >] dst_input ./include/net/dst.h:507
+ [<ffffffff83068500>] ip_rcv_finish+0x750/0x1c40 net/ipv4/ip_input.c:396
+ [<     inline     >] NF_HOOK_THRESH ./include/linux/netfilter.h:232
+ [<     inline     >] NF_HOOK ./include/linux/netfilter.h:255
+ [<ffffffff8306b82f>] ip_rcv+0x96f/0x12f0 net/ipv4/ip_input.c:487
+ [<ffffffff82bd9fb7>] __netif_receive_skb_core+0x1897/0x2a50 net/core/dev.c:4213
+ [<ffffffff82bdb19a>] __netif_receive_skb+0x2a/0x170 net/core/dev.c:4251
+ [<ffffffff82bdb493>] netif_receive_skb_internal+0x1b3/0x390 net/core/dev.c:4279
+ [<ffffffff82bdb6b8>] netif_receive_skb+0x48/0x250 net/core/dev.c:4303
+ [<ffffffff8241fc75>] tun_get_user+0xbd5/0x28a0 drivers/net/tun.c:1308
+ [<ffffffff82421b5a>] tun_chr_write_iter+0xda/0x190 drivers/net/tun.c:1332
+ [<     inline     >] new_sync_write fs/read_write.c:499
+ [<ffffffff8151bd44>] __vfs_write+0x334/0x570 fs/read_write.c:512
+ [<ffffffff8151f85b>] vfs_write+0x17b/0x500 fs/read_write.c:560
+ [<     inline     >] SYSC_write fs/read_write.c:607
+ [<ffffffff81523184>] SyS_write+0xd4/0x1a0 fs/read_write.c:599
+ [<ffffffff83fc02c1>] entry_SYSCALL_64_fastpath+0x1f/0xc2
+
+It turns out DCCP calls __sk_receive_skb(), and this broke when
+lookups no longer took a reference on listeners.
+
+Fix this issue by adding a @refcounted parameter to __sk_receive_skb(),
+so that sock_put() is used only when needed.
+
+Fixes: 3b24d854cb35 ("tcp/dccp: do not touch listener sk_refcnt under synflood")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Andrey Konovalov <andreyknvl@google.com>
+Tested-by: Andrey Konovalov <andreyknvl@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/sock.h |    4 ++--
+ net/core/sock.c    |    5 +++--
+ net/dccp/ipv4.c    |    2 +-
+ net/dccp/ipv6.c    |    3 ++-
+ 4 files changed, 8 insertions(+), 6 deletions(-)
+
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -1587,11 +1587,11 @@ static inline void sock_put(struct sock
+ void sock_gen_put(struct sock *sk);
+ 
+ int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested,
+-                   unsigned int trim_cap);
++                   unsigned int trim_cap, bool refcounted);
+ static inline int sk_receive_skb(struct sock *sk, struct sk_buff *skb,
+                                const int nested)
+ {
+-      return __sk_receive_skb(sk, skb, nested, 1);
++      return __sk_receive_skb(sk, skb, nested, 1, true);
+ }
+ 
+ static inline void sk_tx_queue_set(struct sock *sk, int tx_queue)
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -453,7 +453,7 @@ int sock_queue_rcv_skb(struct sock *sk,
+ EXPORT_SYMBOL(sock_queue_rcv_skb);
+ 
+ int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
+-                   const int nested, unsigned int trim_cap)
++                   const int nested, unsigned int trim_cap, bool refcounted)
+ {
+       int rc = NET_RX_SUCCESS;
+ 
+@@ -487,7 +487,8 @@ int __sk_receive_skb(struct sock *sk, st
+ 
+       bh_unlock_sock(sk);
+ out:
+-      sock_put(sk);
++      if (refcounted)
++              sock_put(sk);
+       return rc;
+ discard_and_relse:
+       kfree_skb(skb);
+--- a/net/dccp/ipv4.c
++++ b/net/dccp/ipv4.c
+@@ -868,7 +868,7 @@ lookup:
+               goto discard_and_relse;
+       nf_reset(skb);
+ 
+-      return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4);
++      return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4, refcounted);
+ 
+ no_dccp_socket:
+       if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
+--- a/net/dccp/ipv6.c
++++ b/net/dccp/ipv6.c
+@@ -738,7 +738,8 @@ lookup:
+       if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
+               goto discard_and_relse;
+ 
+-      return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4) ? -1 : 0;
++      return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4,
++                              refcounted) ? -1 : 0;
+ 
+ no_dccp_socket:
+       if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
diff --git a/queue-4.8/dccp-do-not-send-reset-to-already-closed-sockets.patch b/queue-4.8/dccp-do-not-send-reset-to-already-closed-sockets.patch

new file mode 100644 (file)

index 0000000..bffecf0
--- /dev/null
+++ b/queue-4.8/dccp-do-not-send-reset-to-already-closed-sockets.patch
@@ -0,0 +1,74 @@
+From foo@baz Fri Nov 18 11:35:46 CET 2016
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 2 Nov 2016 18:04:24 -0700
+Subject: dccp: do not send reset to already closed sockets
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 346da62cc186c4b4b1ac59f87f4482b47a047388 ]
+
+Andrey reported the following warning while fuzzing with syzkaller:
+
+WARNING: CPU: 1 PID: 21072 at net/dccp/proto.c:83 dccp_set_state+0x229/0x290
+Kernel panic - not syncing: panic_on_warn set ...
+
+CPU: 1 PID: 21072 Comm: syz-executor Not tainted 4.9.0-rc1+ #293
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
+ ffff88003d4c7738 ffffffff81b474f4 0000000000000003 dffffc0000000000
+ ffffffff844f8b00 ffff88003d4c7804 ffff88003d4c7800 ffffffff8140c06a
+ 0000000041b58ab3 ffffffff8479ab7d ffffffff8140beae ffffffff8140cd00
+Call Trace:
+ [<     inline     >] __dump_stack lib/dump_stack.c:15
+ [<ffffffff81b474f4>] dump_stack+0xb3/0x10f lib/dump_stack.c:51
+ [<ffffffff8140c06a>] panic+0x1bc/0x39d kernel/panic.c:179
+ [<ffffffff8111125c>] __warn+0x1cc/0x1f0 kernel/panic.c:542
+ [<ffffffff8111144c>] warn_slowpath_null+0x2c/0x40 kernel/panic.c:585
+ [<ffffffff8389e5d9>] dccp_set_state+0x229/0x290 net/dccp/proto.c:83
+ [<ffffffff838a0aa2>] dccp_close+0x612/0xc10 net/dccp/proto.c:1016
+ [<ffffffff8316bf1f>] inet_release+0xef/0x1c0 net/ipv4/af_inet.c:415
+ [<ffffffff82b6e89e>] sock_release+0x8e/0x1d0 net/socket.c:570
+ [<ffffffff82b6e9f6>] sock_close+0x16/0x20 net/socket.c:1017
+ [<ffffffff815256ad>] __fput+0x29d/0x720 fs/file_table.c:208
+ [<ffffffff81525bb5>] ____fput+0x15/0x20 fs/file_table.c:244
+ [<ffffffff811727d8>] task_work_run+0xf8/0x170 kernel/task_work.c:116
+ [<     inline     >] exit_task_work include/linux/task_work.h:21
+ [<ffffffff8111bc53>] do_exit+0x883/0x2ac0 kernel/exit.c:828
+ [<ffffffff811221fe>] do_group_exit+0x10e/0x340 kernel/exit.c:931
+ [<ffffffff81143c94>] get_signal+0x634/0x15a0 kernel/signal.c:2307
+ [<ffffffff81054aad>] do_signal+0x8d/0x1a30 arch/x86/kernel/signal.c:807
+ [<ffffffff81003a05>] exit_to_usermode_loop+0xe5/0x130
+arch/x86/entry/common.c:156
+ [<     inline     >] prepare_exit_to_usermode arch/x86/entry/common.c:190
+ [<ffffffff81006298>] syscall_return_slowpath+0x1a8/0x1e0
+arch/x86/entry/common.c:259
+ [<ffffffff83fc1a62>] entry_SYSCALL_64_fastpath+0xc0/0xc2
+Dumping ftrace buffer:
+   (ftrace buffer empty)
+Kernel Offset: disabled
+
+Fix this the same way we did for TCP in commit 565b7b2d2e63
+("tcp: do not send reset to already closed sockets")
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Andrey Konovalov <andreyknvl@google.com>
+Tested-by: Andrey Konovalov <andreyknvl@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/dccp/proto.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/net/dccp/proto.c
++++ b/net/dccp/proto.c
+@@ -1009,6 +1009,10 @@ void dccp_close(struct sock *sk, long ti
+               __kfree_skb(skb);
+       }
+ 
++      /* If socket has been already reset kill it. */
++      if (sk->sk_state == DCCP_CLOSED)
++              goto adjudge_to_death;
++
+       if (data_was_unread) {
+               /* Unread data was tossed, send an appropriate Reset Code */
+               DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread);
diff --git a/queue-4.8/dccp-fix-out-of-bound-access-in-dccp_v4_err.patch b/queue-4.8/dccp-fix-out-of-bound-access-in-dccp_v4_err.patch

new file mode 100644 (file)

index 0000000..0ed869a
--- /dev/null
+++ b/queue-4.8/dccp-fix-out-of-bound-access-in-dccp_v4_err.patch
@@ -0,0 +1,56 @@
+From foo@baz Fri Nov 18 11:35:46 CET 2016
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 2 Nov 2016 19:00:40 -0700
+Subject: dccp: fix out of bound access in dccp_v4_err()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 6706a97fec963d6cb3f7fc2978ec1427b4651214 ]
+
+dccp_v4_err() does not use pskb_may_pull() and might access garbage.
+
+We only need 4 bytes at the beginning of the DCCP header, like TCP,
+so the 8 bytes pulled in icmp_socket_deliver() are more than enough.
+
+This patch might allow processing more ICMP messages, as some routers
+are still limiting the size of reflected bytes to 28 (RFC 792), instead
+of extended lengths (RFC 1812 4.3.2.3)
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/dccp/ipv4.c |   14 ++++++++------
+ 1 file changed, 8 insertions(+), 6 deletions(-)
+
+--- a/net/dccp/ipv4.c
++++ b/net/dccp/ipv4.c
+@@ -235,7 +235,7 @@ static void dccp_v4_err(struct sk_buff *
+ {
+       const struct iphdr *iph = (struct iphdr *)skb->data;
+       const u8 offset = iph->ihl << 2;
+-      const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset);
++      const struct dccp_hdr *dh;
+       struct dccp_sock *dp;
+       struct inet_sock *inet;
+       const int type = icmp_hdr(skb)->type;
+@@ -245,11 +245,13 @@ static void dccp_v4_err(struct sk_buff *
+       int err;
+       struct net *net = dev_net(skb->dev);
+ 
+-      if (skb->len < offset + sizeof(*dh) ||
+-          skb->len < offset + __dccp_basic_hdr_len(dh)) {
+-              __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
+-              return;
+-      }
++      /* Only need dccph_dport & dccph_sport which are the first
++       * 4 bytes in dccp header.
++       * Our caller (icmp_socket_deliver()) already pulled 8 bytes for us.
++       */
++      BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8);
++      BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8);
++      dh = (struct dccp_hdr *)(skb->data + offset);
+ 
+       sk = __inet_lookup_established(net, &dccp_hashinfo,
+                                      iph->daddr, dh->dccph_dport,
diff --git a/queue-4.8/dctcp-avoid-bogus-doubling-of-cwnd-after-loss.patch b/queue-4.8/dctcp-avoid-bogus-doubling-of-cwnd-after-loss.patch

new file mode 100644 (file)

index 0000000..f471b8d
--- /dev/null
+++ b/queue-4.8/dctcp-avoid-bogus-doubling-of-cwnd-after-loss.patch
@@ -0,0 +1,87 @@
+From foo@baz Fri Nov 18 11:35:46 CET 2016
+From: Florian Westphal <fw@strlen.de>
+Date: Fri, 28 Oct 2016 18:43:11 +0200
+Subject: dctcp: avoid bogus doubling of cwnd after loss
+
+From: Florian Westphal <fw@strlen.de>
+
+
+[ Upstream commit ce6dd23329b1ee6a794acf5f7e40f8e89b8317ee ]
+
+If a congestion control module doesn't provide .undo_cwnd function,
+tcp_undo_cwnd_reduction() will set cwnd to
+
+   tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1);
+
+... which makes sense for reno (it sets ssthresh to half the current cwnd),
+but it makes no sense for dctcp, which sets ssthresh based on the current
+congestion estimate.
+
+This can cause severe growth of cwnd (eventually overflowing u32).
+
+Fix this by saving last cwnd on loss and restore cwnd based on that,
+similar to cubic and other algorithms.
+
+Fixes: e3118e8359bb7c ("net: tcp: add DCTCP congestion control algorithm")
+Cc: Lawrence Brakmo <brakmo@fb.com>
+Cc: Andrew Shewmaker <agshew@gmail.com>
+Cc: Glenn Judd <glenn.judd@morganstanley.com>
+Acked-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_dctcp.c |   13 ++++++++++++-
+ 1 file changed, 12 insertions(+), 1 deletion(-)
+
+--- a/net/ipv4/tcp_dctcp.c
++++ b/net/ipv4/tcp_dctcp.c
+@@ -56,6 +56,7 @@ struct dctcp {
+       u32 next_seq;
+       u32 ce_state;
+       u32 delayed_ack_reserved;
++      u32 loss_cwnd;
+ };
+ 
+ static unsigned int dctcp_shift_g __read_mostly = 4; /* g = 1/2^4 */
+@@ -96,6 +97,7 @@ static void dctcp_init(struct sock *sk)
+               ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA);
+ 
+               ca->delayed_ack_reserved = 0;
++              ca->loss_cwnd = 0;
+               ca->ce_state = 0;
+ 
+               dctcp_reset(tp, ca);
+@@ -111,9 +113,10 @@ static void dctcp_init(struct sock *sk)
+ 
+ static u32 dctcp_ssthresh(struct sock *sk)
+ {
+-      const struct dctcp *ca = inet_csk_ca(sk);
++      struct dctcp *ca = inet_csk_ca(sk);
+       struct tcp_sock *tp = tcp_sk(sk);
+ 
++      ca->loss_cwnd = tp->snd_cwnd;
+       return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U);
+ }
+ 
+@@ -308,12 +311,20 @@ static size_t dctcp_get_info(struct sock
+       return 0;
+ }
+ 
++static u32 dctcp_cwnd_undo(struct sock *sk)
++{
++      const struct dctcp *ca = inet_csk_ca(sk);
++
++      return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
++}
++
+ static struct tcp_congestion_ops dctcp __read_mostly = {
+       .init           = dctcp_init,
+       .in_ack_event   = dctcp_update_alpha,
+       .cwnd_event     = dctcp_cwnd_event,
+       .ssthresh       = dctcp_ssthresh,
+       .cong_avoid     = tcp_reno_cong_avoid,
++      .undo_cwnd      = dctcp_cwnd_undo,
+       .set_state      = dctcp_state,
+       .get_info       = dctcp_get_info,
+       .flags          = TCP_CONG_NEEDS_ECN,
diff --git a/queue-4.8/fib_trie-correct-proc-net-route-off-by-one-error.patch b/queue-4.8/fib_trie-correct-proc-net-route-off-by-one-error.patch

new file mode 100644 (file)

index 0000000..1064307
--- /dev/null
+++ b/queue-4.8/fib_trie-correct-proc-net-route-off-by-one-error.patch
@@ -0,0 +1,102 @@
+From foo@baz Fri Nov 18 11:35:46 CET 2016
+From: Alexander Duyck <alexander.h.duyck@intel.com>
+Date: Fri, 4 Nov 2016 15:11:57 -0400
+Subject: fib_trie: Correct /proc/net/route off by one error
+
+From: Alexander Duyck <alexander.h.duyck@intel.com>
+
+
+[ Upstream commit fd0285a39b1cb496f60210a9a00ad33a815603e7 ]
+
+The display of /proc/net/route has had a couple issues due to the fact that
+when I originally rewrote most of fib_trie I made it so that the iterator
+was tracking the next value to use instead of the current.
+
+In addition it had an off by 1 error where I was tracking the first piece
+of data as position 0, even though in reality that belonged to the
+SEQ_START_TOKEN.
+
+This patch updates the code so the iterator tracks the last reported
+position and key instead of the next expected position and key.  In
+addition it shifts things so that all of the leaves start at 1 instead of
+trying to report leaves starting with offset 0 as being valid.  With these
+two issues addressed this should resolve any off by one errors that were
+present in the display of /proc/net/route.
+
+Fixes: 25b97c016b26 ("ipv4: off-by-one in continuation handling in /proc/net/route")
+Cc: Andy Whitcroft <apw@canonical.com>
+Reported-by: Jason Baron <jbaron@akamai.com>
+Tested-by: Jason Baron <jbaron@akamai.com>
+Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/fib_trie.c |   21 +++++++++------------
+ 1 file changed, 9 insertions(+), 12 deletions(-)
+
+--- a/net/ipv4/fib_trie.c
++++ b/net/ipv4/fib_trie.c
+@@ -2455,22 +2455,19 @@ static struct key_vector *fib_route_get_
+       struct key_vector *l, **tp = &iter->tnode;
+       t_key key;
+ 
+-      /* use cache location of next-to-find key */
++      /* use cached location of previously found key */
+       if (iter->pos > 0 && pos >= iter->pos) {
+-              pos -= iter->pos;
+               key = iter->key;
+       } else {
+-              iter->pos = 0;
++              iter->pos = 1;
+               key = 0;
+       }
+ 
+-      while ((l = leaf_walk_rcu(tp, key)) != NULL) {
++      pos -= iter->pos;
++
++      while ((l = leaf_walk_rcu(tp, key)) && (pos-- > 0)) {
+               key = l->key + 1;
+               iter->pos++;
+-
+-              if (--pos <= 0)
+-                      break;
+-
+               l = NULL;
+ 
+               /* handle unlikely case of a key wrap */
+@@ -2479,7 +2476,7 @@ static struct key_vector *fib_route_get_
+       }
+ 
+       if (l)
+-              iter->key = key;        /* remember it */
++              iter->key = l->key;     /* remember it */
+       else
+               iter->pos = 0;          /* forget it */
+ 
+@@ -2507,7 +2504,7 @@ static void *fib_route_seq_start(struct
+               return fib_route_get_idx(iter, *pos);
+ 
+       iter->pos = 0;
+-      iter->key = 0;
++      iter->key = KEY_MAX;
+ 
+       return SEQ_START_TOKEN;
+ }
+@@ -2516,7 +2513,7 @@ static void *fib_route_seq_next(struct s
+ {
+       struct fib_route_iter *iter = seq->private;
+       struct key_vector *l = NULL;
+-      t_key key = iter->key;
++      t_key key = iter->key + 1;
+ 
+       ++*pos;
+ 
+@@ -2525,7 +2522,7 @@ static void *fib_route_seq_next(struct s
+               l = leaf_walk_rcu(&iter->tnode, key);
+ 
+       if (l) {
+-              iter->key = l->key + 1;
++              iter->key = l->key;
+               iter->pos++;
+       } else {
+               iter->pos = 0;
diff --git a/queue-4.8/ip6_tunnel-clear-ip6cb-in-ip6tunnel_xmit.patch b/queue-4.8/ip6_tunnel-clear-ip6cb-in-ip6tunnel_xmit.patch

new file mode 100644 (file)

index 0000000..7b282a3
--- /dev/null
+++ b/queue-4.8/ip6_tunnel-clear-ip6cb-in-ip6tunnel_xmit.patch
@@ -0,0 +1,36 @@
+From foo@baz Fri Nov 18 11:35:46 CET 2016
+From: Eli Cooper <elicooper@gmx.com>
+Date: Tue, 1 Nov 2016 23:45:12 +0800
+Subject: ip6_tunnel: Clear IP6CB in ip6tunnel_xmit()
+
+From: Eli Cooper <elicooper@gmx.com>
+
+
+[ Upstream commit 23f4ffedb7d751c7e298732ba91ca75d224bc1a6 ]
+
+skb->cb may contain data from previous layers. In the observed scenario,
+the garbage data were misinterpreted as IP6CB(skb)->frag_max_size, so
+that small packets sent through the tunnel are mistakenly fragmented.
+
+This patch unconditionally clears the control buffer in ip6tunnel_xmit(),
+which affects ip6_tunnel, ip6_udp_tunnel and ip6_gre. Currently none of
+these tunnels set IP6CB(skb)->flags, otherwise it needs to be done earlier.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Eli Cooper <elicooper@gmx.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/ip6_tunnel.h |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/include/net/ip6_tunnel.h
++++ b/include/net/ip6_tunnel.h
+@@ -145,6 +145,7 @@ static inline void ip6tunnel_xmit(struct
+ {
+       int pkt_len, err;
+ 
++      memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
+       pkt_len = skb->len - skb_inner_network_offset(skb);
+       err = ip6_local_out(dev_net(skb_dst(skb)->dev), sk, skb);
+       if (unlikely(net_xmit_eval(err)))
diff --git a/queue-4.8/ipv4-allow-local-fragmentation-in-ip_finish_output_gso.patch b/queue-4.8/ipv4-allow-local-fragmentation-in-ip_finish_output_gso.patch

new file mode 100644 (file)

index 0000000..6096ff2
--- /dev/null
+++ b/queue-4.8/ipv4-allow-local-fragmentation-in-ip_finish_output_gso.patch
@@ -0,0 +1,112 @@
+From foo@baz Fri Nov 18 11:35:46 CET 2016
+From: Lance Richardson <lrichard@redhat.com>
+Date: Wed, 2 Nov 2016 16:36:17 -0400
+Subject: ipv4: allow local fragmentation in ip_finish_output_gso()
+
+From: Lance Richardson <lrichard@redhat.com>
+
+
+[ Upstream commit 9ee6c5dc816aa8256257f2cd4008a9291ec7e985 ]
+
+Some configurations (e.g. geneve interface with default
+MTU of 1500 over an ethernet interface with 1500 MTU) result
+in the transmission of packets that exceed the configured MTU.
+While this should be considered to be a "bad" configuration,
+it is still allowed and should not result in the sending
+of packets that exceed the configured MTU.
+
+Fix by dropping the assumption in ip_finish_output_gso() that
+locally originated gso packets will never need fragmentation.
+Basic testing using iperf (observing CPU usage and bandwidth)
+has shown no measurable performance impact for traffic not
+requiring fragmentation.
+
+Fixes: c7ba65d7b649 ("net: ip: push gso skb forwarding handling down the stack")
+Reported-by: Jan Tluka <jtluka@redhat.com>
+Signed-off-by: Lance Richardson <lrichard@redhat.com>
+Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/ip.h          |    1 -
+ net/ipv4/ip_forward.c     |    2 +-
+ net/ipv4/ip_output.c      |    6 ++----
+ net/ipv4/ip_tunnel_core.c |   11 -----------
+ net/ipv4/ipmr.c           |    2 +-
+ 5 files changed, 4 insertions(+), 18 deletions(-)
+
+--- a/include/net/ip.h
++++ b/include/net/ip.h
+@@ -47,7 +47,6 @@ struct inet_skb_parm {
+ #define IPSKB_REROUTED                BIT(4)
+ #define IPSKB_DOREDIRECT      BIT(5)
+ #define IPSKB_FRAG_PMTU               BIT(6)
+-#define IPSKB_FRAG_SEGS               BIT(7)
+ 
+       u16                     frag_max_size;
+ };
+--- a/net/ipv4/ip_forward.c
++++ b/net/ipv4/ip_forward.c
+@@ -117,7 +117,7 @@ int ip_forward(struct sk_buff *skb)
+       if (opt->is_strictroute && rt->rt_uses_gateway)
+               goto sr_failed;
+ 
+-      IPCB(skb)->flags |= IPSKB_FORWARDED | IPSKB_FRAG_SEGS;
++      IPCB(skb)->flags |= IPSKB_FORWARDED;
+       mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
+       if (ip_exceeds_mtu(skb, mtu)) {
+               IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
+--- a/net/ipv4/ip_output.c
++++ b/net/ipv4/ip_output.c
+@@ -223,11 +223,9 @@ static int ip_finish_output_gso(struct n
+       struct sk_buff *segs;
+       int ret = 0;
+ 
+-      /* common case: fragmentation of segments is not allowed,
+-       * or seglen is <= mtu
++      /* common case: seglen is <= mtu
+        */
+-      if (((IPCB(skb)->flags & IPSKB_FRAG_SEGS) == 0) ||
+-            skb_gso_validate_mtu(skb, mtu))
++      if (skb_gso_validate_mtu(skb, mtu))
+               return ip_finish_output2(net, sk, skb);
+ 
+       /* Slowpath -  GSO segment length is exceeding the dst MTU.
+--- a/net/ipv4/ip_tunnel_core.c
++++ b/net/ipv4/ip_tunnel_core.c
+@@ -63,7 +63,6 @@ void iptunnel_xmit(struct sock *sk, stru
+       int pkt_len = skb->len - skb_inner_network_offset(skb);
+       struct net *net = dev_net(rt->dst.dev);
+       struct net_device *dev = skb->dev;
+-      int skb_iif = skb->skb_iif;
+       struct iphdr *iph;
+       int err;
+ 
+@@ -73,16 +72,6 @@ void iptunnel_xmit(struct sock *sk, stru
+       skb_dst_set(skb, &rt->dst);
+       memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+ 
+-      if (skb_iif && !(df & htons(IP_DF))) {
+-              /* Arrived from an ingress interface, got encapsulated, with
+-               * fragmentation of encapulating frames allowed.
+-               * If skb is gso, the resulting encapsulated network segments
+-               * may exceed dst mtu.
+-               * Allow IP Fragmentation of segments.
+-               */
+-              IPCB(skb)->flags |= IPSKB_FRAG_SEGS;
+-      }
+-
+       /* Push down and install the IP header. */
+       skb_push(skb, sizeof(struct iphdr));
+       skb_reset_network_header(skb);
+--- a/net/ipv4/ipmr.c
++++ b/net/ipv4/ipmr.c
+@@ -1749,7 +1749,7 @@ static void ipmr_queue_xmit(struct net *
+               vif->dev->stats.tx_bytes += skb->len;
+       }
+ 
+-      IPCB(skb)->flags |= IPSKB_FORWARDED | IPSKB_FRAG_SEGS;
++      IPCB(skb)->flags |= IPSKB_FORWARDED;
+ 
+       /* RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
+        * not only before forwarding, but after forwarding on all output
diff --git a/queue-4.8/ipv4-use-new_gw-for-redirect-neigh-lookup.patch b/queue-4.8/ipv4-use-new_gw-for-redirect-neigh-lookup.patch

new file mode 100644 (file)

index 0000000..d3e8996
--- /dev/null
+++ b/queue-4.8/ipv4-use-new_gw-for-redirect-neigh-lookup.patch
@@ -0,0 +1,51 @@
+From foo@baz Fri Nov 18 11:35:46 CET 2016
+From: Stephen Suryaputra Lin <stephen.suryaputra.lin@gmail.com>
+Date: Thu, 10 Nov 2016 11:16:15 -0500
+Subject: ipv4: use new_gw for redirect neigh lookup
+
+From: Stephen Suryaputra Lin <stephen.suryaputra.lin@gmail.com>
+
+
+[ Upstream commit 969447f226b451c453ddc83cac6144eaeac6f2e3 ]
+
+In v2.6, ip_rt_redirect() calls arp_bind_neighbour() which returns 0
+and then the state of the neigh for the new_gw is checked. If the state
+isn't valid then the redirected route is deleted. This behavior is
+maintained up to v3.5.7 by check_peer_redirect() because rt->rt_gateway
+is assigned to peer->redirect_learned.a4 before calling
+ipv4_neigh_lookup().
+
+After commit 5943634fc559 ("ipv4: Maintain redirect and PMTU info in
+struct rtable again."), ipv4_neigh_lookup() is performed without the
+rt_gateway assigned to the new_gw. In the case when rt_gateway (old_gw)
+isn't zero, the function uses it as the key. The neigh is most likely
+valid since the old_gw is the one that sends the ICMP redirect message.
+Then the new_gw is assigned to fib_nh_exception. The problem is: the
+new_gw ARP may never get resolved and the traffic is blackholed.
+
+So, use the new_gw for neigh lookup.
+
+Changes from v1:
+ - use __ipv4_neigh_lookup instead (per Eric Dumazet).
+
+Fixes: 5943634fc559 ("ipv4: Maintain redirect and PMTU info in struct rtable again.")
+Signed-off-by: Stephen Suryaputra Lin <ssurya@ieee.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/route.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -753,7 +753,9 @@ static void __ip_do_redirect(struct rtab
+                       goto reject_redirect;
+       }
+ 
+-      n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw);
++      n = __ipv4_neigh_lookup(rt->dst.dev, new_gw);
++      if (!n)
++              n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev);
+       if (!IS_ERR(n)) {
+               if (!(n->nud_state & NUD_VALID)) {
+                       neigh_event_send(n, NULL);
diff --git a/queue-4.8/ipv6-dccp-add-missing-bind_conflict-to-dccp_ipv6_mapped.patch b/queue-4.8/ipv6-dccp-add-missing-bind_conflict-to-dccp_ipv6_mapped.patch

new file mode 100644 (file)

index 0000000..88a89cc
--- /dev/null
+++ b/queue-4.8/ipv6-dccp-add-missing-bind_conflict-to-dccp_ipv6_mapped.patch
@@ -0,0 +1,35 @@
+From foo@baz Fri Nov 18 11:35:46 CET 2016
+From: Eric Dumazet <edumazet@google.com>
+Date: Thu, 3 Nov 2016 08:59:46 -0700
+Subject: ipv6: dccp: add missing bind_conflict to dccp_ipv6_mapped
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 990ff4d84408fc55942ca6644f67e361737b3d8e ]
+
+While fuzzing kernel with syzkaller, Andrey reported a nasty crash
+in inet6_bind() caused by DCCP lacking a required method.
+
+Fixes: ab1e0a13d7029 ("[SOCK] proto: Add hashinfo member to struct proto")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Andrey Konovalov <andreyknvl@google.com>
+Tested-by: Andrey Konovalov <andreyknvl@google.com>
+Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
+Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/dccp/ipv6.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/dccp/ipv6.c
++++ b/net/dccp/ipv6.c
+@@ -958,6 +958,7 @@ static const struct inet_connection_sock
+       .getsockopt        = ipv6_getsockopt,
+       .addr2sockaddr     = inet6_csk_addr2sockaddr,
+       .sockaddr_len      = sizeof(struct sockaddr_in6),
++      .bind_conflict     = inet6_csk_bind_conflict,
+ #ifdef CONFIG_COMPAT
+       .compat_setsockopt = compat_ipv6_setsockopt,
+       .compat_getsockopt = compat_ipv6_getsockopt,
diff --git a/queue-4.8/ipv6-dccp-fix-out-of-bound-access-in-dccp_v6_err.patch b/queue-4.8/ipv6-dccp-fix-out-of-bound-access-in-dccp_v6_err.patch

new file mode 100644 (file)

index 0000000..2ad176b
--- /dev/null
+++ b/queue-4.8/ipv6-dccp-fix-out-of-bound-access-in-dccp_v6_err.patch
@@ -0,0 +1,53 @@
+From foo@baz Fri Nov 18 11:35:46 CET 2016
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 2 Nov 2016 20:30:48 -0700
+Subject: ipv6: dccp: fix out of bound access in dccp_v6_err()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 1aa9d1a0e7eefcc61696e147d123453fc0016005 ]
+
+dccp_v6_err() does not use pskb_may_pull() and might access garbage.
+
+We only need 4 bytes at the beginning of the DCCP header, like TCP,
+so the 8 bytes pulled in icmpv6_notify() are more than enough.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/dccp/ipv6.c |   15 ++++++++-------
+ 1 file changed, 8 insertions(+), 7 deletions(-)
+
+--- a/net/dccp/ipv6.c
++++ b/net/dccp/ipv6.c
+@@ -70,7 +70,7 @@ static void dccp_v6_err(struct sk_buff *
+                       u8 type, u8 code, int offset, __be32 info)
+ {
+       const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
+-      const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset);
++      const struct dccp_hdr *dh;
+       struct dccp_sock *dp;
+       struct ipv6_pinfo *np;
+       struct sock *sk;
+@@ -78,12 +78,13 @@ static void dccp_v6_err(struct sk_buff *
+       __u64 seq;
+       struct net *net = dev_net(skb->dev);
+ 
+-      if (skb->len < offset + sizeof(*dh) ||
+-          skb->len < offset + __dccp_basic_hdr_len(dh)) {
+-              __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
+-                                ICMP6_MIB_INERRORS);
+-              return;
+-      }
++      /* Only need dccph_dport & dccph_sport which are the first
++       * 4 bytes in dccp header.
++       * Our caller (icmpv6_notify()) already pulled 8 bytes for us.
++       */
++      BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8);
++      BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8);
++      dh = (struct dccp_hdr *)(skb->data + offset);
+ 
+       sk = __inet6_lookup_established(net, &dccp_hashinfo,
+                                       &hdr->daddr, dh->dccph_dport,
diff --git a/queue-4.8/mlxsw-spectrum-fix-refcount-bug-on-span-entries.patch b/queue-4.8/mlxsw-spectrum-fix-refcount-bug-on-span-entries.patch

new file mode 100644 (file)

index 0000000..8e31baa
--- /dev/null
+++ b/queue-4.8/mlxsw-spectrum-fix-refcount-bug-on-span-entries.patch
@@ -0,0 +1,56 @@
+From foo@baz Fri Nov 18 11:35:46 CET 2016
+From: Yotam Gigi <yotamg@mellanox.com>
+Date: Fri, 11 Nov 2016 16:34:25 +0100
+Subject: mlxsw: spectrum: Fix refcount bug on span entries
+
+From: Yotam Gigi <yotamg@mellanox.com>
+
+
+[ Upstream commit 2d644d4c7506646f9c4a2afceb7fd5f030bc0c9f ]
+
+When binding port to a newly created span entry, its refcount is
+initialized to zero even though it has a bound port. That leads
+to unexpected behaviour when the user tries to delete that port
+from the span entry.
+
+Fix this by initializing the reference count to 1.
+
+Also add a warning to the put function.
+
+Fixes: 763b4b70afcd ("mlxsw: spectrum: Add support in matchall mirror TC offloading")
+Signed-off-by: Yotam Gigi <yotamg@mellanox.com>
+Reviewed-by: Ido Schimmel <idosch@mellanox.com>
+Signed-off-by: Jiri Pirko <jiri@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlxsw/spectrum.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+@@ -231,7 +231,7 @@ mlxsw_sp_span_entry_create(struct mlxsw_
+ 
+       span_entry->used = true;
+       span_entry->id = index;
+-      span_entry->ref_count = 0;
++      span_entry->ref_count = 1;
+       span_entry->local_port = local_port;
+       return span_entry;
+ }
+@@ -268,6 +268,7 @@ struct mlxsw_sp_span_entry *mlxsw_sp_spa
+ 
+       span_entry = mlxsw_sp_span_entry_find(port);
+       if (span_entry) {
++              /* Already exists, just take a reference */
+               span_entry->ref_count++;
+               return span_entry;
+       }
+@@ -278,6 +279,7 @@ struct mlxsw_sp_span_entry *mlxsw_sp_spa
+ static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp,
+                                  struct mlxsw_sp_span_entry *span_entry)
+ {
++      WARN_ON(!span_entry->ref_count);
+       if (--span_entry->ref_count == 0)
+               mlxsw_sp_span_entry_destroy(mlxsw_sp, span_entry);
+       return 0;
diff --git a/queue-4.8/mlxsw-spectrum_router-correctly-dump-neighbour-activity.patch b/queue-4.8/mlxsw-spectrum_router-correctly-dump-neighbour-activity.patch

new file mode 100644 (file)

index 0000000..85c4af2
--- /dev/null
+++ b/queue-4.8/mlxsw-spectrum_router-correctly-dump-neighbour-activity.patch
@@ -0,0 +1,67 @@
+From foo@baz Fri Nov 18 11:35:46 CET 2016
+From: Arkadi Sharshevsky <arkadis@mellanox.com>
+Date: Fri, 11 Nov 2016 16:34:26 +0100
+Subject: mlxsw: spectrum_router: Correctly dump neighbour activity
+
+From: Arkadi Sharshevsky <arkadis@mellanox.com>
+
+
+[ Upstream commit 42cdb338f40a98e6558bae35456fe86b6e90e1ef ]
+
+The device's neighbour table is periodically dumped in order to update
+the kernel about active neighbours. A single dump session may span
+multiple queries, until the response carries fewer records than requested
+or when a record (can contain up to four neighbour entries) is not full.
+Current code stops the session when the number of returned records is
+zero, which can result in an infinite loop in case of high packet rate.
+
+Fix this by stopping the session according to the above logic.
+
+Fixes: c723c735fa6b ("mlxsw: spectrum_router: Periodically update the kernel's neigh table")
+Signed-off-by: Arkadi Sharshevsky <arkadis@mellanox.com>
+Signed-off-by: Ido Schimmel <idosch@mellanox.com>
+Signed-off-by: Jiri Pirko <jiri@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c |   22 +++++++++++++++++-
+ 1 file changed, 21 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+@@ -777,6 +777,26 @@ static void mlxsw_sp_router_neigh_rec_pr
+       }
+ }
+ 
++static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
++{
++      u8 num_rec, last_rec_index, num_entries;
++
++      num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
++      last_rec_index = num_rec - 1;
++
++      if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
++              return false;
++      if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
++          MLXSW_REG_RAUHTD_TYPE_IPV6)
++              return true;
++
++      num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
++                                                              last_rec_index);
++      if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
++              return true;
++      return false;
++}
++
+ static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
+ {
+       char *rauhtd_pl;
+@@ -803,7 +823,7 @@ static int mlxsw_sp_router_neighs_update
+               for (i = 0; i < num_rec; i++)
+                       mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
+                                                         i);
+-      } while (num_rec);
++      } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
+       rtnl_unlock();
+ 
+       kfree(rauhtd_pl);
diff --git a/queue-4.8/net-__skb_flow_dissect-must-cap-its-return-value.patch b/queue-4.8/net-__skb_flow_dissect-must-cap-its-return-value.patch

new file mode 100644 (file)

index 0000000..4ef0887
--- /dev/null
+++ b/queue-4.8/net-__skb_flow_dissect-must-cap-its-return-value.patch
@@ -0,0 +1,59 @@
+From foo@baz Fri Nov 18 11:35:46 CET 2016
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 9 Nov 2016 16:04:46 -0800
+Subject: net: __skb_flow_dissect() must cap its return value
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 34fad54c2537f7c99d07375e50cb30aa3c23bd83 ]
+
+After Tom's patch, thoff field could point past the end of the buffer,
+this could fool some callers.
+
+If an skb was provided, skb->len should be the upper limit.
+If not, hlen is supposed to be the upper limit.
+
+Fixes: a6e544b0a88b ("flow_dissector: Jump to exit code in __skb_flow_dissect")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Yibin Yang <yibyang@cisco.com>
+Acked-by: Alexander Duyck <alexander.h.duyck@intel.com>
+Acked-by: Willem de Bruijn <willemb@google.com>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/flow_dissector.c |   11 ++++++++---
+ 1 file changed, 8 insertions(+), 3 deletions(-)
+
+--- a/net/core/flow_dissector.c
++++ b/net/core/flow_dissector.c
+@@ -118,7 +118,7 @@ bool __skb_flow_dissect(const struct sk_
+       struct flow_dissector_key_tags *key_tags;
+       struct flow_dissector_key_keyid *key_keyid;
+       u8 ip_proto = 0;
+-      bool ret = false;
++      bool ret;
+ 
+       if (!data) {
+               data = skb->data;
+@@ -481,12 +481,17 @@ ip_proto_again:
+ out_good:
+       ret = true;
+ 
+-out_bad:
++      key_control->thoff = (u16)nhoff;
++out:
+       key_basic->n_proto = proto;
+       key_basic->ip_proto = ip_proto;
+-      key_control->thoff = (u16)nhoff;
+ 
+       return ret;
++
++out_bad:
++      ret = false;
++      key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen);
++      goto out;
+ }
+ EXPORT_SYMBOL(__skb_flow_dissect);
+ 
diff --git a/queue-4.8/net-clear-sk_err_soft-in-sk_clone_lock.patch b/queue-4.8/net-clear-sk_err_soft-in-sk_clone_lock.patch

new file mode 100644 (file)

index 0000000..e617dd6
--- /dev/null
+++ b/queue-4.8/net-clear-sk_err_soft-in-sk_clone_lock.patch
@@ -0,0 +1,34 @@
+From foo@baz Fri Nov 18 11:35:46 CET 2016
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 28 Oct 2016 13:40:24 -0700
+Subject: net: clear sk_err_soft in sk_clone_lock()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit e551c32d57c88923f99f8f010e89ca7ed0735e83 ]
+
+At accept() time, it is possible the parent has a non zero
+sk_err_soft, leftover from a prior error.
+
+Make sure we do not leave this value in the child, as it
+makes future getsockopt(SO_ERROR) calls quite unreliable.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/sock.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1563,6 +1563,7 @@ struct sock *sk_clone_lock(const struct
+               RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
+ 
+               newsk->sk_err      = 0;
++              newsk->sk_err_soft = 0;
+               newsk->sk_priority = 0;
+               newsk->sk_incoming_cpu = raw_smp_processor_id();
+               atomic64_set(&newsk->sk_cookie, 0);
diff --git a/queue-4.8/net-icmp6_send-should-use-dst-dev-to-determine-l3-domain.patch b/queue-4.8/net-icmp6_send-should-use-dst-dev-to-determine-l3-domain.patch

new file mode 100644 (file)

index 0000000..8a0c9b8
--- /dev/null
+++ b/queue-4.8/net-icmp6_send-should-use-dst-dev-to-determine-l3-domain.patch
@@ -0,0 +1,33 @@
+From foo@baz Fri Nov 18 11:35:46 CET 2016
+From: David Ahern <dsa@cumulusnetworks.com>
+Date: Thu, 3 Nov 2016 16:17:26 -0700
+Subject: net: icmp6_send should use dst dev to determine L3 domain
+
+From: David Ahern <dsa@cumulusnetworks.com>
+
+
+[ Upstream commit 5d41ce29e3b91ef305f88d23f72b3359de329cec ]
+
+icmp6_send is called in response to some event. The skb may not have
+the device set (skb->dev is NULL), but it is expected to have a dst set.
+Update icmp6_send to use the dst on the skb to determine L3 domain.
+
+Fixes: ca254490c8dfd ("net: Add VRF support to IPv6 stack")
+Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/icmp.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv6/icmp.c
++++ b/net/ipv6/icmp.c
+@@ -448,7 +448,7 @@ static void icmp6_send(struct sk_buff *s
+       if (__ipv6_addr_needs_scope_id(addr_type))
+               iif = skb->dev->ifindex;
+       else
+-              iif = l3mdev_master_ifindex(skb->dev);
++              iif = l3mdev_master_ifindex(skb_dst(skb)->dev);
+ 
+       /*
+        *      Must not send error if the source does not uniquely
diff --git a/queue-4.8/net-icmp_route_lookup-should-use-rt-dev-to-determine-l3-domain.patch b/queue-4.8/net-icmp_route_lookup-should-use-rt-dev-to-determine-l3-domain.patch

new file mode 100644 (file)

index 0000000..1b09769
--- /dev/null
+++ b/queue-4.8/net-icmp_route_lookup-should-use-rt-dev-to-determine-l3-domain.patch
@@ -0,0 +1,43 @@
+From foo@baz Fri Nov 18 11:35:46 CET 2016
+From: David Ahern <dsa@cumulusnetworks.com>
+Date: Mon, 7 Nov 2016 12:03:09 -0800
+Subject: net: icmp_route_lookup should use rt dev to determine L3 domain
+
+From: David Ahern <dsa@cumulusnetworks.com>
+
+
+[ Upstream commit 9d1a6c4ea43e48c7880c85971c17939b56832d8a ]
+
+icmp_send is called in response to some event. The skb may not have
+the device set (skb->dev is NULL), but it is expected to have an rt.
+Update icmp_route_lookup to use the rt on the skb to determine L3
+domain.
+
+Fixes: 613d09b30f8b ("net: Use VRF device index for lookups on TX")
+Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/icmp.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/icmp.c
++++ b/net/ipv4/icmp.c
+@@ -477,7 +477,7 @@ static struct rtable *icmp_route_lookup(
+       fl4->flowi4_proto = IPPROTO_ICMP;
+       fl4->fl4_icmp_type = type;
+       fl4->fl4_icmp_code = code;
+-      fl4->flowi4_oif = l3mdev_master_ifindex(skb_in->dev);
++      fl4->flowi4_oif = l3mdev_master_ifindex(skb_dst(skb_in)->dev);
+ 
+       security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4));
+       rt = __ip_route_output_key_hash(net, fl4,
+@@ -502,7 +502,7 @@ static struct rtable *icmp_route_lookup(
+       if (err)
+               goto relookup_failed;
+ 
+-      if (inet_addr_type_dev_table(net, skb_in->dev,
++      if (inet_addr_type_dev_table(net, skb_dst(skb_in)->dev,
+                                    fl4_dec.saddr) == RTN_LOCAL) {
+               rt2 = __ip_route_output_key(net, &fl4_dec);
+               if (IS_ERR(rt2))
diff --git a/queue-4.8/net-mangle-zero-checksum-in-skb_checksum_help.patch b/queue-4.8/net-mangle-zero-checksum-in-skb_checksum_help.patch

new file mode 100644 (file)

index 0000000..600ef5f
--- /dev/null
+++ b/queue-4.8/net-mangle-zero-checksum-in-skb_checksum_help.patch
@@ -0,0 +1,41 @@
+From foo@baz Fri Nov 18 11:35:46 CET 2016
+From: Eric Dumazet <edumazet@google.com>
+Date: Sat, 29 Oct 2016 11:02:36 -0700
+Subject: net: mangle zero checksum in skb_checksum_help()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 4f2e4ad56a65f3b7d64c258e373cb71e8d2499f4 ]
+
+Sending zero checksum is ok for TCP, but not for UDP.
+
+UDPv6 receiver should by default drop a frame with a 0 checksum,
+and UDPv4 would not verify the checksum and might accept a corrupted
+packet.
+
+Simply replace such checksum by 0xffff, regardless of transport.
+
+This error was caught on SIT tunnels, but seems generic.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Maciej Żenczykowski <maze@google.com>
+Cc: Willem de Bruijn <willemb@google.com>
+Acked-by: Maciej Żenczykowski <maze@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -2484,7 +2484,7 @@ int skb_checksum_help(struct sk_buff *sk
+                       goto out;
+       }
+ 
+-      *(__sum16 *)(skb->data + offset) = csum_fold(csum);
++      *(__sum16 *)(skb->data + offset) = csum_fold(csum) ?: CSUM_MANGLED_0;
+ out_set_summed:
+       skb->ip_summed = CHECKSUM_NONE;
+ out:
diff --git a/queue-4.8/net-stmmac-fix-lack-of-link-transition-for-fixed-phys.patch b/queue-4.8/net-stmmac-fix-lack-of-link-transition-for-fixed-phys.patch

new file mode 100644 (file)

index 0000000..9862f76
--- /dev/null
+++ b/queue-4.8/net-stmmac-fix-lack-of-link-transition-for-fixed-phys.patch
@@ -0,0 +1,50 @@
+From foo@baz Fri Nov 18 11:35:46 CET 2016
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Sun, 13 Nov 2016 17:50:35 -0800
+Subject: net: stmmac: Fix lack of link transition for fixed PHYs
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+
+[ Upstream commit c51e424dc79e1428afc4d697cdb6a07f7af70cbf ]
+
+Commit 52f95bbfcf72 ("stmmac: fix adjust link call in case of a switch
+is attached") added some logic to avoid polling the fixed PHY and
+therefore invoking the adjust_link callback more than once, since this
+is a fixed PHY and link events won't be generated.
+
+This works fine the first time, because we start with phydev->irq =
+PHY_POLL, so we call adjust_link, then we set phydev->irq =
+PHY_IGNORE_INTERRUPT and we stop polling the PHY.
+
+Now, if we called ndo_close(), which calls both phy_stop() and does an
+explicit netif_carrier_off(), we end up with a link down. Upon calling
+ndo_open() again, despite starting the PHY state machine, we have
+PHY_IGNORE_INTERRUPT set, and we generate no link event at all, so the
+link is permanently down.
+
+Fixes: 52f95bbfcf72 ("stmmac: fix adjust link call in case of a switch is attached")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Acked-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/stmicro/stmmac/stmmac_main.c |    7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+@@ -871,6 +871,13 @@ static int stmmac_init_phy(struct net_de
+               return -ENODEV;
+       }
+ 
++      /* stmmac_adjust_link will change this to PHY_IGNORE_INTERRUPT to avoid
++       * subsequent PHY polling, make sure we force a link transition if
++       * we have a UP/DOWN/UP transition
++       */
++      if (phydev->is_pseudo_fixed_link)
++              phydev->irq = PHY_POLL;
++
+       pr_debug("stmmac_init_phy:  %s: attached to PHY (UID 0x%x)"
+                " Link = %d\n", dev->name, phydev->phy_id, phydev->link);
+ 
diff --git a/queue-4.8/revert-bnx2-reset-device-during-driver-initialization.patch b/queue-4.8/revert-bnx2-reset-device-during-driver-initialization.patch

new file mode 100644 (file)

index 0000000..c8847cd
--- /dev/null
+++ b/queue-4.8/revert-bnx2-reset-device-during-driver-initialization.patch
@@ -0,0 +1,67 @@
+From foo@baz Fri Nov 18 11:35:46 CET 2016
+From: Baoquan He <bhe@redhat.com>
+Date: Sun, 13 Nov 2016 13:01:32 +0800
+Subject: Revert "bnx2: Reset device during driver initialization"
+
+From: Baoquan He <bhe@redhat.com>
+
+
+[ Upstream commit 5d0d4b91bf627f14f95167b738d524156c9d440b ]
+
+This reverts commit 3e1be7ad2d38c6bd6aeef96df9bd0a7822f4e51c.
+
+When people build bnx2 driver into kernel, it will fail to detect
+and load firmware because firmware is contained in initramfs and
+initramfs has not been uncompressed yet during do_initcalls. So
+revert commit 3e1be7a and work out a new way in the later patch.
+
+Signed-off-by: Baoquan He <bhe@redhat.com>
+Acked-by: Paul Menzel <pmenzel@molgen.mpg.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/bnx2.c |   12 +++++-------
+ 1 file changed, 5 insertions(+), 7 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/bnx2.c
++++ b/drivers/net/ethernet/broadcom/bnx2.c
+@@ -6356,6 +6356,10 @@ bnx2_open(struct net_device *dev)
+       struct bnx2 *bp = netdev_priv(dev);
+       int rc;
+ 
++      rc = bnx2_request_firmware(bp);
++      if (rc < 0)
++              goto out;
++
+       netif_carrier_off(dev);
+ 
+       bnx2_disable_int(bp);
+@@ -6424,6 +6428,7 @@ open_err:
+       bnx2_free_irq(bp);
+       bnx2_free_mem(bp);
+       bnx2_del_napi(bp);
++      bnx2_release_firmware(bp);
+       goto out;
+ }
+ 
+@@ -8570,12 +8575,6 @@ bnx2_init_one(struct pci_dev *pdev, cons
+ 
+       pci_set_drvdata(pdev, dev);
+ 
+-      rc = bnx2_request_firmware(bp);
+-      if (rc < 0)
+-              goto error;
+-
+-
+-      bnx2_reset_chip(bp, BNX2_DRV_MSG_CODE_RESET);
+       memcpy(dev->dev_addr, bp->mac_addr, ETH_ALEN);
+ 
+       dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG |
+@@ -8608,7 +8607,6 @@ bnx2_init_one(struct pci_dev *pdev, cons
+       return 0;
+ 
+ error:
+-      bnx2_release_firmware(bp);
+       pci_iounmap(pdev, bp->regview);
+       pci_release_regions(pdev);
+       pci_disable_device(pdev);
diff --git a/queue-4.8/revert-include-uapi-linux-atm_zatm.h-include-linux-time.h.patch b/queue-4.8/revert-include-uapi-linux-atm_zatm.h-include-linux-time.h.patch

new file mode 100644 (file)

index 0000000..435f867
--- /dev/null
+++ b/queue-4.8/revert-include-uapi-linux-atm_zatm.h-include-linux-time.h.patch
@@ -0,0 +1,57 @@
+From foo@baz Fri Nov 18 11:35:46 CET 2016
+From: Mike Frysinger <vapier@gentoo.org>
+Date: Thu, 10 Nov 2016 19:08:39 -0500
+Subject: Revert "include/uapi/linux/atm_zatm.h: include linux/time.h"
+
+From: Mike Frysinger <vapier@gentoo.org>
+
+
+[ Upstream commit 7b5b74efcca00f15c2aec1dc7175bfe34b6ec643 ]
+
+This reverts commit cf00713a655d ("include/uapi/linux/atm_zatm.h: include
+linux/time.h").
+
+This attempted to fix userspace breakage that no longer existed when
+the patch was merged.  Almost one year earlier, commit 70ba07b675b5
+("atm: remove 'struct zatm_t_hist'") deleted the struct in question.
+
+After this patch was merged, we now have to deal with people being
+unable to include this header in conjunction with standard C library
+headers like stdlib.h (which linux-atm does).  Example breakage:
+x86_64-pc-linux-gnu-gcc -DHAVE_CONFIG_H -I. -I../.. -I./../q2931 -I./../saal \
+       -I.  -DCPPFLAGS_TEST  -I../../src/include -O2 -march=native -pipe -g \
+       -frecord-gcc-switches -freport-bug -Wimplicit-function-declaration \
+       -Wnonnull -Wstrict-aliasing -Wparentheses -Warray-bounds \
+       -Wfree-nonheap-object -Wreturn-local-addr -fno-strict-aliasing -Wall \
+       -Wshadow -Wpointer-arith -Wwrite-strings -Wstrict-prototypes -c zntune.c
+In file included from /usr/include/linux/atm_zatm.h:17:0,
+                 from zntune.c:17:
+/usr/include/linux/time.h:9:8: error: redefinition of ‘struct timespec’
+ struct timespec {
+        ^
+In file included from /usr/include/sys/select.h:43:0,
+                 from /usr/include/sys/types.h:219,
+                 from /usr/include/stdlib.h:314,
+                 from zntune.c:9:
+/usr/include/time.h:120:8: note: originally defined here
+ struct timespec
+        ^
+
+Signed-off-by: Mike Frysinger <vapier@gentoo.org>
+Acked-by: Mikko Rapeli <mikko.rapeli@iki.fi>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/uapi/linux/atm_zatm.h |    1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/include/uapi/linux/atm_zatm.h
++++ b/include/uapi/linux/atm_zatm.h
+@@ -14,7 +14,6 @@
+ 
+ #include <linux/atmapi.h>
+ #include <linux/atmioc.h>
+-#include <linux/time.h>
+ 
+ #define ZATM_GETPOOL  _IOW('a',ATMIOC_SARPRV+1,struct atmif_sioc)
+                                               /* get pool statistics */
diff --git a/queue-4.8/sctp-assign-assoc_id-earlier-in-__sctp_connect.patch b/queue-4.8/sctp-assign-assoc_id-earlier-in-__sctp_connect.patch

new file mode 100644 (file)

index 0000000..f265c1f
--- /dev/null
+++ b/queue-4.8/sctp-assign-assoc_id-earlier-in-__sctp_connect.patch
@@ -0,0 +1,57 @@
+From foo@baz Fri Nov 18 11:35:46 CET 2016
+From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Date: Thu, 3 Nov 2016 17:03:41 -0200
+Subject: sctp: assign assoc_id earlier in __sctp_connect
+
+From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+
+
+[ Upstream commit 7233bc84a3aeda835d334499dc00448373caf5c0 ]
+
+sctp_wait_for_connect() already holds the asoc to keep it alive
+during the sleep, in case another thread releases it. But Andrey
+Konovalov and Dmitry Vyukov reported a use-after-free in such a
+situation.
+
+Problem is that __sctp_connect() doesn't get a ref on the asoc and will
+do a read on the asoc after calling sctp_wait_for_connect(), but by then
+another thread may have closed it and the _put on sctp_wait_for_connect
+will actually release it, causing the use-after-free.
+
+The fix is, instead of doing the read after waiting for the connect, to
+do it beforehand, avoiding this issue as the socket is still locked then.
+There should be no issue with returning the asoc id in case of failure as
+the application shouldn't rely on that number in such situations
+anyway.
+
+This issue doesn't exist in sctp_sendmsg() path.
+
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Reported-by: Andrey Konovalov <andreyknvl@google.com>
+Tested-by: Andrey Konovalov <andreyknvl@google.com>
+Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Reviewed-by: Xin Long <lucien.xin@gmail.com>
+Acked-by: Neil Horman <nhorman@tuxdriver.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/socket.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/net/sctp/socket.c
++++ b/net/sctp/socket.c
+@@ -1214,9 +1214,12 @@ static int __sctp_connect(struct sock *s
+ 
+       timeo = sock_sndtimeo(sk, f_flags & O_NONBLOCK);
+ 
+-      err = sctp_wait_for_connect(asoc, &timeo);
+-      if ((err == 0 || err == -EINPROGRESS) && assoc_id)
++      if (assoc_id)
+               *assoc_id = asoc->assoc_id;
++      err = sctp_wait_for_connect(asoc, &timeo);
++      /* Note: the asoc may be freed after the return of
++       * sctp_wait_for_connect.
++       */
+ 
+       /* Don't free association on exit. */
+       asoc = NULL;
diff --git a/queue-4.8/sctp-change-sk-state-only-when-it-has-assocs-in-sctp_shutdown.patch b/queue-4.8/sctp-change-sk-state-only-when-it-has-assocs-in-sctp_shutdown.patch

new file mode 100644 (file)

index 0000000..fbbf40d
--- /dev/null
+++ b/queue-4.8/sctp-change-sk-state-only-when-it-has-assocs-in-sctp_shutdown.patch
@@ -0,0 +1,63 @@
+From foo@baz Fri Nov 18 11:35:46 CET 2016
+From: Xin Long <lucien.xin@gmail.com>
+Date: Sun, 13 Nov 2016 21:44:37 +0800
+Subject: sctp: change sk state only when it has assocs in sctp_shutdown
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit 5bf35ddfee052d44f39ebaa395d87101c8918405 ]
+
+Now when users shut down a sock with SEND_SHUTDOWN in sctp, even if
+this sock has no connection (assoc), the sk state is changed to
+SCTP_SS_CLOSING, which is not what we expect.
+
+Besides, if users then try to listen on this sock, the kernel
+could even panic when it dereferences sctp_sk(sk)->bind_hash in
+sctp_inet_listen, as bind_hash is null when the sock has no assoc.
+
+This patch is to move sk state change after checking sk assocs
+is not empty, and also merge these two if() conditions and reduce
+indent level.
+
+Fixes: d46e416c11c8 ("sctp: sctp should change socket state when shutdown is received")
+Reported-by: Andrey Konovalov <andreyknvl@google.com>
+Tested-by: Andrey Konovalov <andreyknvl@google.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Acked-by: Neil Horman <nhorman@tuxdriver.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/socket.c |   15 +++++++--------
+ 1 file changed, 7 insertions(+), 8 deletions(-)
+
+--- a/net/sctp/socket.c
++++ b/net/sctp/socket.c
+@@ -4281,19 +4281,18 @@ static void sctp_shutdown(struct sock *s
+ {
+       struct net *net = sock_net(sk);
+       struct sctp_endpoint *ep;
+-      struct sctp_association *asoc;
+ 
+       if (!sctp_style(sk, TCP))
+               return;
+ 
+-      if (how & SEND_SHUTDOWN) {
++      ep = sctp_sk(sk)->ep;
++      if (how & SEND_SHUTDOWN && !list_empty(&ep->asocs)) {
++              struct sctp_association *asoc;
++
+               sk->sk_state = SCTP_SS_CLOSING;
+-              ep = sctp_sk(sk)->ep;
+-              if (!list_empty(&ep->asocs)) {
+-                      asoc = list_entry(ep->asocs.next,
+-                                        struct sctp_association, asocs);
+-                      sctp_primitive_SHUTDOWN(net, asoc, NULL);
+-              }
++              asoc = list_entry(ep->asocs.next,
++                                struct sctp_association, asocs);
++              sctp_primitive_SHUTDOWN(net, asoc, NULL);
+       }
+ }
+ 
diff --git a/queue-4.8/series b/queue-4.8/series

new file mode 100644 (file)

index 0000000..0760e2d
--- /dev/null
+++ b/queue-4.8/series
@@ -0,0 +1,29 @@
+dctcp-avoid-bogus-doubling-of-cwnd-after-loss.patch
+net-clear-sk_err_soft-in-sk_clone_lock.patch
+net-mangle-zero-checksum-in-skb_checksum_help.patch
+bgmac-stop-clearing-dma-receive-control-register-right-after-it-is-set.patch
+ip6_tunnel-clear-ip6cb-in-ip6tunnel_xmit.patch
+tcp-fix-potential-memory-corruption.patch
+ipv4-allow-local-fragmentation-in-ip_finish_output_gso.patch
+tcp-fix-return-value-for-partial-writes.patch
+dccp-do-not-release-listeners-too-soon.patch
+dccp-do-not-send-reset-to-already-closed-sockets.patch
+dccp-fix-out-of-bound-access-in-dccp_v4_err.patch
+ipv6-dccp-fix-out-of-bound-access-in-dccp_v6_err.patch
+ipv6-dccp-add-missing-bind_conflict-to-dccp_ipv6_mapped.patch
+sctp-assign-assoc_id-earlier-in-__sctp_connect.patch
+bpf-fix-htab-map-destruction-when-extra-reserve-is-in-use.patch
+net-icmp6_send-should-use-dst-dev-to-determine-l3-domain.patch
+fib_trie-correct-proc-net-route-off-by-one-error.patch
+sock-fix-sendmmsg-for-partial-sendmsg.patch
+net-icmp_route_lookup-should-use-rt-dev-to-determine-l3-domain.patch
+net-__skb_flow_dissect-must-cap-its-return-value.patch
+ipv4-use-new_gw-for-redirect-neigh-lookup.patch
+tcp-take-care-of-truncations-done-by-sk_filter.patch
+revert-include-uapi-linux-atm_zatm.h-include-linux-time.h.patch
+mlxsw-spectrum-fix-refcount-bug-on-span-entries.patch
+mlxsw-spectrum_router-correctly-dump-neighbour-activity.patch
+revert-bnx2-reset-device-during-driver-initialization.patch
+bnx2-wait-for-in-flight-dma-to-complete-at-probe-stage.patch
+sctp-change-sk-state-only-when-it-has-assocs-in-sctp_shutdown.patch
+net-stmmac-fix-lack-of-link-transition-for-fixed-phys.patch
diff --git a/queue-4.8/sock-fix-sendmmsg-for-partial-sendmsg.patch b/queue-4.8/sock-fix-sendmmsg-for-partial-sendmsg.patch

new file mode 100644 (file)

index 0000000..4de7155
--- /dev/null
+++ b/queue-4.8/sock-fix-sendmmsg-for-partial-sendmsg.patch
@@ -0,0 +1,49 @@
+From foo@baz Fri Nov 18 11:35:46 CET 2016
+From: Soheil Hassas Yeganeh <soheil@google.com>
+Date: Fri, 4 Nov 2016 15:36:49 -0400
+Subject: sock: fix sendmmsg for partial sendmsg
+
+From: Soheil Hassas Yeganeh <soheil@google.com>
+
+
+[ Upstream commit 3023898b7d4aac65987bd2f485cc22390aae6f78 ]
+
+Do not send the next message in sendmmsg for partial sendmsg
+invocations.
+
+sendmmsg assumes that it can continue sending the next message
+when the return value of the individual sendmsg invocations
+is positive. It results in corrupting the data for TCP,
+SCTP, and UNIX streams.
+
+For example, sendmmsg([["abcd"], ["efgh"]]) can result in a stream
+of "aefgh" if the first sendmsg invocation sends only the first
+byte while the second sendmsg goes through.
+
+Datagram sockets either send the entire datagram or fail, so
+this patch affects only sockets of type SOCK_STREAM and
+SOCK_SEQPACKET.
+
+Fixes: 228e548e6020 ("net: Add sendmmsg socket system call")
+Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Willem de Bruijn <willemb@google.com>
+Signed-off-by: Neal Cardwell <ncardwell@google.com>
+Acked-by: Maciej Żenczykowski <maze@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/socket.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/socket.c
++++ b/net/socket.c
+@@ -2041,6 +2041,8 @@ int __sys_sendmmsg(int fd, struct mmsghd
+               if (err)
+                       break;
+               ++datagrams;
++              if (msg_data_left(&msg_sys))
++                      break;
+               cond_resched();
+       }
+ 
diff --git a/queue-4.8/tcp-fix-potential-memory-corruption.patch b/queue-4.8/tcp-fix-potential-memory-corruption.patch

new file mode 100644 (file)

index 0000000..cb33090
--- /dev/null
+++ b/queue-4.8/tcp-fix-potential-memory-corruption.patch
@@ -0,0 +1,40 @@
+From foo@baz Fri Nov 18 11:35:46 CET 2016
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 2 Nov 2016 07:53:17 -0700
+Subject: tcp: fix potential memory corruption
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit ac9e70b17ecd7c6e933ff2eaf7ab37429e71bf4d ]
+
+Imagine initial value of max_skb_frags is 17, and last
+skb in write queue has 15 frags.
+
+Then max_skb_frags is lowered to 14 or smaller value.
+
+tcp_sendmsg() will then be allowed to add additional page frags
+and eventually go past MAX_SKB_FRAGS, overflowing struct
+skb_shared_info.
+
+Fixes: 5f74f82ea34c ("net:Add sysctl_max_skb_frags")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Hans Westgaard Ry <hans.westgaard.ry@oracle.com>
+Cc: Håkon Bugge <haakon.bugge@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -1219,7 +1219,7 @@ new_segment:
+ 
+                       if (!skb_can_coalesce(skb, i, pfrag->page,
+                                             pfrag->offset)) {
+-                              if (i == sysctl_max_skb_frags || !sg) {
++                              if (i >= sysctl_max_skb_frags || !sg) {
+                                       tcp_mark_push(tp, skb);
+                                       goto new_segment;
+                               }
diff --git a/queue-4.8/tcp-fix-return-value-for-partial-writes.patch b/queue-4.8/tcp-fix-return-value-for-partial-writes.patch

new file mode 100644 (file)

index 0000000..cd04b65
--- /dev/null
+++ b/queue-4.8/tcp-fix-return-value-for-partial-writes.patch
@@ -0,0 +1,48 @@
+From foo@baz Fri Nov 18 11:35:46 CET 2016
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 2 Nov 2016 14:41:50 -0700
+Subject: tcp: fix return value for partial writes
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 79d8665b9545e128637c51cf7febde9c493b6481 ]
+
+After my commit, tcp_sendmsg() might restart its loop after
+processing socket backlog.
+
+If sk_err is set, we blindly return an error, even though we
+copied data from user space before.
+
+We should instead return number of bytes that could be copied,
+otherwise user space might resend data and corrupt the stream.
+
+This might happen if another thread is using recvmsg(MSG_ERRQUEUE)
+to process timestamps.
+
+Issue was diagnosed by Soheil and Willem, big kudos to them !
+
+Fixes: d41a69f1d390f ("tcp: make tcp_sendmsg() aware of socket backlog")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Willem de Bruijn <willemb@google.com>
+Cc: Soheil Hassas Yeganeh <soheil@google.com>
+Cc: Yuchung Cheng <ycheng@google.com>
+Cc: Neal Cardwell <ncardwell@google.com>
+Tested-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -1145,7 +1145,7 @@ restart:
+ 
+       err = -EPIPE;
+       if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
+-              goto out_err;
++              goto do_error;
+ 
+       sg = !!(sk->sk_route_caps & NETIF_F_SG);
+ 
diff --git a/queue-4.8/tcp-take-care-of-truncations-done-by-sk_filter.patch b/queue-4.8/tcp-take-care-of-truncations-done-by-sk_filter.patch

new file mode 100644 (file)

index 0000000..516cee5
--- /dev/null
+++ b/queue-4.8/tcp-take-care-of-truncations-done-by-sk_filter.patch
@@ -0,0 +1,102 @@
+From foo@baz Fri Nov 18 11:35:46 CET 2016
+From: Eric Dumazet <edumazet@google.com>
+Date: Thu, 10 Nov 2016 13:12:35 -0800
+Subject: tcp: take care of truncations done by sk_filter()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit ac6e780070e30e4c35bd395acfe9191e6268bdd3 ]
+
+With syzkaller's help, Marco Grassi found a bug in the TCP stack,
+crashing in tcp_collapse().
+
+Root cause is that sk_filter() can truncate the incoming skb,
+but TCP stack was not really expecting this to happen.
+It probably was expecting a simple DROP or ACCEPT behavior.
+
+We first need to make sure no part of TCP header could be removed.
+Then we need to adjust TCP_SKB_CB(skb)->end_seq
+
+Many thanks to syzkaller team and Marco for giving us a reproducer.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Marco Grassi <marco.gra@gmail.com>
+Reported-by: Vladis Dronov <vdronov@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/tcp.h   |    1 +
+ net/ipv4/tcp_ipv4.c |   19 ++++++++++++++++++-
+ net/ipv6/tcp_ipv6.c |    6 ++++--
+ 3 files changed, 23 insertions(+), 3 deletions(-)
+
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -1164,6 +1164,7 @@ static inline void tcp_prequeue_init(str
+ }
+ 
+ bool tcp_prequeue(struct sock *sk, struct sk_buff *skb);
++int tcp_filter(struct sock *sk, struct sk_buff *skb);
+ 
+ #undef STATE_TRACE
+ 
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -1537,6 +1537,21 @@ bool tcp_prequeue(struct sock *sk, struc
+ }
+ EXPORT_SYMBOL(tcp_prequeue);
+ 
++int tcp_filter(struct sock *sk, struct sk_buff *skb)
++{
++      struct tcphdr *th = (struct tcphdr *)skb->data;
++      unsigned int eaten = skb->len;
++      int err;
++
++      err = sk_filter_trim_cap(sk, skb, th->doff * 4);
++      if (!err) {
++              eaten -= skb->len;
++              TCP_SKB_CB(skb)->end_seq -= eaten;
++      }
++      return err;
++}
++EXPORT_SYMBOL(tcp_filter);
++
+ /*
+  *    From tcp_input.c
+  */
+@@ -1648,8 +1663,10 @@ process:
+ 
+       nf_reset(skb);
+ 
+-      if (sk_filter(sk, skb))
++      if (tcp_filter(sk, skb))
+               goto discard_and_relse;
++      th = (const struct tcphdr *)skb->data;
++      iph = ip_hdr(skb);
+ 
+       skb->dev = NULL;
+ 
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -1228,7 +1228,7 @@ static int tcp_v6_do_rcv(struct sock *sk
+       if (skb->protocol == htons(ETH_P_IP))
+               return tcp_v4_do_rcv(sk, skb);
+ 
+-      if (sk_filter(sk, skb))
++      if (tcp_filter(sk, skb))
+               goto discard;
+ 
+       /*
+@@ -1455,8 +1455,10 @@ process:
+       if (tcp_v6_inbound_md5_hash(sk, skb))
+               goto discard_and_relse;
+ 
+-      if (sk_filter(sk, skb))
++      if (tcp_filter(sk, skb))
+               goto discard_and_relse;
++      th = (const struct tcphdr *)skb->data;
++      hdr = ipv6_hdr(skb);
+ 
+       skb->dev = NULL;
+
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Fri, 18 Nov 2016 10:36:31 +0000 (11:36 +0100)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Fri, 18 Nov 2016 10:36:31 +0000 (11:36 +0100)
queue-4.8/bgmac-stop-clearing-dma-receive-control-register-right-after-it-is-set.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/bnx2-wait-for-in-flight-dma-to-complete-at-probe-stage.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/bpf-fix-htab-map-destruction-when-extra-reserve-is-in-use.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/dccp-do-not-release-listeners-too-soon.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/dccp-do-not-send-reset-to-already-closed-sockets.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/dccp-fix-out-of-bound-access-in-dccp_v4_err.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/dctcp-avoid-bogus-doubling-of-cwnd-after-loss.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/fib_trie-correct-proc-net-route-off-by-one-error.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/ip6_tunnel-clear-ip6cb-in-ip6tunnel_xmit.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/ipv4-allow-local-fragmentation-in-ip_finish_output_gso.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/ipv4-use-new_gw-for-redirect-neigh-lookup.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/ipv6-dccp-add-missing-bind_conflict-to-dccp_ipv6_mapped.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/ipv6-dccp-fix-out-of-bound-access-in-dccp_v6_err.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/mlxsw-spectrum-fix-refcount-bug-on-span-entries.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/mlxsw-spectrum_router-correctly-dump-neighbour-activity.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/net-__skb_flow_dissect-must-cap-its-return-value.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/net-clear-sk_err_soft-in-sk_clone_lock.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/net-icmp6_send-should-use-dst-dev-to-determine-l3-domain.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/net-icmp_route_lookup-should-use-rt-dev-to-determine-l3-domain.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/net-mangle-zero-checksum-in-skb_checksum_help.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/net-stmmac-fix-lack-of-link-transition-for-fixed-phys.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/revert-bnx2-reset-device-during-driver-initialization.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/revert-include-uapi-linux-atm_zatm.h-include-linux-time.h.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/sctp-assign-assoc_id-earlier-in-__sctp_connect.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/sctp-change-sk-state-only-when-it-has-assocs-in-sctp_shutdown.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/series	[new file with mode: 0644]	patch \| blob
queue-4.8/sock-fix-sendmmsg-for-partial-sendmsg.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/tcp-fix-potential-memory-corruption.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/tcp-fix-return-value-for-partial-writes.patch	[new file with mode: 0644]	patch \| blob
queue-4.8/tcp-take-care-of-truncations-done-by-sk_filter.patch	[new file with mode: 0644]	patch \| blob