From 5c38cac1e62be3fbda108df14ddc0b4b6e7b016d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 18 Nov 2016 11:36:31 +0100 Subject: [PATCH] 4.8-stable patches added patches: bgmac-stop-clearing-dma-receive-control-register-right-after-it-is-set.patch bnx2-wait-for-in-flight-dma-to-complete-at-probe-stage.patch bpf-fix-htab-map-destruction-when-extra-reserve-is-in-use.patch dccp-do-not-release-listeners-too-soon.patch dccp-do-not-send-reset-to-already-closed-sockets.patch dccp-fix-out-of-bound-access-in-dccp_v4_err.patch dctcp-avoid-bogus-doubling-of-cwnd-after-loss.patch fib_trie-correct-proc-net-route-off-by-one-error.patch ip6_tunnel-clear-ip6cb-in-ip6tunnel_xmit.patch ipv4-allow-local-fragmentation-in-ip_finish_output_gso.patch ipv4-use-new_gw-for-redirect-neigh-lookup.patch ipv6-dccp-add-missing-bind_conflict-to-dccp_ipv6_mapped.patch ipv6-dccp-fix-out-of-bound-access-in-dccp_v6_err.patch mlxsw-spectrum-fix-refcount-bug-on-span-entries.patch mlxsw-spectrum_router-correctly-dump-neighbour-activity.patch net-__skb_flow_dissect-must-cap-its-return-value.patch net-clear-sk_err_soft-in-sk_clone_lock.patch net-icmp6_send-should-use-dst-dev-to-determine-l3-domain.patch net-icmp_route_lookup-should-use-rt-dev-to-determine-l3-domain.patch net-mangle-zero-checksum-in-skb_checksum_help.patch net-stmmac-fix-lack-of-link-transition-for-fixed-phys.patch revert-bnx2-reset-device-during-driver-initialization.patch revert-include-uapi-linux-atm_zatm.h-include-linux-time.h.patch sctp-assign-assoc_id-earlier-in-__sctp_connect.patch sctp-change-sk-state-only-when-it-has-assocs-in-sctp_shutdown.patch sock-fix-sendmmsg-for-partial-sendmsg.patch tcp-fix-potential-memory-corruption.patch tcp-fix-return-value-for-partial-writes.patch tcp-take-care-of-truncations-done-by-sk_filter.patch --- ...ntrol-register-right-after-it-is-set.patch | 59 ++++++++ ...light-dma-to-complete-at-probe-stage.patch | 95 ++++++++++++ ...ruction-when-extra-reserve-is-in-use.patch | 42 ++++++ 
...cp-do-not-release-listeners-too-soon.patch | 142 ++++++++++++++++++ ...send-reset-to-already-closed-sockets.patch | 74 +++++++++ ...x-out-of-bound-access-in-dccp_v4_err.patch | 56 +++++++ ...id-bogus-doubling-of-cwnd-after-loss.patch | 87 +++++++++++ ...rect-proc-net-route-off-by-one-error.patch | 102 +++++++++++++ ...tunnel-clear-ip6cb-in-ip6tunnel_xmit.patch | 36 +++++ ...ragmentation-in-ip_finish_output_gso.patch | 112 ++++++++++++++ ...use-new_gw-for-redirect-neigh-lookup.patch | 51 +++++++ ...ng-bind_conflict-to-dccp_ipv6_mapped.patch | 35 +++++ ...x-out-of-bound-access-in-dccp_v6_err.patch | 53 +++++++ ...rum-fix-refcount-bug-on-span-entries.patch | 56 +++++++ ...er-correctly-dump-neighbour-activity.patch | 67 +++++++++ ...ow_dissect-must-cap-its-return-value.patch | 59 ++++++++ ...t-clear-sk_err_soft-in-sk_clone_lock.patch | 34 +++++ ...d-use-dst-dev-to-determine-l3-domain.patch | 33 ++++ ...ld-use-rt-dev-to-determine-l3-domain.patch | 43 ++++++ ...e-zero-checksum-in-skb_checksum_help.patch | 41 +++++ ...ck-of-link-transition-for-fixed-phys.patch | 50 ++++++ ...-device-during-driver-initialization.patch | 67 +++++++++ ...inux-atm_zatm.h-include-linux-time.h.patch | 57 +++++++ ...n-assoc_id-earlier-in-__sctp_connect.patch | 57 +++++++ ...-when-it-has-assocs-in-sctp_shutdown.patch | 63 ++++++++ queue-4.8/series | 29 ++++ ...ock-fix-sendmmsg-for-partial-sendmsg.patch | 49 ++++++ .../tcp-fix-potential-memory-corruption.patch | 40 +++++ ...-fix-return-value-for-partial-writes.patch | 48 ++++++ ...are-of-truncations-done-by-sk_filter.patch | 102 +++++++++++++ 30 files changed, 1839 insertions(+) create mode 100644 queue-4.8/bgmac-stop-clearing-dma-receive-control-register-right-after-it-is-set.patch create mode 100644 queue-4.8/bnx2-wait-for-in-flight-dma-to-complete-at-probe-stage.patch create mode 100644 queue-4.8/bpf-fix-htab-map-destruction-when-extra-reserve-is-in-use.patch create mode 100644 queue-4.8/dccp-do-not-release-listeners-too-soon.patch create 
mode 100644 queue-4.8/dccp-do-not-send-reset-to-already-closed-sockets.patch create mode 100644 queue-4.8/dccp-fix-out-of-bound-access-in-dccp_v4_err.patch create mode 100644 queue-4.8/dctcp-avoid-bogus-doubling-of-cwnd-after-loss.patch create mode 100644 queue-4.8/fib_trie-correct-proc-net-route-off-by-one-error.patch create mode 100644 queue-4.8/ip6_tunnel-clear-ip6cb-in-ip6tunnel_xmit.patch create mode 100644 queue-4.8/ipv4-allow-local-fragmentation-in-ip_finish_output_gso.patch create mode 100644 queue-4.8/ipv4-use-new_gw-for-redirect-neigh-lookup.patch create mode 100644 queue-4.8/ipv6-dccp-add-missing-bind_conflict-to-dccp_ipv6_mapped.patch create mode 100644 queue-4.8/ipv6-dccp-fix-out-of-bound-access-in-dccp_v6_err.patch create mode 100644 queue-4.8/mlxsw-spectrum-fix-refcount-bug-on-span-entries.patch create mode 100644 queue-4.8/mlxsw-spectrum_router-correctly-dump-neighbour-activity.patch create mode 100644 queue-4.8/net-__skb_flow_dissect-must-cap-its-return-value.patch create mode 100644 queue-4.8/net-clear-sk_err_soft-in-sk_clone_lock.patch create mode 100644 queue-4.8/net-icmp6_send-should-use-dst-dev-to-determine-l3-domain.patch create mode 100644 queue-4.8/net-icmp_route_lookup-should-use-rt-dev-to-determine-l3-domain.patch create mode 100644 queue-4.8/net-mangle-zero-checksum-in-skb_checksum_help.patch create mode 100644 queue-4.8/net-stmmac-fix-lack-of-link-transition-for-fixed-phys.patch create mode 100644 queue-4.8/revert-bnx2-reset-device-during-driver-initialization.patch create mode 100644 queue-4.8/revert-include-uapi-linux-atm_zatm.h-include-linux-time.h.patch create mode 100644 queue-4.8/sctp-assign-assoc_id-earlier-in-__sctp_connect.patch create mode 100644 queue-4.8/sctp-change-sk-state-only-when-it-has-assocs-in-sctp_shutdown.patch create mode 100644 queue-4.8/series create mode 100644 queue-4.8/sock-fix-sendmmsg-for-partial-sendmsg.patch create mode 100644 queue-4.8/tcp-fix-potential-memory-corruption.patch create mode 100644 
queue-4.8/tcp-fix-return-value-for-partial-writes.patch create mode 100644 queue-4.8/tcp-take-care-of-truncations-done-by-sk_filter.patch diff --git a/queue-4.8/bgmac-stop-clearing-dma-receive-control-register-right-after-it-is-set.patch b/queue-4.8/bgmac-stop-clearing-dma-receive-control-register-right-after-it-is-set.patch new file mode 100644 index 00000000000..0d9b406b72c --- /dev/null +++ b/queue-4.8/bgmac-stop-clearing-dma-receive-control-register-right-after-it-is-set.patch @@ -0,0 +1,59 @@ +From foo@baz Fri Nov 18 11:35:46 CET 2016 +From: Andy Gospodarek +Date: Mon, 31 Oct 2016 13:32:03 -0400 +Subject: bgmac: stop clearing DMA receive control register right after it is set + +From: Andy Gospodarek + + +[ Upstream commit fcdefccac976ee51dd6071832b842d8fb41c479c ] + +Current bgmac code initializes some DMA settings in the receive control +register for some hardware and then immediately clears those settings. +Not clearing those settings results in ~420Mbps *improvement* in +throughput; this system can now receive frames at line-rate on Broadcom +5871x hardware compared to ~520Mbps today. I also tested a few other +values but found there to be no discernible difference in CPU +utilization even if burst size and prefetching values are different. + +On the hardware tested there was no need to keep the code that cleared +all but bits 16-17, but since there is a wide variety of hardware that +used this driver (I did not look at all hardware docs for hardware using +this IP block), I find it wise to move this call up and clear bits just +after reading the default value from the hardware rather than completely +removing it. + +This is a good candidate for -stable >=3.14 since that is when the code +that was supposed to improve performance (but did not) was introduced. + +Signed-off-by: Andy Gospodarek +Fixes: 56ceecde1f29 ("bgmac: initialize the DMA controller of core...") +Cc: Hauke Mehrtens +Acked-by: Hauke Mehrtens +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/bgmac.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/broadcom/bgmac.c ++++ b/drivers/net/ethernet/broadcom/bgmac.c +@@ -307,6 +307,10 @@ static void bgmac_dma_rx_enable(struct b + u32 ctl; + + ctl = bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_RX_CTL); ++ ++ /* preserve ONLY bits 16-17 from current hardware value */ ++ ctl &= BGMAC_DMA_RX_ADDREXT_MASK; ++ + if (bgmac->feature_flags & BGMAC_FEAT_RX_MASK_SETUP) { + ctl &= ~BGMAC_DMA_RX_BL_MASK; + ctl |= BGMAC_DMA_RX_BL_128 << BGMAC_DMA_RX_BL_SHIFT; +@@ -317,7 +321,6 @@ static void bgmac_dma_rx_enable(struct b + ctl &= ~BGMAC_DMA_RX_PT_MASK; + ctl |= BGMAC_DMA_RX_PT_1 << BGMAC_DMA_RX_PT_SHIFT; + } +- ctl &= BGMAC_DMA_RX_ADDREXT_MASK; + ctl |= BGMAC_DMA_RX_ENABLE; + ctl |= BGMAC_DMA_RX_PARITY_DISABLE; + ctl |= BGMAC_DMA_RX_OVERFLOW_CONT; diff --git a/queue-4.8/bnx2-wait-for-in-flight-dma-to-complete-at-probe-stage.patch b/queue-4.8/bnx2-wait-for-in-flight-dma-to-complete-at-probe-stage.patch new file mode 100644 index 00000000000..86e17cec13f --- /dev/null +++ b/queue-4.8/bnx2-wait-for-in-flight-dma-to-complete-at-probe-stage.patch @@ -0,0 +1,95 @@ +From foo@baz Fri Nov 18 11:35:46 CET 2016 +From: Baoquan He +Date: Sun, 13 Nov 2016 13:01:33 +0800 +Subject: bnx2: Wait for in-flight DMA to complete at probe stage + +From: Baoquan He + + +[ Upstream commit 6df77862f63f389df3b1ad879738e04440d7385d ] + +In-flight DMA from 1st kernel could continue going in kdump kernel. +New io-page table has been created before bnx2 does reset at open stage. +We have to wait for the in-flight DMA to complete to avoid it look up +into the newly created io-page table at probe stage. + +Suggested-by: Michael Chan +Signed-off-by: Baoquan He +Acked-by: Michael Chan +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/bnx2.c | 38 +++++++++++++++++++++++++++++------ + 1 file changed, 32 insertions(+), 6 deletions(-) + +--- a/drivers/net/ethernet/broadcom/bnx2.c ++++ b/drivers/net/ethernet/broadcom/bnx2.c +@@ -49,6 +49,7 @@ + #include + #include + #include ++#include + + #if defined(CONFIG_CNIC) || defined(CONFIG_CNIC_MODULE) + #define BCM_CNIC 1 +@@ -4759,15 +4760,16 @@ bnx2_setup_msix_tbl(struct bnx2 *bp) + BNX2_WR(bp, BNX2_PCI_GRC_WINDOW3_ADDR, BNX2_MSIX_PBA_ADDR); + } + +-static int +-bnx2_reset_chip(struct bnx2 *bp, u32 reset_code) ++static void ++bnx2_wait_dma_complete(struct bnx2 *bp) + { + u32 val; +- int i, rc = 0; +- u8 old_port; ++ int i; + +- /* Wait for the current PCI transaction to complete before +- * issuing a reset. */ ++ /* ++ * Wait for the current PCI transaction to complete before ++ * issuing a reset. ++ */ + if ((BNX2_CHIP(bp) == BNX2_CHIP_5706) || + (BNX2_CHIP(bp) == BNX2_CHIP_5708)) { + BNX2_WR(bp, BNX2_MISC_ENABLE_CLR_BITS, +@@ -4791,6 +4793,21 @@ bnx2_reset_chip(struct bnx2 *bp, u32 res + } + } + ++ return; ++} ++ ++ ++static int ++bnx2_reset_chip(struct bnx2 *bp, u32 reset_code) ++{ ++ u32 val; ++ int i, rc = 0; ++ u8 old_port; ++ ++ /* Wait for the current PCI transaction to complete before ++ * issuing a reset. */ ++ bnx2_wait_dma_complete(bp); ++ + /* Wait for the firmware to tell us it is ok to issue a reset. */ + bnx2_fw_sync(bp, BNX2_DRV_MSG_DATA_WAIT0 | reset_code, 1, 1); + +@@ -8575,6 +8592,15 @@ bnx2_init_one(struct pci_dev *pdev, cons + + pci_set_drvdata(pdev, dev); + ++ /* ++ * In-flight DMA from 1st kernel could continue going in kdump kernel. ++ * New io-page table has been created before bnx2 does reset at open stage. ++ * We have to wait for the in-flight DMA to complete to avoid it look up ++ * into the newly created io-page table. 
++ */ ++ if (is_kdump_kernel()) ++ bnx2_wait_dma_complete(bp); ++ + memcpy(dev->dev_addr, bp->mac_addr, ETH_ALEN); + + dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG | diff --git a/queue-4.8/bpf-fix-htab-map-destruction-when-extra-reserve-is-in-use.patch b/queue-4.8/bpf-fix-htab-map-destruction-when-extra-reserve-is-in-use.patch new file mode 100644 index 00000000000..1878b613765 --- /dev/null +++ b/queue-4.8/bpf-fix-htab-map-destruction-when-extra-reserve-is-in-use.patch @@ -0,0 +1,42 @@ +From foo@baz Fri Nov 18 11:35:46 CET 2016 +From: Daniel Borkmann +Date: Fri, 4 Nov 2016 00:01:19 +0100 +Subject: bpf: fix htab map destruction when extra reserve is in use + +From: Daniel Borkmann + + +[ Upstream commit 483bed2b0ddd12ec33fc9407e0c6e1088e77a97c ] + +Commit a6ed3ea65d98 ("bpf: restore behavior of bpf_map_update_elem") +added an extra per-cpu reserve to the hash table map to restore old +behaviour from pre prealloc times. When non-prealloc is in use for a +map, then problem is that once a hash table extra element has been +linked into the hash-table, and the hash table is destroyed due to +refcount dropping to zero, then htab_map_free() -> delete_all_elements() +will walk the whole hash table and drop all elements via htab_elem_free(). +The problem is that the element from the extra reserve is first fed +to the wrong backend allocator and eventually freed twice. + +Fixes: a6ed3ea65d98 ("bpf: restore behavior of bpf_map_update_elem") +Reported-by: Dmitry Vyukov +Signed-off-by: Daniel Borkmann +Acked-by: Alexei Starovoitov +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + kernel/bpf/hashtab.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/kernel/bpf/hashtab.c ++++ b/kernel/bpf/hashtab.c +@@ -687,7 +687,8 @@ static void delete_all_elements(struct b + + hlist_for_each_entry_safe(l, n, head, hash_node) { + hlist_del_rcu(&l->hash_node); +- htab_elem_free(htab, l); ++ if (l->state != HTAB_EXTRA_ELEM_USED) ++ htab_elem_free(htab, l); + } + } + } diff --git a/queue-4.8/dccp-do-not-release-listeners-too-soon.patch b/queue-4.8/dccp-do-not-release-listeners-too-soon.patch new file mode 100644 index 00000000000..eb0963b56f7 --- /dev/null +++ b/queue-4.8/dccp-do-not-release-listeners-too-soon.patch @@ -0,0 +1,142 @@ +From foo@baz Fri Nov 18 11:35:46 CET 2016 +From: Eric Dumazet +Date: Wed, 2 Nov 2016 17:14:41 -0700 +Subject: dccp: do not release listeners too soon + +From: Eric Dumazet + + +[ Upstream commit c3f24cfb3e508c70c26ee8569d537c8ca67a36c6 ] + +Andrey Konovalov reported following error while fuzzing with syzkaller : + +IPv4: Attempt to release alive inet socket ffff880068e98940 +kasan: CONFIG_KASAN_INLINE enabled +kasan: GPF could be caused by NULL-ptr deref or user memory access +general protection fault: 0000 [#1] SMP KASAN +Modules linked in: +CPU: 1 PID: 3905 Comm: a.out Not tainted 4.9.0-rc3+ #333 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 +task: ffff88006b9e0000 task.stack: ffff880068770000 +RIP: 0010:[] [] +selinux_socket_sock_rcv_skb+0xff/0x6a0 security/selinux/hooks.c:4639 +RSP: 0018:ffff8800687771c8 EFLAGS: 00010202 +RAX: ffff88006b9e0000 RBX: 1ffff1000d0eee3f RCX: 1ffff1000d1d312a +RDX: 1ffff1000d1d31a6 RSI: dffffc0000000000 RDI: 0000000000000010 +RBP: ffff880068777360 R08: 0000000000000000 R09: 0000000000000002 +R10: dffffc0000000000 R11: 0000000000000006 R12: ffff880068e98940 +R13: 0000000000000002 R14: ffff880068777338 R15: 0000000000000000 +FS: 00007f00ff760700(0000) GS:ffff88006cd00000(0000) knlGS:0000000000000000 +CS: 
0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 0000000020008000 CR3: 000000006a308000 CR4: 00000000000006e0 +Stack: + ffff8800687771e0 ffffffff812508a5 ffff8800686f3168 0000000000000007 + ffff88006ac8cdfc ffff8800665ea500 0000000041b58ab3 ffffffff847b5480 + ffffffff819eac60 ffff88006b9e0860 ffff88006b9e0868 ffff88006b9e07f0 +Call Trace: + [] security_sock_rcv_skb+0x75/0xb0 security/security.c:1317 + [] sk_filter_trim_cap+0x67/0x10e0 net/core/filter.c:81 + [] __sk_receive_skb+0x30/0xa00 net/core/sock.c:460 + [] dccp_v4_rcv+0xdb2/0x1910 net/dccp/ipv4.c:873 + [] ip_local_deliver_finish+0x332/0xad0 +net/ipv4/ip_input.c:216 + [< inline >] NF_HOOK_THRESH ./include/linux/netfilter.h:232 + [< inline >] NF_HOOK ./include/linux/netfilter.h:255 + [] ip_local_deliver+0x1c2/0x4b0 net/ipv4/ip_input.c:257 + [< inline >] dst_input ./include/net/dst.h:507 + [] ip_rcv_finish+0x750/0x1c40 net/ipv4/ip_input.c:396 + [< inline >] NF_HOOK_THRESH ./include/linux/netfilter.h:232 + [< inline >] NF_HOOK ./include/linux/netfilter.h:255 + [] ip_rcv+0x96f/0x12f0 net/ipv4/ip_input.c:487 + [] __netif_receive_skb_core+0x1897/0x2a50 net/core/dev.c:4213 + [] __netif_receive_skb+0x2a/0x170 net/core/dev.c:4251 + [] netif_receive_skb_internal+0x1b3/0x390 net/core/dev.c:4279 + [] netif_receive_skb+0x48/0x250 net/core/dev.c:4303 + [] tun_get_user+0xbd5/0x28a0 drivers/net/tun.c:1308 + [] tun_chr_write_iter+0xda/0x190 drivers/net/tun.c:1332 + [< inline >] new_sync_write fs/read_write.c:499 + [] __vfs_write+0x334/0x570 fs/read_write.c:512 + [] vfs_write+0x17b/0x500 fs/read_write.c:560 + [< inline >] SYSC_write fs/read_write.c:607 + [] SyS_write+0xd4/0x1a0 fs/read_write.c:599 + [] entry_SYSCALL_64_fastpath+0x1f/0xc2 + +It turns out DCCP calls __sk_receive_skb(), and this broke when +lookups no longer took a reference on listeners. + +Fix this issue by adding a @refcounted parameter to __sk_receive_skb(), +so that sock_put() is used only when needed. 
+ +Fixes: 3b24d854cb35 ("tcp/dccp: do not touch listener sk_refcnt under synflood") +Signed-off-by: Eric Dumazet +Reported-by: Andrey Konovalov +Tested-by: Andrey Konovalov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/sock.h | 4 ++-- + net/core/sock.c | 5 +++-- + net/dccp/ipv4.c | 2 +- + net/dccp/ipv6.c | 3 ++- + 4 files changed, 8 insertions(+), 6 deletions(-) + +--- a/include/net/sock.h ++++ b/include/net/sock.h +@@ -1587,11 +1587,11 @@ static inline void sock_put(struct sock + void sock_gen_put(struct sock *sk); + + int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested, +- unsigned int trim_cap); ++ unsigned int trim_cap, bool refcounted); + static inline int sk_receive_skb(struct sock *sk, struct sk_buff *skb, + const int nested) + { +- return __sk_receive_skb(sk, skb, nested, 1); ++ return __sk_receive_skb(sk, skb, nested, 1, true); + } + + static inline void sk_tx_queue_set(struct sock *sk, int tx_queue) +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -453,7 +453,7 @@ int sock_queue_rcv_skb(struct sock *sk, + EXPORT_SYMBOL(sock_queue_rcv_skb); + + int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, +- const int nested, unsigned int trim_cap) ++ const int nested, unsigned int trim_cap, bool refcounted) + { + int rc = NET_RX_SUCCESS; + +@@ -487,7 +487,8 @@ int __sk_receive_skb(struct sock *sk, st + + bh_unlock_sock(sk); + out: +- sock_put(sk); ++ if (refcounted) ++ sock_put(sk); + return rc; + discard_and_relse: + kfree_skb(skb); +--- a/net/dccp/ipv4.c ++++ b/net/dccp/ipv4.c +@@ -868,7 +868,7 @@ lookup: + goto discard_and_relse; + nf_reset(skb); + +- return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4); ++ return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4, refcounted); + + no_dccp_socket: + if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) +--- a/net/dccp/ipv6.c ++++ b/net/dccp/ipv6.c +@@ -738,7 +738,8 @@ lookup: + if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) + goto 
discard_and_relse; + +- return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4) ? -1 : 0; ++ return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4, ++ refcounted) ? -1 : 0; + + no_dccp_socket: + if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) diff --git a/queue-4.8/dccp-do-not-send-reset-to-already-closed-sockets.patch b/queue-4.8/dccp-do-not-send-reset-to-already-closed-sockets.patch new file mode 100644 index 00000000000..bffecf05d76 --- /dev/null +++ b/queue-4.8/dccp-do-not-send-reset-to-already-closed-sockets.patch @@ -0,0 +1,74 @@ +From foo@baz Fri Nov 18 11:35:46 CET 2016 +From: Eric Dumazet +Date: Wed, 2 Nov 2016 18:04:24 -0700 +Subject: dccp: do not send reset to already closed sockets + +From: Eric Dumazet + + +[ Upstream commit 346da62cc186c4b4b1ac59f87f4482b47a047388 ] + +Andrey reported following warning while fuzzing with syzkaller + +WARNING: CPU: 1 PID: 21072 at net/dccp/proto.c:83 dccp_set_state+0x229/0x290 +Kernel panic - not syncing: panic_on_warn set ... + +CPU: 1 PID: 21072 Comm: syz-executor Not tainted 4.9.0-rc1+ #293 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 + ffff88003d4c7738 ffffffff81b474f4 0000000000000003 dffffc0000000000 + ffffffff844f8b00 ffff88003d4c7804 ffff88003d4c7800 ffffffff8140c06a + 0000000041b58ab3 ffffffff8479ab7d ffffffff8140beae ffffffff8140cd00 +Call Trace: + [< inline >] __dump_stack lib/dump_stack.c:15 + [] dump_stack+0xb3/0x10f lib/dump_stack.c:51 + [] panic+0x1bc/0x39d kernel/panic.c:179 + [] __warn+0x1cc/0x1f0 kernel/panic.c:542 + [] warn_slowpath_null+0x2c/0x40 kernel/panic.c:585 + [] dccp_set_state+0x229/0x290 net/dccp/proto.c:83 + [] dccp_close+0x612/0xc10 net/dccp/proto.c:1016 + [] inet_release+0xef/0x1c0 net/ipv4/af_inet.c:415 + [] sock_release+0x8e/0x1d0 net/socket.c:570 + [] sock_close+0x16/0x20 net/socket.c:1017 + [] __fput+0x29d/0x720 fs/file_table.c:208 + [] ____fput+0x15/0x20 fs/file_table.c:244 + [] task_work_run+0xf8/0x170 kernel/task_work.c:116 + [< inline >] 
exit_task_work include/linux/task_work.h:21 + [] do_exit+0x883/0x2ac0 kernel/exit.c:828 + [] do_group_exit+0x10e/0x340 kernel/exit.c:931 + [] get_signal+0x634/0x15a0 kernel/signal.c:2307 + [] do_signal+0x8d/0x1a30 arch/x86/kernel/signal.c:807 + [] exit_to_usermode_loop+0xe5/0x130 +arch/x86/entry/common.c:156 + [< inline >] prepare_exit_to_usermode arch/x86/entry/common.c:190 + [] syscall_return_slowpath+0x1a8/0x1e0 +arch/x86/entry/common.c:259 + [] entry_SYSCALL_64_fastpath+0xc0/0xc2 +Dumping ftrace buffer: + (ftrace buffer empty) +Kernel Offset: disabled + +Fix this the same way we did for TCP in commit 565b7b2d2e63 +("tcp: do not send reset to already closed sockets") + +Signed-off-by: Eric Dumazet +Reported-by: Andrey Konovalov +Tested-by: Andrey Konovalov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/dccp/proto.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/net/dccp/proto.c ++++ b/net/dccp/proto.c +@@ -1009,6 +1009,10 @@ void dccp_close(struct sock *sk, long ti + __kfree_skb(skb); + } + ++ /* If socket has been already reset kill it. */ ++ if (sk->sk_state == DCCP_CLOSED) ++ goto adjudge_to_death; ++ + if (data_was_unread) { + /* Unread data was tossed, send an appropriate Reset Code */ + DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread); diff --git a/queue-4.8/dccp-fix-out-of-bound-access-in-dccp_v4_err.patch b/queue-4.8/dccp-fix-out-of-bound-access-in-dccp_v4_err.patch new file mode 100644 index 00000000000..0ed869ad4c6 --- /dev/null +++ b/queue-4.8/dccp-fix-out-of-bound-access-in-dccp_v4_err.patch @@ -0,0 +1,56 @@ +From foo@baz Fri Nov 18 11:35:46 CET 2016 +From: Eric Dumazet +Date: Wed, 2 Nov 2016 19:00:40 -0700 +Subject: dccp: fix out of bound access in dccp_v4_err() + +From: Eric Dumazet + + +[ Upstream commit 6706a97fec963d6cb3f7fc2978ec1427b4651214 ] + +dccp_v4_err() does not use pskb_may_pull() and might access garbage. 
+ +We only need 4 bytes at the beginning of the DCCP header, like TCP, +so the 8 bytes pulled in icmp_socket_deliver() are more than enough. + +This patch might allow to process more ICMP messages, as some routers +are still limiting the size of reflected bytes to 28 (RFC 792), instead +of extended lengths (RFC 1812 4.3.2.3) + +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/dccp/ipv4.c | 14 ++++++++------ + 1 file changed, 8 insertions(+), 6 deletions(-) + +--- a/net/dccp/ipv4.c ++++ b/net/dccp/ipv4.c +@@ -235,7 +235,7 @@ static void dccp_v4_err(struct sk_buff * + { + const struct iphdr *iph = (struct iphdr *)skb->data; + const u8 offset = iph->ihl << 2; +- const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); ++ const struct dccp_hdr *dh; + struct dccp_sock *dp; + struct inet_sock *inet; + const int type = icmp_hdr(skb)->type; +@@ -245,11 +245,13 @@ static void dccp_v4_err(struct sk_buff * + int err; + struct net *net = dev_net(skb->dev); + +- if (skb->len < offset + sizeof(*dh) || +- skb->len < offset + __dccp_basic_hdr_len(dh)) { +- __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); +- return; +- } ++ /* Only need dccph_dport & dccph_sport which are the first ++ * 4 bytes in dccp header. ++ * Our caller (icmp_socket_deliver()) already pulled 8 bytes for us. 
++ */ ++ BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8); ++ BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8); ++ dh = (struct dccp_hdr *)(skb->data + offset); + + sk = __inet_lookup_established(net, &dccp_hashinfo, + iph->daddr, dh->dccph_dport, diff --git a/queue-4.8/dctcp-avoid-bogus-doubling-of-cwnd-after-loss.patch b/queue-4.8/dctcp-avoid-bogus-doubling-of-cwnd-after-loss.patch new file mode 100644 index 00000000000..f471b8df739 --- /dev/null +++ b/queue-4.8/dctcp-avoid-bogus-doubling-of-cwnd-after-loss.patch @@ -0,0 +1,87 @@ +From foo@baz Fri Nov 18 11:35:46 CET 2016 +From: Florian Westphal +Date: Fri, 28 Oct 2016 18:43:11 +0200 +Subject: dctcp: avoid bogus doubling of cwnd after loss + +From: Florian Westphal + + +[ Upstream commit ce6dd23329b1ee6a794acf5f7e40f8e89b8317ee ] + +If a congestion control module doesn't provide .undo_cwnd function, +tcp_undo_cwnd_reduction() will set cwnd to + + tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1); + +... which makes sense for reno (it sets ssthresh to half the current cwnd), +but it makes no sense for dctcp, which sets ssthresh based on the current +congestion estimate. + +This can cause severe growth of cwnd (eventually overflowing u32). + +Fix this by saving last cwnd on loss and restore cwnd based on that, +similar to cubic and other algorithms. + +Fixes: e3118e8359bb7c ("net: tcp: add DCTCP congestion control algorithm") +Cc: Lawrence Brakmo +Cc: Andrew Shewmaker +Cc: Glenn Judd +Acked-by: Daniel Borkmann +Signed-off-by: Florian Westphal +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_dctcp.c | 13 ++++++++++++- + 1 file changed, 12 insertions(+), 1 deletion(-) + +--- a/net/ipv4/tcp_dctcp.c ++++ b/net/ipv4/tcp_dctcp.c +@@ -56,6 +56,7 @@ struct dctcp { + u32 next_seq; + u32 ce_state; + u32 delayed_ack_reserved; ++ u32 loss_cwnd; + }; + + static unsigned int dctcp_shift_g __read_mostly = 4; /* g = 1/2^4 */ +@@ -96,6 +97,7 @@ static void dctcp_init(struct sock *sk) + ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA); + + ca->delayed_ack_reserved = 0; ++ ca->loss_cwnd = 0; + ca->ce_state = 0; + + dctcp_reset(tp, ca); +@@ -111,9 +113,10 @@ static void dctcp_init(struct sock *sk) + + static u32 dctcp_ssthresh(struct sock *sk) + { +- const struct dctcp *ca = inet_csk_ca(sk); ++ struct dctcp *ca = inet_csk_ca(sk); + struct tcp_sock *tp = tcp_sk(sk); + ++ ca->loss_cwnd = tp->snd_cwnd; + return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U); + } + +@@ -308,12 +311,20 @@ static size_t dctcp_get_info(struct sock + return 0; + } + ++static u32 dctcp_cwnd_undo(struct sock *sk) ++{ ++ const struct dctcp *ca = inet_csk_ca(sk); ++ ++ return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd); ++} ++ + static struct tcp_congestion_ops dctcp __read_mostly = { + .init = dctcp_init, + .in_ack_event = dctcp_update_alpha, + .cwnd_event = dctcp_cwnd_event, + .ssthresh = dctcp_ssthresh, + .cong_avoid = tcp_reno_cong_avoid, ++ .undo_cwnd = dctcp_cwnd_undo, + .set_state = dctcp_state, + .get_info = dctcp_get_info, + .flags = TCP_CONG_NEEDS_ECN, diff --git a/queue-4.8/fib_trie-correct-proc-net-route-off-by-one-error.patch b/queue-4.8/fib_trie-correct-proc-net-route-off-by-one-error.patch new file mode 100644 index 00000000000..1064307eb5e --- /dev/null +++ b/queue-4.8/fib_trie-correct-proc-net-route-off-by-one-error.patch @@ -0,0 +1,102 @@ +From foo@baz Fri Nov 18 11:35:46 CET 2016 +From: Alexander Duyck +Date: Fri, 4 Nov 2016 15:11:57 -0400 +Subject: fib_trie: Correct /proc/net/route off by one 
error + +From: Alexander Duyck + + +[ Upstream commit fd0285a39b1cb496f60210a9a00ad33a815603e7 ] + +The display of /proc/net/route has had a couple issues due to the fact that +when I originally rewrote most of fib_trie I made it so that the iterator +was tracking the next value to use instead of the current. + +In addition it had an off by 1 error where I was tracking the first piece +of data as position 0, even though in reality that belonged to the +SEQ_START_TOKEN. + +This patch updates the code so the iterator tracks the last reported +position and key instead of the next expected position and key. In +addition it shifts things so that all of the leaves start at 1 instead of +trying to report leaves starting with offset 0 as being valid. With these +two issues addressed this should resolve any off by one errors that were +present in the display of /proc/net/route. + +Fixes: 25b97c016b26 ("ipv4: off-by-one in continuation handling in /proc/net/route") +Cc: Andy Whitcroft +Reported-by: Jason Baron +Tested-by: Jason Baron +Signed-off-by: Alexander Duyck +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/fib_trie.c | 21 +++++++++------------ + 1 file changed, 9 insertions(+), 12 deletions(-) + +--- a/net/ipv4/fib_trie.c ++++ b/net/ipv4/fib_trie.c +@@ -2455,22 +2455,19 @@ static struct key_vector *fib_route_get_ + struct key_vector *l, **tp = &iter->tnode; + t_key key; + +- /* use cache location of next-to-find key */ ++ /* use cached location of previously found key */ + if (iter->pos > 0 && pos >= iter->pos) { +- pos -= iter->pos; + key = iter->key; + } else { +- iter->pos = 0; ++ iter->pos = 1; + key = 0; + } + +- while ((l = leaf_walk_rcu(tp, key)) != NULL) { ++ pos -= iter->pos; ++ ++ while ((l = leaf_walk_rcu(tp, key)) && (pos-- > 0)) { + key = l->key + 1; + iter->pos++; +- +- if (--pos <= 0) +- break; +- + l = NULL; + + /* handle unlikely case of a key wrap */ +@@ -2479,7 +2476,7 @@ static struct key_vector *fib_route_get_ + } + + if (l) +- iter->key = key; /* remember it */ ++ iter->key = l->key; /* remember it */ + else + iter->pos = 0; /* forget it */ + +@@ -2507,7 +2504,7 @@ static void *fib_route_seq_start(struct + return fib_route_get_idx(iter, *pos); + + iter->pos = 0; +- iter->key = 0; ++ iter->key = KEY_MAX; + + return SEQ_START_TOKEN; + } +@@ -2516,7 +2513,7 @@ static void *fib_route_seq_next(struct s + { + struct fib_route_iter *iter = seq->private; + struct key_vector *l = NULL; +- t_key key = iter->key; ++ t_key key = iter->key + 1; + + ++*pos; + +@@ -2525,7 +2522,7 @@ static void *fib_route_seq_next(struct s + l = leaf_walk_rcu(&iter->tnode, key); + + if (l) { +- iter->key = l->key + 1; ++ iter->key = l->key; + iter->pos++; + } else { + iter->pos = 0; diff --git a/queue-4.8/ip6_tunnel-clear-ip6cb-in-ip6tunnel_xmit.patch b/queue-4.8/ip6_tunnel-clear-ip6cb-in-ip6tunnel_xmit.patch new file mode 100644 index 00000000000..7b282a380c4 --- /dev/null +++ b/queue-4.8/ip6_tunnel-clear-ip6cb-in-ip6tunnel_xmit.patch @@ -0,0 +1,36 @@ +From foo@baz Fri Nov 18 11:35:46 CET 2016 +From: Eli Cooper +Date: 
Tue, 1 Nov 2016 23:45:12 +0800 +Subject: ip6_tunnel: Clear IP6CB in ip6tunnel_xmit() + +From: Eli Cooper + + +[ Upstream commit 23f4ffedb7d751c7e298732ba91ca75d224bc1a6 ] + +skb->cb may contain data from previous layers. In the observed scenario, +the garbage data were misinterpreted as IP6CB(skb)->frag_max_size, so +that small packets sent through the tunnel are mistakenly fragmented. + +This patch unconditionally clears the control buffer in ip6tunnel_xmit(), +which affects ip6_tunnel, ip6_udp_tunnel and ip6_gre. Currently none of +these tunnels set IP6CB(skb)->flags, otherwise it needs to be done earlier. + +Cc: stable@vger.kernel.org +Signed-off-by: Eli Cooper +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/ip6_tunnel.h | 1 + + 1 file changed, 1 insertion(+) + +--- a/include/net/ip6_tunnel.h ++++ b/include/net/ip6_tunnel.h +@@ -145,6 +145,7 @@ static inline void ip6tunnel_xmit(struct + { + int pkt_len, err; + ++ memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); + pkt_len = skb->len - skb_inner_network_offset(skb); + err = ip6_local_out(dev_net(skb_dst(skb)->dev), sk, skb); + if (unlikely(net_xmit_eval(err))) diff --git a/queue-4.8/ipv4-allow-local-fragmentation-in-ip_finish_output_gso.patch b/queue-4.8/ipv4-allow-local-fragmentation-in-ip_finish_output_gso.patch new file mode 100644 index 00000000000..6096ff25c23 --- /dev/null +++ b/queue-4.8/ipv4-allow-local-fragmentation-in-ip_finish_output_gso.patch @@ -0,0 +1,112 @@ +From foo@baz Fri Nov 18 11:35:46 CET 2016 +From: Lance Richardson +Date: Wed, 2 Nov 2016 16:36:17 -0400 +Subject: ipv4: allow local fragmentation in ip_finish_output_gso() + +From: Lance Richardson + + +[ Upstream commit 9ee6c5dc816aa8256257f2cd4008a9291ec7e985 ] + +Some configurations (e.g. geneve interface with default +MTU of 1500 over an ethernet interface with 1500 MTU) result +in the transmission of packets that exceed the configured MTU. 
+While this should be considered to be a "bad" configuration, +it is still allowed and should not result in the sending +of packets that exceed the configured MTU. + +Fix by dropping the assumption in ip_finish_output_gso() that +locally originated gso packets will never need fragmentation. +Basic testing using iperf (observing CPU usage and bandwidth) +have shown no measurable performance impact for traffic not +requiring fragmentation. + +Fixes: c7ba65d7b649 ("net: ip: push gso skb forwarding handling down the stack") +Reported-by: Jan Tluka +Signed-off-by: Lance Richardson +Acked-by: Hannes Frederic Sowa +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/ip.h | 1 - + net/ipv4/ip_forward.c | 2 +- + net/ipv4/ip_output.c | 6 ++---- + net/ipv4/ip_tunnel_core.c | 11 ----------- + net/ipv4/ipmr.c | 2 +- + 5 files changed, 4 insertions(+), 18 deletions(-) + +--- a/include/net/ip.h ++++ b/include/net/ip.h +@@ -47,7 +47,6 @@ struct inet_skb_parm { + #define IPSKB_REROUTED BIT(4) + #define IPSKB_DOREDIRECT BIT(5) + #define IPSKB_FRAG_PMTU BIT(6) +-#define IPSKB_FRAG_SEGS BIT(7) + + u16 frag_max_size; + }; +--- a/net/ipv4/ip_forward.c ++++ b/net/ipv4/ip_forward.c +@@ -117,7 +117,7 @@ int ip_forward(struct sk_buff *skb) + if (opt->is_strictroute && rt->rt_uses_gateway) + goto sr_failed; + +- IPCB(skb)->flags |= IPSKB_FORWARDED | IPSKB_FRAG_SEGS; ++ IPCB(skb)->flags |= IPSKB_FORWARDED; + mtu = ip_dst_mtu_maybe_forward(&rt->dst, true); + if (ip_exceeds_mtu(skb, mtu)) { + IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); +--- a/net/ipv4/ip_output.c ++++ b/net/ipv4/ip_output.c +@@ -223,11 +223,9 @@ static int ip_finish_output_gso(struct n + struct sk_buff *segs; + int ret = 0; + +- /* common case: fragmentation of segments is not allowed, +- * or seglen is <= mtu ++ /* common case: seglen is <= mtu + */ +- if (((IPCB(skb)->flags & IPSKB_FRAG_SEGS) == 0) || +- skb_gso_validate_mtu(skb, mtu)) ++ if (skb_gso_validate_mtu(skb, mtu)) + return 
ip_finish_output2(net, sk, skb); + + /* Slowpath - GSO segment length is exceeding the dst MTU. +--- a/net/ipv4/ip_tunnel_core.c ++++ b/net/ipv4/ip_tunnel_core.c +@@ -63,7 +63,6 @@ void iptunnel_xmit(struct sock *sk, stru + int pkt_len = skb->len - skb_inner_network_offset(skb); + struct net *net = dev_net(rt->dst.dev); + struct net_device *dev = skb->dev; +- int skb_iif = skb->skb_iif; + struct iphdr *iph; + int err; + +@@ -73,16 +72,6 @@ void iptunnel_xmit(struct sock *sk, stru + skb_dst_set(skb, &rt->dst); + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); + +- if (skb_iif && !(df & htons(IP_DF))) { +- /* Arrived from an ingress interface, got encapsulated, with +- * fragmentation of encapulating frames allowed. +- * If skb is gso, the resulting encapsulated network segments +- * may exceed dst mtu. +- * Allow IP Fragmentation of segments. +- */ +- IPCB(skb)->flags |= IPSKB_FRAG_SEGS; +- } +- + /* Push down and install the IP header. */ + skb_push(skb, sizeof(struct iphdr)); + skb_reset_network_header(skb); +--- a/net/ipv4/ipmr.c ++++ b/net/ipv4/ipmr.c +@@ -1749,7 +1749,7 @@ static void ipmr_queue_xmit(struct net * + vif->dev->stats.tx_bytes += skb->len; + } + +- IPCB(skb)->flags |= IPSKB_FORWARDED | IPSKB_FRAG_SEGS; ++ IPCB(skb)->flags |= IPSKB_FORWARDED; + + /* RFC1584 teaches, that DVMRP/PIM router must deliver packets locally + * not only before forwarding, but after forwarding on all output diff --git a/queue-4.8/ipv4-use-new_gw-for-redirect-neigh-lookup.patch b/queue-4.8/ipv4-use-new_gw-for-redirect-neigh-lookup.patch new file mode 100644 index 00000000000..d3e8996ec36 --- /dev/null +++ b/queue-4.8/ipv4-use-new_gw-for-redirect-neigh-lookup.patch @@ -0,0 +1,51 @@ +From foo@baz Fri Nov 18 11:35:46 CET 2016 +From: Stephen Suryaputra Lin +Date: Thu, 10 Nov 2016 11:16:15 -0500 +Subject: ipv4: use new_gw for redirect neigh lookup + +From: Stephen Suryaputra Lin + + +[ Upstream commit 969447f226b451c453ddc83cac6144eaeac6f2e3 ] + +In v2.6, ip_rt_redirect() calls 
arp_bind_neighbour() which returns 0 +and then the state of the neigh for the new_gw is checked. If the state +isn't valid then the redirected route is deleted. This behavior is +maintained up to v3.5.7 by check_peer_redirect() because rt->rt_gateway +is assigned to peer->redirect_learned.a4 before calling +ipv4_neigh_lookup(). + +After commit 5943634fc559 ("ipv4: Maintain redirect and PMTU info in +struct rtable again."), ipv4_neigh_lookup() is performed without the +rt_gateway assigned to the new_gw. In the case when rt_gateway (old_gw) +isn't zero, the function uses it as the key. The neigh is most likely +valid since the old_gw is the one that sends the ICMP redirect message. +Then the new_gw is assigned to fib_nh_exception. The problem is: the +new_gw ARP may never get resolved and the traffic is blackholed. + +So, use the new_gw for neigh lookup. + +Changes from v1: + - use __ipv4_neigh_lookup instead (per Eric Dumazet). + +Fixes: 5943634fc559 ("ipv4: Maintain redirect and PMTU info in struct rtable again.") +Signed-off-by: Stephen Suryaputra Lin +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/route.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -753,7 +753,9 @@ static void __ip_do_redirect(struct rtab + goto reject_redirect; + } + +- n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw); ++ n = __ipv4_neigh_lookup(rt->dst.dev, new_gw); ++ if (!n) ++ n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev); + if (!IS_ERR(n)) { + if (!(n->nud_state & NUD_VALID)) { + neigh_event_send(n, NULL); diff --git a/queue-4.8/ipv6-dccp-add-missing-bind_conflict-to-dccp_ipv6_mapped.patch b/queue-4.8/ipv6-dccp-add-missing-bind_conflict-to-dccp_ipv6_mapped.patch new file mode 100644 index 00000000000..88a89cc17cc --- /dev/null +++ b/queue-4.8/ipv6-dccp-add-missing-bind_conflict-to-dccp_ipv6_mapped.patch @@ -0,0 +1,35 @@ +From foo@baz Fri Nov 18 11:35:46 CET 2016 +From: Eric Dumazet +Date: Thu, 3 Nov 2016 08:59:46 -0700 +Subject: ipv6: dccp: add missing bind_conflict to dccp_ipv6_mapped + +From: Eric Dumazet + + +[ Upstream commit 990ff4d84408fc55942ca6644f67e361737b3d8e ] + +While fuzzing kernel with syzkaller, Andrey reported a nasty crash +in inet6_bind() caused by DCCP lacking a required method. + +Fixes: ab1e0a13d7029 ("[SOCK] proto: Add hashinfo member to struct proto") +Signed-off-by: Eric Dumazet +Reported-by: Andrey Konovalov +Tested-by: Andrey Konovalov +Cc: Arnaldo Carvalho de Melo +Acked-by: Arnaldo Carvalho de Melo +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/dccp/ipv6.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/dccp/ipv6.c ++++ b/net/dccp/ipv6.c +@@ -958,6 +958,7 @@ static const struct inet_connection_sock + .getsockopt = ipv6_getsockopt, + .addr2sockaddr = inet6_csk_addr2sockaddr, + .sockaddr_len = sizeof(struct sockaddr_in6), ++ .bind_conflict = inet6_csk_bind_conflict, + #ifdef CONFIG_COMPAT + .compat_setsockopt = compat_ipv6_setsockopt, + .compat_getsockopt = compat_ipv6_getsockopt, diff --git a/queue-4.8/ipv6-dccp-fix-out-of-bound-access-in-dccp_v6_err.patch b/queue-4.8/ipv6-dccp-fix-out-of-bound-access-in-dccp_v6_err.patch new file mode 100644 index 00000000000..2ad176bf74d --- /dev/null +++ b/queue-4.8/ipv6-dccp-fix-out-of-bound-access-in-dccp_v6_err.patch @@ -0,0 +1,53 @@ +From foo@baz Fri Nov 18 11:35:46 CET 2016 +From: Eric Dumazet +Date: Wed, 2 Nov 2016 20:30:48 -0700 +Subject: ipv6: dccp: fix out of bound access in dccp_v6_err() + +From: Eric Dumazet + + +[ Upstream commit 1aa9d1a0e7eefcc61696e147d123453fc0016005 ] + +dccp_v6_err() does not use pskb_may_pull() and might access garbage. + +We only need 4 bytes at the beginning of the DCCP header, like TCP, +so the 8 bytes pulled in icmpv6_notify() are more than enough. + +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/dccp/ipv6.c | 15 ++++++++------- + 1 file changed, 8 insertions(+), 7 deletions(-) + +--- a/net/dccp/ipv6.c ++++ b/net/dccp/ipv6.c +@@ -70,7 +70,7 @@ static void dccp_v6_err(struct sk_buff * + u8 type, u8 code, int offset, __be32 info) + { + const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; +- const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); ++ const struct dccp_hdr *dh; + struct dccp_sock *dp; + struct ipv6_pinfo *np; + struct sock *sk; +@@ -78,12 +78,13 @@ static void dccp_v6_err(struct sk_buff * + __u64 seq; + struct net *net = dev_net(skb->dev); + +- if (skb->len < offset + sizeof(*dh) || +- skb->len < offset + __dccp_basic_hdr_len(dh)) { +- __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), +- ICMP6_MIB_INERRORS); +- return; +- } ++ /* Only need dccph_dport & dccph_sport which are the first ++ * 4 bytes in dccp header. ++ * Our caller (icmpv6_notify()) already pulled 8 bytes for us. ++ */ ++ BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8); ++ BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8); ++ dh = (struct dccp_hdr *)(skb->data + offset); + + sk = __inet6_lookup_established(net, &dccp_hashinfo, + &hdr->daddr, dh->dccph_dport, diff --git a/queue-4.8/mlxsw-spectrum-fix-refcount-bug-on-span-entries.patch b/queue-4.8/mlxsw-spectrum-fix-refcount-bug-on-span-entries.patch new file mode 100644 index 00000000000..8e31baaac70 --- /dev/null +++ b/queue-4.8/mlxsw-spectrum-fix-refcount-bug-on-span-entries.patch @@ -0,0 +1,56 @@ +From foo@baz Fri Nov 18 11:35:46 CET 2016 +From: Yotam Gigi +Date: Fri, 11 Nov 2016 16:34:25 +0100 +Subject: mlxsw: spectrum: Fix refcount bug on span entries + +From: Yotam Gigi + + +[ Upstream commit 2d644d4c7506646f9c4a2afceb7fd5f030bc0c9f ] + +When binding port to a newly created span entry, its refcount is +initialized to zero even though it has a bound port. 
That leads +to unexpected behaviour when the user tries to delete that port +from the span entry. + +Fix this by initializing the reference count to 1. + +Also add a warning to put function. + +Fixes: 763b4b70afcd ("mlxsw: spectrum: Add support in matchall mirror TC offloading") +Signed-off-by: Yotam Gigi +Reviewed-by: Ido Schimmel +Signed-off-by: Jiri Pirko +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c ++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +@@ -231,7 +231,7 @@ mlxsw_sp_span_entry_create(struct mlxsw_ + + span_entry->used = true; + span_entry->id = index; +- span_entry->ref_count = 0; ++ span_entry->ref_count = 1; + span_entry->local_port = local_port; + return span_entry; + } +@@ -268,6 +268,7 @@ struct mlxsw_sp_span_entry *mlxsw_sp_spa + + span_entry = mlxsw_sp_span_entry_find(port); + if (span_entry) { ++ /* Already exists, just take a reference */ + span_entry->ref_count++; + return span_entry; + } +@@ -278,6 +279,7 @@ struct mlxsw_sp_span_entry *mlxsw_sp_spa + static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_span_entry *span_entry) + { ++ WARN_ON(!span_entry->ref_count); + if (--span_entry->ref_count == 0) + mlxsw_sp_span_entry_destroy(mlxsw_sp, span_entry); + return 0; diff --git a/queue-4.8/mlxsw-spectrum_router-correctly-dump-neighbour-activity.patch b/queue-4.8/mlxsw-spectrum_router-correctly-dump-neighbour-activity.patch new file mode 100644 index 00000000000..85c4af27d2d --- /dev/null +++ b/queue-4.8/mlxsw-spectrum_router-correctly-dump-neighbour-activity.patch @@ -0,0 +1,67 @@ +From foo@baz Fri Nov 18 11:35:46 CET 2016 +From: Arkadi Sharshevsky +Date: Fri, 11 Nov 2016 16:34:26 +0100 +Subject: mlxsw: spectrum_router: Correctly dump neighbour activity + +From: Arkadi Sharshevsky + + +[ Upstream commit 
42cdb338f40a98e6558bae35456fe86b6e90e1ef ] + +The device's neighbour table is periodically dumped in order to update +the kernel about active neighbours. A single dump session may span +multiple queries, until the response carries less records than requested +or when a record (can contain up to four neighbour entries) is not full. +Current code stops the session when the number of returned records is +zero, which can result in infinite loop in case of high packet rate. + +Fix this by stopping the session according to the above logic. + +Fixes: c723c735fa6b ("mlxsw: spectrum_router: Periodically update the kernel's neigh table") +Signed-off-by: Arkadi Sharshevsky +Signed-off-by: Ido Schimmel +Signed-off-by: Jiri Pirko +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 22 +++++++++++++++++- + 1 file changed, 21 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c ++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +@@ -777,6 +777,26 @@ static void mlxsw_sp_router_neigh_rec_pr + } + } + ++static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl) ++{ ++ u8 num_rec, last_rec_index, num_entries; ++ ++ num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl); ++ last_rec_index = num_rec - 1; ++ ++ if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM) ++ return false; ++ if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) == ++ MLXSW_REG_RAUHTD_TYPE_IPV6) ++ return true; ++ ++ num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl, ++ last_rec_index); ++ if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC) ++ return true; ++ return false; ++} ++ + static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp) + { + char *rauhtd_pl; +@@ -803,7 +823,7 @@ static int mlxsw_sp_router_neighs_update + for (i = 0; i < num_rec; i++) + mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl, + i); +- } while (num_rec); ++ } while 
(mlxsw_sp_router_rauhtd_is_full(rauhtd_pl)); + rtnl_unlock(); + + kfree(rauhtd_pl); diff --git a/queue-4.8/net-__skb_flow_dissect-must-cap-its-return-value.patch b/queue-4.8/net-__skb_flow_dissect-must-cap-its-return-value.patch new file mode 100644 index 00000000000..4ef08878100 --- /dev/null +++ b/queue-4.8/net-__skb_flow_dissect-must-cap-its-return-value.patch @@ -0,0 +1,59 @@ +From foo@baz Fri Nov 18 11:35:46 CET 2016 +From: Eric Dumazet +Date: Wed, 9 Nov 2016 16:04:46 -0800 +Subject: net: __skb_flow_dissect() must cap its return value + +From: Eric Dumazet + + +[ Upstream commit 34fad54c2537f7c99d07375e50cb30aa3c23bd83 ] + +After Tom patch, thoff field could point past the end of the buffer, +this could fool some callers. + +If an skb was provided, skb->len should be the upper limit. +If not, hlen is supposed to be the upper limit. + +Fixes: a6e544b0a88b ("flow_dissector: Jump to exit code in __skb_flow_dissect") +Signed-off-by: Eric Dumazet +Reported-by: Yibin Yang +Acked-by: Willem de Bruijn +Acked-by: Alexei Starovoitov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/flow_dissector.c | 11 ++++++++--- + 1 file changed, 8 insertions(+), 3 deletions(-) + +--- a/net/core/flow_dissector.c ++++ b/net/core/flow_dissector.c +@@ -118,7 +118,7 @@ bool __skb_flow_dissect(const struct sk_ + struct flow_dissector_key_tags *key_tags; + struct flow_dissector_key_keyid *key_keyid; + u8 ip_proto = 0; +- bool ret = false; ++ bool ret; + + if (!data) { + data = skb->data; +@@ -481,12 +481,17 @@ ip_proto_again: + out_good: + ret = true; + +-out_bad: ++ key_control->thoff = (u16)nhoff; ++out: + key_basic->n_proto = proto; + key_basic->ip_proto = ip_proto; +- key_control->thoff = (u16)nhoff; + + return ret; ++ ++out_bad: ++ ret = false; ++ key_control->thoff = min_t(u16, nhoff, skb ? 
skb->len : hlen); ++ goto out; + } + EXPORT_SYMBOL(__skb_flow_dissect); + diff --git a/queue-4.8/net-clear-sk_err_soft-in-sk_clone_lock.patch b/queue-4.8/net-clear-sk_err_soft-in-sk_clone_lock.patch new file mode 100644 index 00000000000..e617dd695dc --- /dev/null +++ b/queue-4.8/net-clear-sk_err_soft-in-sk_clone_lock.patch @@ -0,0 +1,34 @@ +From foo@baz Fri Nov 18 11:35:46 CET 2016 +From: Eric Dumazet +Date: Fri, 28 Oct 2016 13:40:24 -0700 +Subject: net: clear sk_err_soft in sk_clone_lock() + +From: Eric Dumazet + + +[ Upstream commit e551c32d57c88923f99f8f010e89ca7ed0735e83 ] + +At accept() time, it is possible the parent has a non zero +sk_err_soft, leftover from a prior error. + +Make sure we do not leave this value in the child, as it +makes future getsockopt(SO_ERROR) calls quite unreliable. + +Signed-off-by: Eric Dumazet +Acked-by: Soheil Hassas Yeganeh +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/sock.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -1563,6 +1563,7 @@ struct sock *sk_clone_lock(const struct + RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL); + + newsk->sk_err = 0; ++ newsk->sk_err_soft = 0; + newsk->sk_priority = 0; + newsk->sk_incoming_cpu = raw_smp_processor_id(); + atomic64_set(&newsk->sk_cookie, 0); diff --git a/queue-4.8/net-icmp6_send-should-use-dst-dev-to-determine-l3-domain.patch b/queue-4.8/net-icmp6_send-should-use-dst-dev-to-determine-l3-domain.patch new file mode 100644 index 00000000000..8a0c9b8c1fe --- /dev/null +++ b/queue-4.8/net-icmp6_send-should-use-dst-dev-to-determine-l3-domain.patch @@ -0,0 +1,33 @@ +From foo@baz Fri Nov 18 11:35:46 CET 2016 +From: David Ahern +Date: Thu, 3 Nov 2016 16:17:26 -0700 +Subject: net: icmp6_send should use dst dev to determine L3 domain + +From: David Ahern + + +[ Upstream commit 5d41ce29e3b91ef305f88d23f72b3359de329cec ] + +icmp6_send is called in response to some event. 
The skb may not have +the device set (skb->dev is NULL), but it is expected to have a dst set. +Update icmp6_send to use the dst on the skb to determine L3 domain. + +Fixes: ca254490c8dfd ("net: Add VRF support to IPv6 stack") +Signed-off-by: David Ahern +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/icmp.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv6/icmp.c ++++ b/net/ipv6/icmp.c +@@ -448,7 +448,7 @@ static void icmp6_send(struct sk_buff *s + if (__ipv6_addr_needs_scope_id(addr_type)) + iif = skb->dev->ifindex; + else +- iif = l3mdev_master_ifindex(skb->dev); ++ iif = l3mdev_master_ifindex(skb_dst(skb)->dev); + + /* + * Must not send error if the source does not uniquely diff --git a/queue-4.8/net-icmp_route_lookup-should-use-rt-dev-to-determine-l3-domain.patch b/queue-4.8/net-icmp_route_lookup-should-use-rt-dev-to-determine-l3-domain.patch new file mode 100644 index 00000000000..1b09769563e --- /dev/null +++ b/queue-4.8/net-icmp_route_lookup-should-use-rt-dev-to-determine-l3-domain.patch @@ -0,0 +1,43 @@ +From foo@baz Fri Nov 18 11:35:46 CET 2016 +From: David Ahern +Date: Mon, 7 Nov 2016 12:03:09 -0800 +Subject: net: icmp_route_lookup should use rt dev to determine L3 domain + +From: David Ahern + + +[ Upstream commit 9d1a6c4ea43e48c7880c85971c17939b56832d8a ] + +icmp_send is called in response to some event. The skb may not have +the device set (skb->dev is NULL), but it is expected to have an rt. +Update icmp_route_lookup to use the rt on the skb to determine L3 +domain. + +Fixes: 613d09b30f8b ("net: Use VRF device index for lookups on TX") +Signed-off-by: David Ahern +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/icmp.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/ipv4/icmp.c ++++ b/net/ipv4/icmp.c +@@ -477,7 +477,7 @@ static struct rtable *icmp_route_lookup( + fl4->flowi4_proto = IPPROTO_ICMP; + fl4->fl4_icmp_type = type; + fl4->fl4_icmp_code = code; +- fl4->flowi4_oif = l3mdev_master_ifindex(skb_in->dev); ++ fl4->flowi4_oif = l3mdev_master_ifindex(skb_dst(skb_in)->dev); + + security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4)); + rt = __ip_route_output_key_hash(net, fl4, +@@ -502,7 +502,7 @@ static struct rtable *icmp_route_lookup( + if (err) + goto relookup_failed; + +- if (inet_addr_type_dev_table(net, skb_in->dev, ++ if (inet_addr_type_dev_table(net, skb_dst(skb_in)->dev, + fl4_dec.saddr) == RTN_LOCAL) { + rt2 = __ip_route_output_key(net, &fl4_dec); + if (IS_ERR(rt2)) diff --git a/queue-4.8/net-mangle-zero-checksum-in-skb_checksum_help.patch b/queue-4.8/net-mangle-zero-checksum-in-skb_checksum_help.patch new file mode 100644 index 00000000000..600ef5fb754 --- /dev/null +++ b/queue-4.8/net-mangle-zero-checksum-in-skb_checksum_help.patch @@ -0,0 +1,41 @@ +From foo@baz Fri Nov 18 11:35:46 CET 2016 +From: Eric Dumazet +Date: Sat, 29 Oct 2016 11:02:36 -0700 +Subject: net: mangle zero checksum in skb_checksum_help() + +From: Eric Dumazet + + +[ Upstream commit 4f2e4ad56a65f3b7d64c258e373cb71e8d2499f4 ] + +Sending zero checksum is ok for TCP, but not for UDP. + +UDPv6 receiver should by default drop a frame with a 0 checksum, +and UDPv4 would not verify the checksum and might accept a corrupted +packet. + +Simply replace such checksum by 0xffff, regardless of transport. + +This error was caught on SIT tunnels, but seems generic. + +Signed-off-by: Eric Dumazet +Cc: Maciej Żenczykowski +Cc: Willem de Bruijn +Acked-by: Maciej Żenczykowski +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/dev.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -2484,7 +2484,7 @@ int skb_checksum_help(struct sk_buff *sk + goto out; + } + +- *(__sum16 *)(skb->data + offset) = csum_fold(csum); ++ *(__sum16 *)(skb->data + offset) = csum_fold(csum) ?: CSUM_MANGLED_0; + out_set_summed: + skb->ip_summed = CHECKSUM_NONE; + out: diff --git a/queue-4.8/net-stmmac-fix-lack-of-link-transition-for-fixed-phys.patch b/queue-4.8/net-stmmac-fix-lack-of-link-transition-for-fixed-phys.patch new file mode 100644 index 00000000000..9862f76a689 --- /dev/null +++ b/queue-4.8/net-stmmac-fix-lack-of-link-transition-for-fixed-phys.patch @@ -0,0 +1,50 @@ +From foo@baz Fri Nov 18 11:35:46 CET 2016 +From: Florian Fainelli +Date: Sun, 13 Nov 2016 17:50:35 -0800 +Subject: net: stmmac: Fix lack of link transition for fixed PHYs + +From: Florian Fainelli + + +[ Upstream commit c51e424dc79e1428afc4d697cdb6a07f7af70cbf ] + +Commit 52f95bbfcf72 ("stmmac: fix adjust link call in case of a switch +is attached") added some logic to avoid polling the fixed PHY and +therefore invoking the adjust_link callback more than once, since this +is a fixed PHY and link events won't be generated. + +This works fine the first time, because we start with phydev->irq = +PHY_POLL, so we call adjust_link, then we set phydev->irq = +PHY_IGNORE_INTERRUPT and we stop polling the PHY. + +Now, if we called ndo_close(), which calls both phy_stop() and does an +explicit netif_carrier_off(), we end up with a link down. Upon calling +ndo_open() again, despite starting the PHY state machine, we have +PHY_IGNORE_INTERRUPT set, and we generate no link event at all, so the +link is permanently down. + +Fixes: 52f95bbfcf72 ("stmmac: fix adjust link call in case of a switch is attached") +Signed-off-by: Florian Fainelli +Acked-by: Giuseppe Cavallaro +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c ++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +@@ -871,6 +871,13 @@ static int stmmac_init_phy(struct net_de + return -ENODEV; + } + ++ /* stmmac_adjust_link will change this to PHY_IGNORE_INTERRUPT to avoid ++ * subsequent PHY polling, make sure we force a link transition if ++ * we have a UP/DOWN/UP transition ++ */ ++ if (phydev->is_pseudo_fixed_link) ++ phydev->irq = PHY_POLL; ++ + pr_debug("stmmac_init_phy: %s: attached to PHY (UID 0x%x)" + " Link = %d\n", dev->name, phydev->phy_id, phydev->link); + diff --git a/queue-4.8/revert-bnx2-reset-device-during-driver-initialization.patch b/queue-4.8/revert-bnx2-reset-device-during-driver-initialization.patch new file mode 100644 index 00000000000..c8847cd32c2 --- /dev/null +++ b/queue-4.8/revert-bnx2-reset-device-during-driver-initialization.patch @@ -0,0 +1,67 @@ +From foo@baz Fri Nov 18 11:35:46 CET 2016 +From: Baoquan He +Date: Sun, 13 Nov 2016 13:01:32 +0800 +Subject: Revert "bnx2: Reset device during driver initialization" + +From: Baoquan He + + +[ Upstream commit 5d0d4b91bf627f14f95167b738d524156c9d440b ] + +This reverts commit 3e1be7ad2d38c6bd6aeef96df9bd0a7822f4e51c. + +When people build bnx2 driver into kernel, it will fail to detect +and load firmware because firmware is contained in initramfs and +initramfs has not been uncompressed yet during do_initcalls. So +revert commit 3e1be7a and work out a new way in the later patch. + +Signed-off-by: Baoquan He +Acked-by: Paul Menzel +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/bnx2.c | 12 +++++------- + 1 file changed, 5 insertions(+), 7 deletions(-) + +--- a/drivers/net/ethernet/broadcom/bnx2.c ++++ b/drivers/net/ethernet/broadcom/bnx2.c +@@ -6356,6 +6356,10 @@ bnx2_open(struct net_device *dev) + struct bnx2 *bp = netdev_priv(dev); + int rc; + ++ rc = bnx2_request_firmware(bp); ++ if (rc < 0) ++ goto out; ++ + netif_carrier_off(dev); + + bnx2_disable_int(bp); +@@ -6424,6 +6428,7 @@ open_err: + bnx2_free_irq(bp); + bnx2_free_mem(bp); + bnx2_del_napi(bp); ++ bnx2_release_firmware(bp); + goto out; + } + +@@ -8570,12 +8575,6 @@ bnx2_init_one(struct pci_dev *pdev, cons + + pci_set_drvdata(pdev, dev); + +- rc = bnx2_request_firmware(bp); +- if (rc < 0) +- goto error; +- +- +- bnx2_reset_chip(bp, BNX2_DRV_MSG_CODE_RESET); + memcpy(dev->dev_addr, bp->mac_addr, ETH_ALEN); + + dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG | +@@ -8608,7 +8607,6 @@ bnx2_init_one(struct pci_dev *pdev, cons + return 0; + + error: +- bnx2_release_firmware(bp); + pci_iounmap(pdev, bp->regview); + pci_release_regions(pdev); + pci_disable_device(pdev); diff --git a/queue-4.8/revert-include-uapi-linux-atm_zatm.h-include-linux-time.h.patch b/queue-4.8/revert-include-uapi-linux-atm_zatm.h-include-linux-time.h.patch new file mode 100644 index 00000000000..435f86782aa --- /dev/null +++ b/queue-4.8/revert-include-uapi-linux-atm_zatm.h-include-linux-time.h.patch @@ -0,0 +1,57 @@ +From foo@baz Fri Nov 18 11:35:46 CET 2016 +From: Mike Frysinger +Date: Thu, 10 Nov 2016 19:08:39 -0500 +Subject: Revert "include/uapi/linux/atm_zatm.h: include linux/time.h" + +From: Mike Frysinger + + +[ Upstream commit 7b5b74efcca00f15c2aec1dc7175bfe34b6ec643 ] + +This reverts commit cf00713a655d ("include/uapi/linux/atm_zatm.h: include +linux/time.h"). + +This attempted to fix userspace breakage that no longer existed when +the patch was merged. 
Almost one year earlier, commit 70ba07b675b5 +("atm: remove 'struct zatm_t_hist'") deleted the struct in question. + +After this patch was merged, we now have to deal with people being +unable to include this header in conjunction with standard C library +headers like stdlib.h (which linux-atm does). Example breakage: +x86_64-pc-linux-gnu-gcc -DHAVE_CONFIG_H -I. -I../.. -I./../q2931 -I./../saal \ + -I. -DCPPFLAGS_TEST -I../../src/include -O2 -march=native -pipe -g \ + -frecord-gcc-switches -freport-bug -Wimplicit-function-declaration \ + -Wnonnull -Wstrict-aliasing -Wparentheses -Warray-bounds \ + -Wfree-nonheap-object -Wreturn-local-addr -fno-strict-aliasing -Wall \ + -Wshadow -Wpointer-arith -Wwrite-strings -Wstrict-prototypes -c zntune.c +In file included from /usr/include/linux/atm_zatm.h:17:0, + from zntune.c:17: +/usr/include/linux/time.h:9:8: error: redefinition of ‘struct timespec’ + struct timespec { + ^ +In file included from /usr/include/sys/select.h:43:0, + from /usr/include/sys/types.h:219, + from /usr/include/stdlib.h:314, + from zntune.c:9: +/usr/include/time.h:120:8: note: originally defined here + struct timespec + ^ + +Signed-off-by: Mike Frysinger +Acked-by: Mikko Rapeli +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/uapi/linux/atm_zatm.h | 1 - + 1 file changed, 1 deletion(-) + +--- a/include/uapi/linux/atm_zatm.h ++++ b/include/uapi/linux/atm_zatm.h +@@ -14,7 +14,6 @@ + + #include <linux/atm.h> + #include <linux/atmioc.h> +-#include <linux/time.h> + + #define ZATM_GETPOOL _IOW('a',ATMIOC_SARPRV+1,struct atmif_sioc) + /* get pool statistics */ diff --git a/queue-4.8/sctp-assign-assoc_id-earlier-in-__sctp_connect.patch b/queue-4.8/sctp-assign-assoc_id-earlier-in-__sctp_connect.patch new file mode 100644 index 00000000000..f265c1f090b --- /dev/null +++ b/queue-4.8/sctp-assign-assoc_id-earlier-in-__sctp_connect.patch @@ -0,0 +1,57 @@ +From foo@baz Fri Nov 18 11:35:46 CET 2016 +From: Marcelo Ricardo Leitner +Date: Thu, 3 Nov 2016 17:03:41 -0200 +Subject: sctp: assign assoc_id earlier in __sctp_connect + +From: Marcelo Ricardo Leitner + + +[ Upstream commit 7233bc84a3aeda835d334499dc00448373caf5c0 ] + +sctp_wait_for_connect() currently already holds the asoc to keep it +alive during the sleep, in case another thread releases it. But Andrey +Konovalov and Dmitry Vyukov reported a use-after-free in such +situation. + +Problem is that __sctp_connect() doesn't get a ref on the asoc and will +do a read on the asoc after calling sctp_wait_for_connect(), but by then +another thread may have closed it and the _put on sctp_wait_for_connect +will actually release it, causing the use-after-free. + +Fix is, instead of doing the read after waiting for the connect, do it +before so, and avoid this issue as the socket is still locked by then. +There should be no issue on returning the asoc id in case of failure as +the application shouldn't trust on that number in such situations +anyway. + +This issue doesn't exist in sctp_sendmsg() path. + +Reported-by: Dmitry Vyukov +Reported-by: Andrey Konovalov +Tested-by: Andrey Konovalov +Signed-off-by: Marcelo Ricardo Leitner +Reviewed-by: Xin Long +Acked-by: Neil Horman +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/socket.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/net/sctp/socket.c ++++ b/net/sctp/socket.c +@@ -1214,9 +1214,12 @@ static int __sctp_connect(struct sock *s + + timeo = sock_sndtimeo(sk, f_flags & O_NONBLOCK); + +- err = sctp_wait_for_connect(asoc, &timeo); +- if ((err == 0 || err == -EINPROGRESS) && assoc_id) ++ if (assoc_id) + *assoc_id = asoc->assoc_id; ++ err = sctp_wait_for_connect(asoc, &timeo); ++ /* Note: the asoc may be freed after the return of ++ * sctp_wait_for_connect. ++ */ + + /* Don't free association on exit. */ + asoc = NULL; diff --git a/queue-4.8/sctp-change-sk-state-only-when-it-has-assocs-in-sctp_shutdown.patch b/queue-4.8/sctp-change-sk-state-only-when-it-has-assocs-in-sctp_shutdown.patch new file mode 100644 index 00000000000..fbbf40d7bd8 --- /dev/null +++ b/queue-4.8/sctp-change-sk-state-only-when-it-has-assocs-in-sctp_shutdown.patch @@ -0,0 +1,63 @@ +From foo@baz Fri Nov 18 11:35:46 CET 2016 +From: Xin Long +Date: Sun, 13 Nov 2016 21:44:37 +0800 +Subject: sctp: change sk state only when it has assocs in sctp_shutdown + +From: Xin Long + + +[ Upstream commit 5bf35ddfee052d44f39ebaa395d87101c8918405 ] + +Now when users shutdown a sock with SEND_SHUTDOWN in sctp, even if +this sock has no connection (assoc), sk state would be changed to +SCTP_SS_CLOSING, which is not as we expect. + +Besides, after that if users try to listen on this sock, kernel +could even panic when it dereference sctp_sk(sk)->bind_hash in +sctp_inet_listen, as bind_hash is null when sock has no assoc. + +This patch is to move sk state change after checking sk assocs +is not empty, and also merge these two if() conditions and reduce +indent level. 
+ +Fixes: d46e416c11c8 ("sctp: sctp should change socket state when shutdown is received") +Reported-by: Andrey Konovalov +Tested-by: Andrey Konovalov +Signed-off-by: Xin Long +Acked-by: Marcelo Ricardo Leitner +Acked-by: Neil Horman +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/socket.c | 15 +++++++-------- + 1 file changed, 7 insertions(+), 8 deletions(-) + +--- a/net/sctp/socket.c ++++ b/net/sctp/socket.c +@@ -4281,19 +4281,18 @@ static void sctp_shutdown(struct sock *s + { + struct net *net = sock_net(sk); + struct sctp_endpoint *ep; +- struct sctp_association *asoc; + + if (!sctp_style(sk, TCP)) + return; + +- if (how & SEND_SHUTDOWN) { ++ ep = sctp_sk(sk)->ep; ++ if (how & SEND_SHUTDOWN && !list_empty(&ep->asocs)) { ++ struct sctp_association *asoc; ++ + sk->sk_state = SCTP_SS_CLOSING; +- ep = sctp_sk(sk)->ep; +- if (!list_empty(&ep->asocs)) { +- asoc = list_entry(ep->asocs.next, +- struct sctp_association, asocs); +- sctp_primitive_SHUTDOWN(net, asoc, NULL); +- } ++ asoc = list_entry(ep->asocs.next, ++ struct sctp_association, asocs); ++ sctp_primitive_SHUTDOWN(net, asoc, NULL); + } + } + diff --git a/queue-4.8/series b/queue-4.8/series new file mode 100644 index 00000000000..0760e2daac0 --- /dev/null +++ b/queue-4.8/series @@ -0,0 +1,29 @@ +dctcp-avoid-bogus-doubling-of-cwnd-after-loss.patch +net-clear-sk_err_soft-in-sk_clone_lock.patch +net-mangle-zero-checksum-in-skb_checksum_help.patch +bgmac-stop-clearing-dma-receive-control-register-right-after-it-is-set.patch +ip6_tunnel-clear-ip6cb-in-ip6tunnel_xmit.patch +tcp-fix-potential-memory-corruption.patch +ipv4-allow-local-fragmentation-in-ip_finish_output_gso.patch +tcp-fix-return-value-for-partial-writes.patch +dccp-do-not-release-listeners-too-soon.patch +dccp-do-not-send-reset-to-already-closed-sockets.patch +dccp-fix-out-of-bound-access-in-dccp_v4_err.patch +ipv6-dccp-fix-out-of-bound-access-in-dccp_v6_err.patch 
+ipv6-dccp-add-missing-bind_conflict-to-dccp_ipv6_mapped.patch +sctp-assign-assoc_id-earlier-in-__sctp_connect.patch +bpf-fix-htab-map-destruction-when-extra-reserve-is-in-use.patch +net-icmp6_send-should-use-dst-dev-to-determine-l3-domain.patch +fib_trie-correct-proc-net-route-off-by-one-error.patch +sock-fix-sendmmsg-for-partial-sendmsg.patch +net-icmp_route_lookup-should-use-rt-dev-to-determine-l3-domain.patch +net-__skb_flow_dissect-must-cap-its-return-value.patch +ipv4-use-new_gw-for-redirect-neigh-lookup.patch +tcp-take-care-of-truncations-done-by-sk_filter.patch +revert-include-uapi-linux-atm_zatm.h-include-linux-time.h.patch +mlxsw-spectrum-fix-refcount-bug-on-span-entries.patch +mlxsw-spectrum_router-correctly-dump-neighbour-activity.patch +revert-bnx2-reset-device-during-driver-initialization.patch +bnx2-wait-for-in-flight-dma-to-complete-at-probe-stage.patch +sctp-change-sk-state-only-when-it-has-assocs-in-sctp_shutdown.patch +net-stmmac-fix-lack-of-link-transition-for-fixed-phys.patch diff --git a/queue-4.8/sock-fix-sendmmsg-for-partial-sendmsg.patch b/queue-4.8/sock-fix-sendmmsg-for-partial-sendmsg.patch new file mode 100644 index 00000000000..4de715503a5 --- /dev/null +++ b/queue-4.8/sock-fix-sendmmsg-for-partial-sendmsg.patch @@ -0,0 +1,49 @@ +From foo@baz Fri Nov 18 11:35:46 CET 2016 +From: Soheil Hassas Yeganeh +Date: Fri, 4 Nov 2016 15:36:49 -0400 +Subject: sock: fix sendmmsg for partial sendmsg + +From: Soheil Hassas Yeganeh + + +[ Upstream commit 3023898b7d4aac65987bd2f485cc22390aae6f78 ] + +Do not send the next message in sendmmsg for partial sendmsg +invocations. + +sendmmsg assumes that it can continue sending the next message +when the return value of the individual sendmsg invocations +is positive. It results in corrupting the data for TCP, +SCTP, and UNIX streams. 
+ +For example, sendmmsg([["abcd"], ["efgh"]]) can result in a stream +of "aefgh" if the first sendmsg invocation sends only the first +byte while the second sendmsg goes through. + +Datagram sockets either send the entire datagram or fail, so +this patch affects only sockets of type SOCK_STREAM and +SOCK_SEQPACKET. + +Fixes: 228e548e6020 ("net: Add sendmmsg socket system call") +Signed-off-by: Soheil Hassas Yeganeh +Signed-off-by: Eric Dumazet +Signed-off-by: Willem de Bruijn +Signed-off-by: Neal Cardwell +Acked-by: Maciej Żenczykowski +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/socket.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/socket.c ++++ b/net/socket.c +@@ -2041,6 +2041,8 @@ int __sys_sendmmsg(int fd, struct mmsghd + if (err) + break; + ++datagrams; ++ if (msg_data_left(&msg_sys)) ++ break; + cond_resched(); + } + diff --git a/queue-4.8/tcp-fix-potential-memory-corruption.patch b/queue-4.8/tcp-fix-potential-memory-corruption.patch new file mode 100644 index 00000000000..cb330909b4d --- /dev/null +++ b/queue-4.8/tcp-fix-potential-memory-corruption.patch @@ -0,0 +1,40 @@ +From foo@baz Fri Nov 18 11:35:46 CET 2016 +From: Eric Dumazet +Date: Wed, 2 Nov 2016 07:53:17 -0700 +Subject: tcp: fix potential memory corruption + +From: Eric Dumazet + + +[ Upstream commit ac9e70b17ecd7c6e933ff2eaf7ab37429e71bf4d ] + +Imagine initial value of max_skb_frags is 17, and last +skb in write queue has 15 frags. + +Then max_skb_frags is lowered to 14 or smaller value. + +tcp_sendmsg() will then be allowed to add additional page frags +and eventually go past MAX_SKB_FRAGS, overflowing struct +skb_shared_info. + +Fixes: 5f74f82ea34c ("net:Add sysctl_max_skb_frags") +Signed-off-by: Eric Dumazet +Cc: Hans Westgaard Ry +Cc: Håkon Bugge +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -1219,7 +1219,7 @@ new_segment: + + if (!skb_can_coalesce(skb, i, pfrag->page, + pfrag->offset)) { +- if (i == sysctl_max_skb_frags || !sg) { ++ if (i >= sysctl_max_skb_frags || !sg) { + tcp_mark_push(tp, skb); + goto new_segment; + } diff --git a/queue-4.8/tcp-fix-return-value-for-partial-writes.patch b/queue-4.8/tcp-fix-return-value-for-partial-writes.patch new file mode 100644 index 00000000000..cd04b656b41 --- /dev/null +++ b/queue-4.8/tcp-fix-return-value-for-partial-writes.patch @@ -0,0 +1,48 @@ +From foo@baz Fri Nov 18 11:35:46 CET 2016 +From: Eric Dumazet +Date: Wed, 2 Nov 2016 14:41:50 -0700 +Subject: tcp: fix return value for partial writes + +From: Eric Dumazet + + +[ Upstream commit 79d8665b9545e128637c51cf7febde9c493b6481 ] + +After my commit, tcp_sendmsg() might restart its loop after +processing socket backlog. + +If sk_err is set, we blindly return an error, even though we +copied data from user space before. + +We should instead return number of bytes that could be copied, +otherwise user space might resend data and corrupt the stream. + +This might happen if another thread is using recvmsg(MSG_ERRQUEUE) +to process timestamps. + +Issue was diagnosed by Soheil and Willem, big kudos to them! + +Fixes: d41a69f1d390f ("tcp: make tcp_sendmsg() aware of socket backlog") +Signed-off-by: Eric Dumazet +Cc: Willem de Bruijn +Cc: Soheil Hassas Yeganeh +Cc: Yuchung Cheng +Cc: Neal Cardwell +Tested-by: Soheil Hassas Yeganeh +Signed-off-by: David S.
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -1145,7 +1145,7 @@ restart: + + err = -EPIPE; + if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) +- goto out_err; ++ goto do_error; + + sg = !!(sk->sk_route_caps & NETIF_F_SG); + diff --git a/queue-4.8/tcp-take-care-of-truncations-done-by-sk_filter.patch b/queue-4.8/tcp-take-care-of-truncations-done-by-sk_filter.patch new file mode 100644 index 00000000000..516cee533ad --- /dev/null +++ b/queue-4.8/tcp-take-care-of-truncations-done-by-sk_filter.patch @@ -0,0 +1,102 @@ +From foo@baz Fri Nov 18 11:35:46 CET 2016 +From: Eric Dumazet +Date: Thu, 10 Nov 2016 13:12:35 -0800 +Subject: tcp: take care of truncations done by sk_filter() + +From: Eric Dumazet + + +[ Upstream commit ac6e780070e30e4c35bd395acfe9191e6268bdd3 ] + +With syzkaller help, Marco Grassi found a bug in TCP stack, +crashing in tcp_collapse() + +Root cause is that sk_filter() can truncate the incoming skb, +but TCP stack was not really expecting this to happen. +It probably was expecting a simple DROP or ACCEPT behavior. + +We first need to make sure no part of TCP header could be removed. +Then we need to adjust TCP_SKB_CB(skb)->end_seq + +Many thanks to syzkaller team and Marco for giving us a reproducer. + +Signed-off-by: Eric Dumazet +Reported-by: Marco Grassi +Reported-by: Vladis Dronov +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/tcp.h | 1 + + net/ipv4/tcp_ipv4.c | 19 ++++++++++++++++++- + net/ipv6/tcp_ipv6.c | 6 ++++-- + 3 files changed, 23 insertions(+), 3 deletions(-) + +--- a/include/net/tcp.h ++++ b/include/net/tcp.h +@@ -1164,6 +1164,7 @@ static inline void tcp_prequeue_init(str + } + + bool tcp_prequeue(struct sock *sk, struct sk_buff *skb); ++int tcp_filter(struct sock *sk, struct sk_buff *skb); + + #undef STATE_TRACE + +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -1537,6 +1537,21 @@ bool tcp_prequeue(struct sock *sk, struc + } + EXPORT_SYMBOL(tcp_prequeue); + ++int tcp_filter(struct sock *sk, struct sk_buff *skb) ++{ ++ struct tcphdr *th = (struct tcphdr *)skb->data; ++ unsigned int eaten = skb->len; ++ int err; ++ ++ err = sk_filter_trim_cap(sk, skb, th->doff * 4); ++ if (!err) { ++ eaten -= skb->len; ++ TCP_SKB_CB(skb)->end_seq -= eaten; ++ } ++ return err; ++} ++EXPORT_SYMBOL(tcp_filter); ++ + /* + * From tcp_input.c + */ +@@ -1648,8 +1663,10 @@ process: + + nf_reset(skb); + +- if (sk_filter(sk, skb)) ++ if (tcp_filter(sk, skb)) + goto discard_and_relse; ++ th = (const struct tcphdr *)skb->data; ++ iph = ip_hdr(skb); + + skb->dev = NULL; + +--- a/net/ipv6/tcp_ipv6.c ++++ b/net/ipv6/tcp_ipv6.c +@@ -1228,7 +1228,7 @@ static int tcp_v6_do_rcv(struct sock *sk + if (skb->protocol == htons(ETH_P_IP)) + return tcp_v4_do_rcv(sk, skb); + +- if (sk_filter(sk, skb)) ++ if (tcp_filter(sk, skb)) + goto discard; + + /* +@@ -1455,8 +1455,10 @@ process: + if (tcp_v6_inbound_md5_hash(sk, skb)) + goto discard_and_relse; + +- if (sk_filter(sk, skb)) ++ if (tcp_filter(sk, skb)) + goto discard_and_relse; ++ th = (const struct tcphdr *)skb->data; ++ hdr = ipv6_hdr(skb); + + skb->dev = NULL; + -- 2.47.3