From 2ee025e9091ad90b5a2ce363dd8aedfa4946aa0c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 18 Mar 2017 22:05:07 +0800 Subject: [PATCH] 4.10-stable patches added patches: act_connmark-avoid-crashing-on-malformed-nlattrs-with-null-parms.patch amd-xgbe-be-sure-to-set-mdio-modes-on-device-re-start.patch amd-xgbe-don-t-overwrite-sfp-phy-mod_absent-settings.patch amd-xgbe-enable-irqs-only-if-napi_complete_done-is-true.patch amd-xgbe-stop-the-phy-before-releasing-interrupts.patch bonding-use-eth_max_mtu-as-max-mtu.patch bridge-drop-netfilter-fake-rtable-unconditionally.patch dccp-fix-memory-leak-during-tear-down-of-unsuccessful-connection-request.patch dccp-fix-use-after-free-in-dccp_feat_activate_values.patch dccp-tcp-fix-routing-redirect-race.patch dccp-unlock-sock-before-calling-sk_free.patch geneve-lock-rcu-on-tx-path.patch ipv4-add-missing-initialization-for-flowi4_uid.patch ipv4-mask-tos-for-input-route.patch ipv6-avoid-write-to-a-possibly-cloned-skb.patch ipv6-make-ecmp-route-replacement-less-greedy.patch ipv6-orphan-skbs-in-reassembly-unit.patch l2tp-avoid-use-after-free-caused-by-l2tp_ip_backlog_recv.patch mlxsw-spectrum_router-avoid-potential-packets-loss.patch mpls-do-not-decrement-alive-counter-for-unregister-events.patch mpls-send-route-delete-notifications-when-router-module-is-unloaded.patch net-bridge-allow-ipv6-when-multicast-flood-is-disabled.patch net-don-t-call-strlen-on-the-user-buffer-in-packet_bind_spkt.patch net-fix-socket-refcounting-in-skb_complete_tx_timestamp.patch net-fix-socket-refcounting-in-skb_complete_wifi_ack.patch net-mlx5e-do-not-reduce-lro-wqe-size-when-not-using-build_skb.patch net-mlx5e-fix-broken-cqe-compression-initialization.patch net-mlx5e-fix-wrong-cqe-decompression.patch net-mlx5e-register-unregister-vport-representors-on-interface-attach-detach.patch net-mlx5e-update-mpwqe-stride-size-when-modifying-cqe-compress-state.patch net-net_enable_timestamp-can-be-called-from-irq-contexts.patch 
net-sched-act_skbmod-remove-unneeded-rcu_read_unlock-in-tcf_skbmod_dump.patch net-sched-actions-decrement-module-reference-count-after-table-flush.patch net-tunnel-set-inner-protocol-in-network-gro-hooks.patch net-use-net-count-to-check-whether-a-netns-is-alive-or-not.patch sctp-deny-peeloff-operation-on-asocs-with-threads-sleeping-on-it.patch sctp-set-sin_port-for-addr-param-when-checking-duplicate-address.patch series strparser-destroy-workqueue-on-module-exit.patch tcp-dccp-block-bh-for-syn-processing.patch tcp-fix-various-issues-for-sockets-morphing-to-listen-state.patch team-use-eth_max_mtu-as-max-mtu.patch tun-fix-premature-pollout-notification-on-tun-devices.patch uapi-fix-linux-packet_diag.h-userspace-compilation-error.patch vrf-fix-use-after-free-in-vrf_xmit.patch vti6-return-gre_key-for-vti6.patch vxlan-correctly-validate-vxlan-id-against-vxlan_n_vid.patch vxlan-don-t-allow-overwrite-of-config-src-addr.patch vxlan-lock-rcu-on-tx-path.patch --- ...on-malformed-nlattrs-with-null-parms.patch | 57 +++++ ...to-set-mdio-modes-on-device-re-start.patch | 72 ++++++ ...verwrite-sfp-phy-mod_absent-settings.patch | 32 +++ ...s-only-if-napi_complete_done-is-true.patch | 52 ++++ ...-the-phy-before-releasing-interrupts.patch | 41 +++ .../bonding-use-eth_max_mtu-as-max-mtu.patch | 34 +++ ...etfilter-fake-rtable-unconditionally.patch | 83 ++++++ ...n-of-unsuccessful-connection-request.patch | 33 +++ ...er-free-in-dccp_feat_activate_values.patch | 237 ++++++++++++++++++ .../dccp-tcp-fix-routing-redirect-race.patch | 160 ++++++++++++ ...p-unlock-sock-before-calling-sk_free.patch | 81 ++++++ queue-4.10/geneve-lock-rcu-on-tx-path.patch | 38 +++ ...issing-initialization-for-flowi4_uid.patch | 68 +++++ .../ipv4-mask-tos-for-input-route.patch | 35 +++ ...avoid-write-to-a-possibly-cloned-skb.patch | 65 +++++ ...e-ecmp-route-replacement-less-greedy.patch | 71 ++++++ .../ipv6-orphan-skbs-in-reassembly-unit.patch | 172 +++++++++++++ ...-free-caused-by-l2tp_ip_backlog_recv.patch | 32 
+++ ..._router-avoid-potential-packets-loss.patch | 78 ++++++ ...-alive-counter-for-unregister-events.patch | 53 ++++ ...tions-when-router-module-is-unloaded.patch | 33 +++ ...pv6-when-multicast-flood-is-disabled.patch | 37 +++ ...-the-user-buffer-in-packet_bind_spkt.patch | 108 ++++++++ ...ounting-in-skb_complete_tx_timestamp.patch | 53 ++++ ...refcounting-in-skb_complete_wifi_ack.patch | 62 +++++ ...ro-wqe-size-when-not-using-build_skb.patch | 56 +++++ ...roken-cqe-compression-initialization.patch | 58 +++++ ...et-mlx5e-fix-wrong-cqe-decompression.patch | 71 ++++++ ...resentors-on-interface-attach-detach.patch | 90 +++++++ ...ze-when-modifying-cqe-compress-state.patch | 84 +++++++ ...tamp-can-be-called-from-irq-contexts.patch | 96 +++++++ ...d-rcu_read_unlock-in-tcf_skbmod_dump.patch | 30 +++ ...le-reference-count-after-table-flush.patch | 94 +++++++ ...-inner-protocol-in-network-gro-hooks.patch | 70 ++++++ ...heck-whether-a-netns-is-alive-or-not.patch | 54 ++++ ...on-asocs-with-threads-sleeping-on-it.patch | 66 +++++ ...aram-when-checking-duplicate-address.patch | 53 ++++ queue-4.10/series | 48 ++++ ...ser-destroy-workqueue-on-module-exit.patch | 29 +++ ...tcp-dccp-block-bh-for-syn-processing.patch | 206 +++++++++++++++ ...for-sockets-morphing-to-listen-state.patch | 74 ++++++ .../team-use-eth_max_mtu-as-max-mtu.patch | 38 +++ ...-pollout-notification-on-tun-devices.patch | 78 ++++++ ...t_diag.h-userspace-compilation-error.patch | 44 ++++ .../vrf-fix-use-after-free-in-vrf_xmit.patch | 56 +++++ queue-4.10/vti6-return-gre_key-for-vti6.patch | 33 +++ ...alidate-vxlan-id-against-vxlan_n_vid.patch | 33 +++ ...t-allow-overwrite-of-config-src-addr.patch | 94 +++++++ queue-4.10/vxlan-lock-rcu-on-tx-path.patch | 66 +++++ queue-4.9/series | 41 +++ 50 files changed, 3449 insertions(+) create mode 100644 queue-4.10/act_connmark-avoid-crashing-on-malformed-nlattrs-with-null-parms.patch create mode 100644 queue-4.10/amd-xgbe-be-sure-to-set-mdio-modes-on-device-re-start.patch 
create mode 100644 queue-4.10/amd-xgbe-don-t-overwrite-sfp-phy-mod_absent-settings.patch create mode 100644 queue-4.10/amd-xgbe-enable-irqs-only-if-napi_complete_done-is-true.patch create mode 100644 queue-4.10/amd-xgbe-stop-the-phy-before-releasing-interrupts.patch create mode 100644 queue-4.10/bonding-use-eth_max_mtu-as-max-mtu.patch create mode 100644 queue-4.10/bridge-drop-netfilter-fake-rtable-unconditionally.patch create mode 100644 queue-4.10/dccp-fix-memory-leak-during-tear-down-of-unsuccessful-connection-request.patch create mode 100644 queue-4.10/dccp-fix-use-after-free-in-dccp_feat_activate_values.patch create mode 100644 queue-4.10/dccp-tcp-fix-routing-redirect-race.patch create mode 100644 queue-4.10/dccp-unlock-sock-before-calling-sk_free.patch create mode 100644 queue-4.10/geneve-lock-rcu-on-tx-path.patch create mode 100644 queue-4.10/ipv4-add-missing-initialization-for-flowi4_uid.patch create mode 100644 queue-4.10/ipv4-mask-tos-for-input-route.patch create mode 100644 queue-4.10/ipv6-avoid-write-to-a-possibly-cloned-skb.patch create mode 100644 queue-4.10/ipv6-make-ecmp-route-replacement-less-greedy.patch create mode 100644 queue-4.10/ipv6-orphan-skbs-in-reassembly-unit.patch create mode 100644 queue-4.10/l2tp-avoid-use-after-free-caused-by-l2tp_ip_backlog_recv.patch create mode 100644 queue-4.10/mlxsw-spectrum_router-avoid-potential-packets-loss.patch create mode 100644 queue-4.10/mpls-do-not-decrement-alive-counter-for-unregister-events.patch create mode 100644 queue-4.10/mpls-send-route-delete-notifications-when-router-module-is-unloaded.patch create mode 100644 queue-4.10/net-bridge-allow-ipv6-when-multicast-flood-is-disabled.patch create mode 100644 queue-4.10/net-don-t-call-strlen-on-the-user-buffer-in-packet_bind_spkt.patch create mode 100644 queue-4.10/net-fix-socket-refcounting-in-skb_complete_tx_timestamp.patch create mode 100644 queue-4.10/net-fix-socket-refcounting-in-skb_complete_wifi_ack.patch create mode 100644 
queue-4.10/net-mlx5e-do-not-reduce-lro-wqe-size-when-not-using-build_skb.patch create mode 100644 queue-4.10/net-mlx5e-fix-broken-cqe-compression-initialization.patch create mode 100644 queue-4.10/net-mlx5e-fix-wrong-cqe-decompression.patch create mode 100644 queue-4.10/net-mlx5e-register-unregister-vport-representors-on-interface-attach-detach.patch create mode 100644 queue-4.10/net-mlx5e-update-mpwqe-stride-size-when-modifying-cqe-compress-state.patch create mode 100644 queue-4.10/net-net_enable_timestamp-can-be-called-from-irq-contexts.patch create mode 100644 queue-4.10/net-sched-act_skbmod-remove-unneeded-rcu_read_unlock-in-tcf_skbmod_dump.patch create mode 100644 queue-4.10/net-sched-actions-decrement-module-reference-count-after-table-flush.patch create mode 100644 queue-4.10/net-tunnel-set-inner-protocol-in-network-gro-hooks.patch create mode 100644 queue-4.10/net-use-net-count-to-check-whether-a-netns-is-alive-or-not.patch create mode 100644 queue-4.10/sctp-deny-peeloff-operation-on-asocs-with-threads-sleeping-on-it.patch create mode 100644 queue-4.10/sctp-set-sin_port-for-addr-param-when-checking-duplicate-address.patch create mode 100644 queue-4.10/series create mode 100644 queue-4.10/strparser-destroy-workqueue-on-module-exit.patch create mode 100644 queue-4.10/tcp-dccp-block-bh-for-syn-processing.patch create mode 100644 queue-4.10/tcp-fix-various-issues-for-sockets-morphing-to-listen-state.patch create mode 100644 queue-4.10/team-use-eth_max_mtu-as-max-mtu.patch create mode 100644 queue-4.10/tun-fix-premature-pollout-notification-on-tun-devices.patch create mode 100644 queue-4.10/uapi-fix-linux-packet_diag.h-userspace-compilation-error.patch create mode 100644 queue-4.10/vrf-fix-use-after-free-in-vrf_xmit.patch create mode 100644 queue-4.10/vti6-return-gre_key-for-vti6.patch create mode 100644 queue-4.10/vxlan-correctly-validate-vxlan-id-against-vxlan_n_vid.patch create mode 100644 queue-4.10/vxlan-don-t-allow-overwrite-of-config-src-addr.patch create 
mode 100644 queue-4.10/vxlan-lock-rcu-on-tx-path.patch create mode 100644 queue-4.9/series diff --git a/queue-4.10/act_connmark-avoid-crashing-on-malformed-nlattrs-with-null-parms.patch b/queue-4.10/act_connmark-avoid-crashing-on-malformed-nlattrs-with-null-parms.patch new file mode 100644 index 00000000000..09f778d6ea1 --- /dev/null +++ b/queue-4.10/act_connmark-avoid-crashing-on-malformed-nlattrs-with-null-parms.patch @@ -0,0 +1,57 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: Etienne Noss +Date: Fri, 10 Mar 2017 16:55:32 +0100 +Subject: act_connmark: avoid crashing on malformed nlattrs with null parms + +From: Etienne Noss + + +[ Upstream commit 52491c7607c5527138095edf44c53169dc1ddb82 ] + +tcf_connmark_init does not check in its configuration if TCA_CONNMARK_PARMS +is set, resulting in a null pointer dereference when trying to access it. + +[501099.043007] BUG: unable to handle kernel NULL pointer dereference at 0000000000000004 +[501099.043039] IP: [] tcf_connmark_init+0x8b/0x180 [act_connmark] +... +[501099.044334] Call Trace: +[501099.044345] [] ? tcf_action_init_1+0x198/0x1b0 +[501099.044363] [] ? tcf_action_init+0xb0/0x120 +[501099.044380] [] ? tcf_exts_validate+0xc4/0x110 +[501099.044398] [] ? u32_set_parms+0xa7/0x270 [cls_u32] +[501099.044417] [] ? u32_change+0x680/0x87b [cls_u32] +[501099.044436] [] ? tc_ctl_tfilter+0x4dd/0x8a0 +[501099.044454] [] ? security_capable+0x41/0x60 +[501099.044471] [] ? rtnetlink_rcv_msg+0xe1/0x220 +[501099.044490] [] ? rtnl_newlink+0x870/0x870 +[501099.044507] [] ? netlink_rcv_skb+0xa1/0xc0 +[501099.044524] [] ? rtnetlink_rcv+0x24/0x30 +[501099.044541] [] ? netlink_unicast+0x184/0x230 +[501099.044558] [] ? netlink_sendmsg+0x2f8/0x3b0 +[501099.044576] [] ? sock_sendmsg+0x30/0x40 +[501099.044592] [] ? SYSC_sendto+0xd3/0x150 +[501099.044608] [] ? __do_page_fault+0x2d1/0x510 +[501099.044626] [] ? 
system_call_fast_compare_end+0xc/0x9b + +Fixes: 22a5dc0e5e3e ("net: sched: Introduce connmark action") +Signed-off-by: Étienne Noss +Signed-off-by: Victorien Molle +Acked-by: Cong Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/act_connmark.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/net/sched/act_connmark.c ++++ b/net/sched/act_connmark.c +@@ -113,6 +113,9 @@ static int tcf_connmark_init(struct net + if (ret < 0) + return ret; + ++ if (!tb[TCA_CONNMARK_PARMS]) ++ return -EINVAL; ++ + parm = nla_data(tb[TCA_CONNMARK_PARMS]); + + if (!tcf_hash_check(tn, parm->index, a, bind)) { diff --git a/queue-4.10/amd-xgbe-be-sure-to-set-mdio-modes-on-device-re-start.patch b/queue-4.10/amd-xgbe-be-sure-to-set-mdio-modes-on-device-re-start.patch new file mode 100644 index 00000000000..3a4a72946d8 --- /dev/null +++ b/queue-4.10/amd-xgbe-be-sure-to-set-mdio-modes-on-device-re-start.patch @@ -0,0 +1,72 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: "Lendacky, Thomas" +Date: Tue, 28 Feb 2017 15:03:01 -0600 +Subject: amd-xgbe: Be sure to set MDIO modes on device (re)start + +From: "Lendacky, Thomas" + + +[ Upstream commit b42c6761fd1651f564491b53016046c9ebf0b2a9 ] + +The MDIO register mode is set when the device is probed. But when the +device is brought down and then back up, the MDIO register mode has been +reset. Be sure to reset the mode during device startup and only change +the mode of the address specified. + +Signed-off-by: Tom Lendacky +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/amd/xgbe/xgbe-dev.c | 2 +- + drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c | 22 ++++++++++++++++++++++ + 2 files changed, 23 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c ++++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c +@@ -1323,7 +1323,7 @@ static int xgbe_read_ext_mii_regs(struct + static int xgbe_set_ext_mii_mode(struct xgbe_prv_data *pdata, unsigned int port, + enum xgbe_mdio_mode mode) + { +- unsigned int reg_val = 0; ++ unsigned int reg_val = XGMAC_IOREAD(pdata, MAC_MDIOCL22R); + + switch (mode) { + case XGBE_MDIO_MODE_CL22: +--- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c ++++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c +@@ -875,6 +875,16 @@ static int xgbe_phy_find_phy_device(stru + !phy_data->sfp_phy_avail) + return 0; + ++ /* Set the proper MDIO mode for the PHY */ ++ ret = pdata->hw_if.set_ext_mii_mode(pdata, phy_data->mdio_addr, ++ phy_data->phydev_mode); ++ if (ret) { ++ netdev_err(pdata->netdev, ++ "mdio port/clause not compatible (%u/%u)\n", ++ phy_data->mdio_addr, phy_data->phydev_mode); ++ return ret; ++ } ++ + /* Create and connect to the PHY device */ + phydev = get_phy_device(phy_data->mii, phy_data->mdio_addr, + (phy_data->phydev_mode == XGBE_MDIO_MODE_CL45)); +@@ -2722,6 +2732,18 @@ static int xgbe_phy_start(struct xgbe_pr + if (ret) + return ret; + ++ /* Set the proper MDIO mode for the re-driver */ ++ if (phy_data->redrv && !phy_data->redrv_if) { ++ ret = pdata->hw_if.set_ext_mii_mode(pdata, phy_data->redrv_addr, ++ XGBE_MDIO_MODE_CL22); ++ if (ret) { ++ netdev_err(pdata->netdev, ++ "redriver mdio port not compatible (%u)\n", ++ phy_data->redrv_addr); ++ return ret; ++ } ++ } ++ + /* Start in highest supported mode */ + xgbe_phy_set_mode(pdata, phy_data->start_mode); + diff --git a/queue-4.10/amd-xgbe-don-t-overwrite-sfp-phy-mod_absent-settings.patch b/queue-4.10/amd-xgbe-don-t-overwrite-sfp-phy-mod_absent-settings.patch new file mode 
100644 index 00000000000..ccba0979673 --- /dev/null +++ b/queue-4.10/amd-xgbe-don-t-overwrite-sfp-phy-mod_absent-settings.patch @@ -0,0 +1,32 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: "Lendacky, Thomas" +Date: Tue, 28 Feb 2017 15:03:10 -0600 +Subject: amd-xgbe: Don't overwrite SFP PHY mod_absent settings + +From: "Lendacky, Thomas" + + +[ Upstream commit 2697ea5a859b83ca49511dcfd98daf42584eb3cf ] + +If an SFP module is not present, xgbe_phy_sfp_phy_settings() should +return after applying the default settings. Currently there is no return +statement and the default settings are overwritten. + +Signed-off-by: Tom Lendacky +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c ++++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c +@@ -716,6 +716,8 @@ static void xgbe_phy_sfp_phy_settings(st + pdata->phy.duplex = DUPLEX_UNKNOWN; + pdata->phy.autoneg = AUTONEG_ENABLE; + pdata->phy.advertising = pdata->phy.supported; ++ ++ return; + } + + pdata->phy.advertising &= ~ADVERTISED_Autoneg; diff --git a/queue-4.10/amd-xgbe-enable-irqs-only-if-napi_complete_done-is-true.patch b/queue-4.10/amd-xgbe-enable-irqs-only-if-napi_complete_done-is-true.patch new file mode 100644 index 00000000000..5893294473c --- /dev/null +++ b/queue-4.10/amd-xgbe-enable-irqs-only-if-napi_complete_done-is-true.patch @@ -0,0 +1,52 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: "Lendacky, Thomas" +Date: Thu, 9 Mar 2017 17:48:23 -0600 +Subject: amd-xgbe: Enable IRQs only if napi_complete_done() is true + +From: "Lendacky, Thomas" + + +[ Upstream commit d7aba644ffdebf756e51e26a2229055211838e89 ] + +Depending on the hardware, the amd-xgbe driver may use disable_irq_nosync() +and enable_irq() when an interrupt is received to process Rx packets. 
If +the napi_complete_done() return value isn't checked an unbalanced enable +for the IRQ could result, generating a warning stack trace. + +Update the driver to only enable interrupts if napi_complete_done() returns +true. + +Reported-by: Jeremy Linton +Signed-off-by: Tom Lendacky +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 10 ++-------- + 1 file changed, 2 insertions(+), 8 deletions(-) + +--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c ++++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +@@ -2274,10 +2274,7 @@ static int xgbe_one_poll(struct napi_str + processed = xgbe_rx_poll(channel, budget); + + /* If we processed everything, we are done */ +- if (processed < budget) { +- /* Turn off polling */ +- napi_complete_done(napi, processed); +- ++ if ((processed < budget) && napi_complete_done(napi, processed)) { + /* Enable Tx and Rx interrupts */ + if (pdata->channel_irq_mode) + xgbe_enable_rx_tx_int(pdata, channel); +@@ -2319,10 +2316,7 @@ static int xgbe_all_poll(struct napi_str + } while ((processed < budget) && (processed != last_processed)); + + /* If we processed everything, we are done */ +- if (processed < budget) { +- /* Turn off polling */ +- napi_complete_done(napi, processed); +- ++ if ((processed < budget) && napi_complete_done(napi, processed)) { + /* Enable Tx and Rx interrupts */ + xgbe_enable_rx_tx_ints(pdata); + } diff --git a/queue-4.10/amd-xgbe-stop-the-phy-before-releasing-interrupts.patch b/queue-4.10/amd-xgbe-stop-the-phy-before-releasing-interrupts.patch new file mode 100644 index 00000000000..24b2a63ecfd --- /dev/null +++ b/queue-4.10/amd-xgbe-stop-the-phy-before-releasing-interrupts.patch @@ -0,0 +1,41 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: "Lendacky, Thomas" +Date: Tue, 28 Feb 2017 15:02:51 -0600 +Subject: amd-xgbe: Stop the PHY before releasing interrupts + +From: "Lendacky, Thomas" + + +[ Upstream commit 402168b4c2dc0734b8fbd282eff77da0275c5129 ] + +Some 
configurations require the use of the hardware's MDIO support to +communicate with external PHYs. The MDIO commands indicate completion +through the device interrupt. When bringing down the device the interrupts +were released before stopping the external PHY, resulting in MDIO command +timeouts. Move the stopping of the PHY to before the releasing of the +interrupts. + +Signed-off-by: Tom Lendacky +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c ++++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +@@ -1131,12 +1131,12 @@ static void xgbe_stop(struct xgbe_prv_da + hw_if->disable_tx(pdata); + hw_if->disable_rx(pdata); + ++ phy_if->phy_stop(pdata); ++ + xgbe_free_irqs(pdata); + + xgbe_napi_disable(pdata, 1); + +- phy_if->phy_stop(pdata); +- + hw_if->exit(pdata); + + channel = pdata->channel; diff --git a/queue-4.10/bonding-use-eth_max_mtu-as-max-mtu.patch b/queue-4.10/bonding-use-eth_max_mtu-as-max-mtu.patch new file mode 100644 index 00000000000..aa0ee555dae --- /dev/null +++ b/queue-4.10/bonding-use-eth_max_mtu-as-max-mtu.patch @@ -0,0 +1,34 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: WANG Cong +Date: Thu, 2 Mar 2017 12:24:36 -0800 +Subject: bonding: use ETH_MAX_MTU as max mtu + +From: WANG Cong + + +[ Upstream commit 31c05415f5b471fd333fe42629788364faea8e0d ] + +This restores the ability of setting bond device's mtu to 9000. + +Fixes: 91572088e3fd ("net: use core MTU range checking in core net infra") +Reported-by: daznis@gmail.com +Reported-by: Brad Campbell +Cc: Jarod Wilson +Signed-off-by: Cong Wang +Signed-off-by: Jay Vosburgh +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/bonding/bond_main.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -4185,6 +4185,7 @@ void bond_setup(struct net_device *bond_ + + /* Initialize the device entry points */ + ether_setup(bond_dev); ++ bond_dev->max_mtu = ETH_MAX_MTU; + bond_dev->netdev_ops = &bond_netdev_ops; + bond_dev->ethtool_ops = &bond_ethtool_ops; + diff --git a/queue-4.10/bridge-drop-netfilter-fake-rtable-unconditionally.patch b/queue-4.10/bridge-drop-netfilter-fake-rtable-unconditionally.patch new file mode 100644 index 00000000000..1625ab5b6e7 --- /dev/null +++ b/queue-4.10/bridge-drop-netfilter-fake-rtable-unconditionally.patch @@ -0,0 +1,83 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: Florian Westphal +Date: Mon, 13 Mar 2017 17:38:17 +0100 +Subject: bridge: drop netfilter fake rtable unconditionally + +From: Florian Westphal + + +[ Upstream commit a13b2082ece95247779b9995c4e91b4246bed023 ] + +Andreas reports kernel oops during rmmod of the br_netfilter module. +Hannes debugged the oops down to a NULL rt6info->rt6i_indev. + +Problem is that br_netfilter has the nasty concept of adding a fake +rtable to skb->dst; this happens in a br_netfilter prerouting hook. + +A second hook (in bridge LOCAL_IN) is supposed to remove these again +before the skb is handed up the stack. + +However, on module unload hooks get unregistered which means an +skb could traverse the prerouting hook that attaches the fake_rtable, +while the 'fake rtable remove' hook gets removed from the hooklist +immediately after. + +Fixes: 34666d467cbf1e2e3c7 ("netfilter: bridge: move br_netfilter out of the core") +Reported-by: Andreas Karis +Debugged-by: Hannes Frederic Sowa +Signed-off-by: Florian Westphal +Acked-by: Pablo Neira Ayuso +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/br_input.c | 1 + + net/bridge/br_netfilter_hooks.c | 21 --------------------- + 2 files changed, 1 insertion(+), 21 deletions(-) + +--- a/net/bridge/br_input.c ++++ b/net/bridge/br_input.c +@@ -29,6 +29,7 @@ EXPORT_SYMBOL(br_should_route_hook); + static int + br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb) + { ++ br_drop_fake_rtable(skb); + return netif_receive_skb(skb); + } + +--- a/net/bridge/br_netfilter_hooks.c ++++ b/net/bridge/br_netfilter_hooks.c +@@ -521,21 +521,6 @@ static unsigned int br_nf_pre_routing(vo + } + + +-/* PF_BRIDGE/LOCAL_IN ************************************************/ +-/* The packet is locally destined, which requires a real +- * dst_entry, so detach the fake one. On the way up, the +- * packet would pass through PRE_ROUTING again (which already +- * took place when the packet entered the bridge), but we +- * register an IPv4 PRE_ROUTING 'sabotage' hook that will +- * prevent this from happening. 
*/ +-static unsigned int br_nf_local_in(void *priv, +- struct sk_buff *skb, +- const struct nf_hook_state *state) +-{ +- br_drop_fake_rtable(skb); +- return NF_ACCEPT; +-} +- + /* PF_BRIDGE/FORWARD *************************************************/ + static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) + { +@@ -908,12 +893,6 @@ static struct nf_hook_ops br_nf_ops[] __ + .priority = NF_BR_PRI_BRNF, + }, + { +- .hook = br_nf_local_in, +- .pf = NFPROTO_BRIDGE, +- .hooknum = NF_BR_LOCAL_IN, +- .priority = NF_BR_PRI_BRNF, +- }, +- { + .hook = br_nf_forward_ip, + .pf = NFPROTO_BRIDGE, + .hooknum = NF_BR_FORWARD, diff --git a/queue-4.10/dccp-fix-memory-leak-during-tear-down-of-unsuccessful-connection-request.patch b/queue-4.10/dccp-fix-memory-leak-during-tear-down-of-unsuccessful-connection-request.patch new file mode 100644 index 00000000000..56c58e2b581 --- /dev/null +++ b/queue-4.10/dccp-fix-memory-leak-during-tear-down-of-unsuccessful-connection-request.patch @@ -0,0 +1,33 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: Hannes Frederic Sowa +Date: Mon, 13 Mar 2017 00:01:30 +0100 +Subject: dccp: fix memory leak during tear-down of unsuccessful connection request + +From: Hannes Frederic Sowa + + +[ Upstream commit 72ef9c4125c7b257e3a714d62d778ab46583d6a3 ] + +This patch fixes a memory leak, which happens if the connection request +is not fulfilled between parsing the DCCP options and handling the SYN +(because e.g. the backlog is full), because we forgot to free the +list of ack vectors. + +Reported-by: Jianwen Ji +Signed-off-by: Hannes Frederic Sowa +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/dccp/ccids/ccid2.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/dccp/ccids/ccid2.c ++++ b/net/dccp/ccids/ccid2.c +@@ -749,6 +749,7 @@ static void ccid2_hc_tx_exit(struct sock + for (i = 0; i < hc->tx_seqbufc; i++) + kfree(hc->tx_seqbuf[i]); + hc->tx_seqbufc = 0; ++ dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks); + } + + static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) diff --git a/queue-4.10/dccp-fix-use-after-free-in-dccp_feat_activate_values.patch b/queue-4.10/dccp-fix-use-after-free-in-dccp_feat_activate_values.patch new file mode 100644 index 00000000000..f5425973de6 --- /dev/null +++ b/queue-4.10/dccp-fix-use-after-free-in-dccp_feat_activate_values.patch @@ -0,0 +1,237 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: Eric Dumazet +Date: Sun, 5 Mar 2017 10:52:16 -0800 +Subject: dccp: fix use-after-free in dccp_feat_activate_values + +From: Eric Dumazet + + +[ Upstream commit 62f8f4d9066c1c6f2474845d1ca7e2891f2ae3fd ] + +Dmitry reported crashes in DCCP stack [1] + +Problem here is that when I got rid of listener spinlock, I missed the +fact that DCCP stores a complex state in struct dccp_request_sock, +while TCP does not. + +Since multiple cpus could access it at the same time, we need to add +protection. 
+ +[1] +BUG: KASAN: use-after-free in dccp_feat_activate_values+0x967/0xab0 +net/dccp/feat.c:1541 at addr ffff88003713be68 +Read of size 8 by task syz-executor2/8457 +CPU: 2 PID: 8457 Comm: syz-executor2 Not tainted 4.10.0-rc7+ #127 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 +Call Trace: + + __dump_stack lib/dump_stack.c:15 [inline] + dump_stack+0x292/0x398 lib/dump_stack.c:51 + kasan_object_err+0x1c/0x70 mm/kasan/report.c:162 + print_address_description mm/kasan/report.c:200 [inline] + kasan_report_error mm/kasan/report.c:289 [inline] + kasan_report.part.1+0x20e/0x4e0 mm/kasan/report.c:311 + kasan_report mm/kasan/report.c:332 [inline] + __asan_report_load8_noabort+0x29/0x30 mm/kasan/report.c:332 + dccp_feat_activate_values+0x967/0xab0 net/dccp/feat.c:1541 + dccp_create_openreq_child+0x464/0x610 net/dccp/minisocks.c:121 + dccp_v6_request_recv_sock+0x1f6/0x1960 net/dccp/ipv6.c:457 + dccp_check_req+0x335/0x5a0 net/dccp/minisocks.c:186 + dccp_v6_rcv+0x69e/0x1d00 net/dccp/ipv6.c:711 + ip6_input_finish+0x46d/0x17a0 net/ipv6/ip6_input.c:279 + NF_HOOK include/linux/netfilter.h:257 [inline] + ip6_input+0xdb/0x590 net/ipv6/ip6_input.c:322 + dst_input include/net/dst.h:507 [inline] + ip6_rcv_finish+0x289/0x890 net/ipv6/ip6_input.c:69 + NF_HOOK include/linux/netfilter.h:257 [inline] + ipv6_rcv+0x12ec/0x23d0 net/ipv6/ip6_input.c:203 + __netif_receive_skb_core+0x1ae5/0x3400 net/core/dev.c:4190 + __netif_receive_skb+0x2a/0x170 net/core/dev.c:4228 + process_backlog+0xe5/0x6c0 net/core/dev.c:4839 + napi_poll net/core/dev.c:5202 [inline] + net_rx_action+0xe70/0x1900 net/core/dev.c:5267 + __do_softirq+0x2fb/0xb7d kernel/softirq.c:284 + do_softirq_own_stack+0x1c/0x30 arch/x86/entry/entry_64.S:902 + + do_softirq.part.17+0x1e8/0x230 kernel/softirq.c:328 + do_softirq kernel/softirq.c:176 [inline] + __local_bh_enable_ip+0x1f2/0x200 kernel/softirq.c:181 + local_bh_enable include/linux/bottom_half.h:31 [inline] + rcu_read_unlock_bh 
include/linux/rcupdate.h:971 [inline] + ip6_finish_output2+0xbb0/0x23d0 net/ipv6/ip6_output.c:123 + ip6_finish_output+0x302/0x960 net/ipv6/ip6_output.c:148 + NF_HOOK_COND include/linux/netfilter.h:246 [inline] + ip6_output+0x1cb/0x8d0 net/ipv6/ip6_output.c:162 + ip6_xmit+0xcdf/0x20d0 include/net/dst.h:501 + inet6_csk_xmit+0x320/0x5f0 net/ipv6/inet6_connection_sock.c:179 + dccp_transmit_skb+0xb09/0x1120 net/dccp/output.c:141 + dccp_xmit_packet+0x215/0x760 net/dccp/output.c:280 + dccp_write_xmit+0x168/0x1d0 net/dccp/output.c:362 + dccp_sendmsg+0x79c/0xb10 net/dccp/proto.c:796 + inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:744 + sock_sendmsg_nosec net/socket.c:635 [inline] + sock_sendmsg+0xca/0x110 net/socket.c:645 + SYSC_sendto+0x660/0x810 net/socket.c:1687 + SyS_sendto+0x40/0x50 net/socket.c:1655 + entry_SYSCALL_64_fastpath+0x1f/0xc2 +RIP: 0033:0x4458b9 +RSP: 002b:00007f8ceb77bb58 EFLAGS: 00000282 ORIG_RAX: 000000000000002c +RAX: ffffffffffffffda RBX: 0000000000000017 RCX: 00000000004458b9 +RDX: 0000000000000023 RSI: 0000000020e60000 RDI: 0000000000000017 +RBP: 00000000006e1b90 R08: 00000000200f9fe1 R09: 0000000000000020 +R10: 0000000000008010 R11: 0000000000000282 R12: 00000000007080a8 +R13: 0000000000000000 R14: 00007f8ceb77c9c0 R15: 00007f8ceb77c700 +Object at ffff88003713be50, in cache kmalloc-64 size: 64 +Allocated: +PID = 8446 + save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:57 + save_stack+0x43/0xd0 mm/kasan/kasan.c:502 + set_track mm/kasan/kasan.c:514 [inline] + kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:605 + kmem_cache_alloc_trace+0x82/0x270 mm/slub.c:2738 + kmalloc include/linux/slab.h:490 [inline] + dccp_feat_entry_new+0x214/0x410 net/dccp/feat.c:467 + dccp_feat_push_change+0x38/0x220 net/dccp/feat.c:487 + __feat_register_sp+0x223/0x2f0 net/dccp/feat.c:741 + dccp_feat_propagate_ccid+0x22b/0x2b0 net/dccp/feat.c:949 + dccp_feat_server_ccid_dependencies+0x1b3/0x250 net/dccp/feat.c:1012 + dccp_make_response+0x1f1/0xc90 net/dccp/output.c:423 + 
dccp_v6_send_response+0x4ec/0xc20 net/dccp/ipv6.c:217 + dccp_v6_conn_request+0xaba/0x11b0 net/dccp/ipv6.c:377 + dccp_rcv_state_process+0x51e/0x1650 net/dccp/input.c:606 + dccp_v6_do_rcv+0x213/0x350 net/dccp/ipv6.c:632 + sk_backlog_rcv include/net/sock.h:893 [inline] + __sk_receive_skb+0x36f/0xcc0 net/core/sock.c:479 + dccp_v6_rcv+0xba5/0x1d00 net/dccp/ipv6.c:742 + ip6_input_finish+0x46d/0x17a0 net/ipv6/ip6_input.c:279 + NF_HOOK include/linux/netfilter.h:257 [inline] + ip6_input+0xdb/0x590 net/ipv6/ip6_input.c:322 + dst_input include/net/dst.h:507 [inline] + ip6_rcv_finish+0x289/0x890 net/ipv6/ip6_input.c:69 + NF_HOOK include/linux/netfilter.h:257 [inline] + ipv6_rcv+0x12ec/0x23d0 net/ipv6/ip6_input.c:203 + __netif_receive_skb_core+0x1ae5/0x3400 net/core/dev.c:4190 + __netif_receive_skb+0x2a/0x170 net/core/dev.c:4228 + process_backlog+0xe5/0x6c0 net/core/dev.c:4839 + napi_poll net/core/dev.c:5202 [inline] + net_rx_action+0xe70/0x1900 net/core/dev.c:5267 + __do_softirq+0x2fb/0xb7d kernel/softirq.c:284 +Freed: +PID = 15 + save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:57 + save_stack+0x43/0xd0 mm/kasan/kasan.c:502 + set_track mm/kasan/kasan.c:514 [inline] + kasan_slab_free+0x73/0xc0 mm/kasan/kasan.c:578 + slab_free_hook mm/slub.c:1355 [inline] + slab_free_freelist_hook mm/slub.c:1377 [inline] + slab_free mm/slub.c:2954 [inline] + kfree+0xe8/0x2b0 mm/slub.c:3874 + dccp_feat_entry_destructor.part.4+0x48/0x60 net/dccp/feat.c:418 + dccp_feat_entry_destructor net/dccp/feat.c:416 [inline] + dccp_feat_list_pop net/dccp/feat.c:541 [inline] + dccp_feat_activate_values+0x57f/0xab0 net/dccp/feat.c:1543 + dccp_create_openreq_child+0x464/0x610 net/dccp/minisocks.c:121 + dccp_v6_request_recv_sock+0x1f6/0x1960 net/dccp/ipv6.c:457 + dccp_check_req+0x335/0x5a0 net/dccp/minisocks.c:186 + dccp_v6_rcv+0x69e/0x1d00 net/dccp/ipv6.c:711 + ip6_input_finish+0x46d/0x17a0 net/ipv6/ip6_input.c:279 + NF_HOOK include/linux/netfilter.h:257 [inline] + ip6_input+0xdb/0x590 
net/ipv6/ip6_input.c:322 + dst_input include/net/dst.h:507 [inline] + ip6_rcv_finish+0x289/0x890 net/ipv6/ip6_input.c:69 + NF_HOOK include/linux/netfilter.h:257 [inline] + ipv6_rcv+0x12ec/0x23d0 net/ipv6/ip6_input.c:203 + __netif_receive_skb_core+0x1ae5/0x3400 net/core/dev.c:4190 + __netif_receive_skb+0x2a/0x170 net/core/dev.c:4228 + process_backlog+0xe5/0x6c0 net/core/dev.c:4839 + napi_poll net/core/dev.c:5202 [inline] + net_rx_action+0xe70/0x1900 net/core/dev.c:5267 + __do_softirq+0x2fb/0xb7d kernel/softirq.c:284 +Memory state around the buggy address: + ffff88003713bd00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc + ffff88003713bd80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc +>ffff88003713be00: fc fc fc fc fc fc fc fc fc fc fb fb fb fb fb fb + ^ + +Fixes: 079096f103fa ("tcp/dccp: install syn_recv requests into ehash table") +Signed-off-by: Eric Dumazet +Reported-by: Dmitry Vyukov +Tested-by: Dmitry Vyukov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/dccp.h | 1 + + net/dccp/minisocks.c | 24 ++++++++++++++++-------- + 2 files changed, 17 insertions(+), 8 deletions(-) + +--- a/include/linux/dccp.h ++++ b/include/linux/dccp.h +@@ -163,6 +163,7 @@ struct dccp_request_sock { + __u64 dreq_isr; + __u64 dreq_gsr; + __be32 dreq_service; ++ spinlock_t dreq_lock; + struct list_head dreq_featneg; + __u32 dreq_timestamp_echo; + __u32 dreq_timestamp_time; +--- a/net/dccp/minisocks.c ++++ b/net/dccp/minisocks.c +@@ -146,6 +146,13 @@ struct sock *dccp_check_req(struct sock + struct dccp_request_sock *dreq = dccp_rsk(req); + bool own_req; + ++ /* TCP/DCCP listeners became lockless. ++ * DCCP stores complex state in its request_sock, so we need ++ * a protection for them, now this code runs without being protected ++ * by the parent (listener) lock. 
++ */ ++ spin_lock_bh(&dreq->dreq_lock); ++ + /* Check for retransmitted REQUEST */ + if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) { + +@@ -160,7 +167,7 @@ struct sock *dccp_check_req(struct sock + inet_rtx_syn_ack(sk, req); + } + /* Network Duplicate, discard packet */ +- return NULL; ++ goto out; + } + + DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR; +@@ -186,20 +193,20 @@ struct sock *dccp_check_req(struct sock + + child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL, + req, &own_req); +- if (!child) +- goto listen_overflow; +- +- return inet_csk_complete_hashdance(sk, child, req, own_req); ++ if (child) { ++ child = inet_csk_complete_hashdance(sk, child, req, own_req); ++ goto out; ++ } + +-listen_overflow: +- dccp_pr_debug("listen_overflow!\n"); + DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY; + drop: + if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET) + req->rsk_ops->send_reset(sk, skb); + + inet_csk_reqsk_queue_drop(sk, req); +- return NULL; ++out: ++ spin_unlock_bh(&dreq->dreq_lock); ++ return child; + } + + EXPORT_SYMBOL_GPL(dccp_check_req); +@@ -250,6 +257,7 @@ int dccp_reqsk_init(struct request_sock + { + struct dccp_request_sock *dreq = dccp_rsk(req); + ++ spin_lock_init(&dreq->dreq_lock); + inet_rsk(req)->ir_rmt_port = dccp_hdr(skb)->dccph_sport; + inet_rsk(req)->ir_num = ntohs(dccp_hdr(skb)->dccph_dport); + inet_rsk(req)->acked = 0; diff --git a/queue-4.10/dccp-tcp-fix-routing-redirect-race.patch b/queue-4.10/dccp-tcp-fix-routing-redirect-race.patch new file mode 100644 index 00000000000..2d40cbfc9fc --- /dev/null +++ b/queue-4.10/dccp-tcp-fix-routing-redirect-race.patch @@ -0,0 +1,160 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: Jon Maxwell +Date: Fri, 10 Mar 2017 16:40:33 +1100 +Subject: dccp/tcp: fix routing redirect race + +From: Jon Maxwell + + +[ Upstream commit 45caeaa5ac0b4b11784ac6f932c0ad4c6b67cda0 ] + +As Eric Dumazet pointed out this also needs to be fixed in IPv6. 
+v2: Contains the IPv6 tcp/Ipv6 dccp patches as well. + +We have seen a few incidents lately where a dst_entry has been freed +with a dangling TCP socket reference (sk->sk_dst_cache) pointing to that +dst_entry. If the conditions/timings are right a crash then ensues when the +freed dst_entry is referenced later on. A Common crashing back trace is: + + #8 [] page_fault at ffffffff8163e648 + [exception RIP: __tcp_ack_snd_check+74] +. +. + #9 [] tcp_rcv_established at ffffffff81580b64 +#10 [] tcp_v4_do_rcv at ffffffff8158b54a +#11 [] tcp_v4_rcv at ffffffff8158cd02 +#12 [] ip_local_deliver_finish at ffffffff815668f4 +#13 [] ip_local_deliver at ffffffff81566bd9 +#14 [] ip_rcv_finish at ffffffff8156656d +#15 [] ip_rcv at ffffffff81566f06 +#16 [] __netif_receive_skb_core at ffffffff8152b3a2 +#17 [] __netif_receive_skb at ffffffff8152b608 +#18 [] netif_receive_skb at ffffffff8152b690 +#19 [] vmxnet3_rq_rx_complete at ffffffffa015eeaf [vmxnet3] +#20 [] vmxnet3_poll_rx_only at ffffffffa015f32a [vmxnet3] +#21 [] net_rx_action at ffffffff8152bac2 +#22 [] __do_softirq at ffffffff81084b4f +#23 [] call_softirq at ffffffff8164845c +#24 [] do_softirq at ffffffff81016fc5 +#25 [] irq_exit at ffffffff81084ee5 +#26 [] do_IRQ at ffffffff81648ff8 + +Of course it may happen with other NIC drivers as well. + +It's found the freed dst_entry here: + + 224 static bool tcp_in_quickack_mode(struct sock *sk)↩ + 225 {↩ + 226 ▹ const struct inet_connection_sock *icsk = inet_csk(sk);↩ + 227 ▹ const struct dst_entry *dst = __sk_dst_get(sk);↩ + 228 ↩ + 229 ▹ return (dst && dst_metric(dst, RTAX_QUICKACK)) ||↩ + 230 ▹ ▹ (icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong);↩ + 231 }↩ + +But there are other backtraces attributed to the same freed dst_entry in +netfilter code as well. + +All the vmcores showed 2 significant clues: + +- Remote hosts behind the default gateway had always been redirected to a +different gateway. A rtable/dst_entry will be added for that host.
Making +more dst_entrys with lower reference counts. Making this more probable. + +- All vmcores showed a positive LockDroppedIcmps value, e.g: + +LockDroppedIcmps 267 + +A closer look at the tcp_v4_err() handler revealed that do_redirect() will run +regardless of whether user space has the socket locked. This can result in a +race condition where the same dst_entry cached in sk->sk_dst_cache can be +decremented twice for the same socket via: + +do_redirect()->__sk_dst_check()-> dst_release(). + +Which leads to the dst_entry being prematurely freed with another socket +pointing to it via sk->sk_dst_cache and a subsequent crash. + +To fix this skip do_redirect() if user space has the socket locked. Instead let +the redirect take place later when user space does not have the socket +locked. + +The dccp/IPv6 code is very similar in this respect, so fixing it there too. + +As Eric Garver pointed out the following commit now invalidates routes. Which +can set the dst->obsolete flag so that ipv4_dst_check() returns null and +triggers the dst_release(). + +Fixes: ceb3320610d6 ("ipv4: Kill routes during PMTU/redirect updates.") +Cc: Eric Garver +Cc: Hannes Sowa +Signed-off-by: Jon Maxwell +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/dccp/ipv4.c | 3 ++- + net/dccp/ipv6.c | 8 +++++--- + net/ipv4/tcp_ipv4.c | 3 ++- + net/ipv6/tcp_ipv6.c | 8 +++++--- + 4 files changed, 14 insertions(+), 8 deletions(-) + +--- a/net/dccp/ipv4.c ++++ b/net/dccp/ipv4.c +@@ -289,7 +289,8 @@ static void dccp_v4_err(struct sk_buff * + + switch (type) { + case ICMP_REDIRECT: +- dccp_do_redirect(skb, sk); ++ if (!sock_owned_by_user(sk)) ++ dccp_do_redirect(skb, sk); + goto out; + case ICMP_SOURCE_QUENCH: + /* Just silently ignore these.
*/ +--- a/net/dccp/ipv6.c ++++ b/net/dccp/ipv6.c +@@ -122,10 +122,12 @@ static void dccp_v6_err(struct sk_buff * + np = inet6_sk(sk); + + if (type == NDISC_REDIRECT) { +- struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); ++ if (!sock_owned_by_user(sk)) { ++ struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); + +- if (dst) +- dst->ops->redirect(dst, sk, skb); ++ if (dst) ++ dst->ops->redirect(dst, sk, skb); ++ } + goto out; + } + +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -421,7 +421,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb + + switch (type) { + case ICMP_REDIRECT: +- do_redirect(icmp_skb, sk); ++ if (!sock_owned_by_user(sk)) ++ do_redirect(icmp_skb, sk); + goto out; + case ICMP_SOURCE_QUENCH: + /* Just silently ignore these. */ +--- a/net/ipv6/tcp_ipv6.c ++++ b/net/ipv6/tcp_ipv6.c +@@ -382,10 +382,12 @@ static void tcp_v6_err(struct sk_buff *s + np = inet6_sk(sk); + + if (type == NDISC_REDIRECT) { +- struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); ++ if (!sock_owned_by_user(sk)) { ++ struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); + +- if (dst) +- dst->ops->redirect(dst, sk, skb); ++ if (dst) ++ dst->ops->redirect(dst, sk, skb); ++ } + goto out; + } + diff --git a/queue-4.10/dccp-unlock-sock-before-calling-sk_free.patch b/queue-4.10/dccp-unlock-sock-before-calling-sk_free.patch new file mode 100644 index 00000000000..d95ac462d7d --- /dev/null +++ b/queue-4.10/dccp-unlock-sock-before-calling-sk_free.patch @@ -0,0 +1,81 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: Arnaldo Carvalho de Melo +Date: Wed, 1 Mar 2017 16:35:07 -0300 +Subject: dccp: Unlock sock before calling sk_free() + +From: Arnaldo Carvalho de Melo + + +[ Upstream commit d5afb6f9b6bb2c57bd0c05e76e12489dc0d037d9 ] + +The code where sk_clone() came from created a new socket and locked it, +but then, on the error path didn't unlock it. 
+ +This problem stayed there for a long while, till b0691c8ee7c2 ("net: +Unlock sock before calling sk_free()") fixed it, but unfortunately the +callers of sk_clone() (now sk_clone_locked()) were not audited and the +one in dccp_create_openreq_child() remained. + +Now in the age of the syzkaller fuzzer, this was finally uncovered, as +reported by Dmitry: + + ---- 8< ---- + +I've got the following report while running syzkaller fuzzer on +86292b33d4b7 ("Merge branch 'akpm' (patches from Andrew)") + + [ BUG: held lock freed! ] + 4.10.0+ #234 Not tainted + ------------------------- + syz-executor6/6898 is freeing memory + ffff88006286cac0-ffff88006286d3b7, with a lock still held there! + (slock-AF_INET6){+.-...}, at: [] spin_lock + include/linux/spinlock.h:299 [inline] + (slock-AF_INET6){+.-...}, at: [] + sk_clone_lock+0x3d9/0x12c0 net/core/sock.c:1504 + 5 locks held by syz-executor6/6898: + #0: (sk_lock-AF_INET6){+.+.+.}, at: [] lock_sock + include/net/sock.h:1460 [inline] + #0: (sk_lock-AF_INET6){+.+.+.}, at: [] + inet_stream_connect+0x44/0xa0 net/ipv4/af_inet.c:681 + #1: (rcu_read_lock){......}, at: [] + inet6_csk_xmit+0x12a/0x5d0 net/ipv6/inet6_connection_sock.c:126 + #2: (rcu_read_lock){......}, at: [] __skb_unlink + include/linux/skbuff.h:1767 [inline] + #2: (rcu_read_lock){......}, at: [] __skb_dequeue + include/linux/skbuff.h:1783 [inline] + #2: (rcu_read_lock){......}, at: [] + process_backlog+0x264/0x730 net/core/dev.c:4835 + #3: (rcu_read_lock){......}, at: [] + ip6_input_finish+0x0/0x1700 net/ipv6/ip6_input.c:59 + #4: (slock-AF_INET6){+.-...}, at: [] spin_lock + include/linux/spinlock.h:299 [inline] + #4: (slock-AF_INET6){+.-...}, at: [] + sk_clone_lock+0x3d9/0x12c0 net/core/sock.c:1504 + +Fix it just like was done by b0691c8ee7c2 ("net: Unlock sock before calling +sk_free()").
+ +Reported-by: Dmitry Vyukov +Cc: Cong Wang +Cc: Eric Dumazet +Cc: Gerrit Renker +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/20170301153510.GE15145@kernel.org +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/dccp/minisocks.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/dccp/minisocks.c ++++ b/net/dccp/minisocks.c +@@ -122,6 +122,7 @@ struct sock *dccp_create_openreq_child(c + /* It is still raw copy of parent, so invalidate + * destructor and make plain sk_free() */ + newsk->sk_destruct = NULL; ++ bh_unlock_sock(newsk); + sk_free(newsk); + return NULL; + } diff --git a/queue-4.10/geneve-lock-rcu-on-tx-path.patch b/queue-4.10/geneve-lock-rcu-on-tx-path.patch new file mode 100644 index 00000000000..113dc223682 --- /dev/null +++ b/queue-4.10/geneve-lock-rcu-on-tx-path.patch @@ -0,0 +1,38 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: Jakub Kicinski +Date: Fri, 24 Feb 2017 11:43:37 -0800 +Subject: geneve: lock RCU on TX path + +From: Jakub Kicinski + + +[ Upstream commit a717e3f740803cc88bd5c9a70c93504f6a368663 ] + +There are no guarantees that callers of the TX path will hold +the RCU lock. Grab it explicitly. + +Fixes: fceb9c3e3825 ("geneve: avoid using stale geneve socket.") +Signed-off-by: Jakub Kicinski +Signed-off-by: David S.
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/geneve.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/net/geneve.c ++++ b/drivers/net/geneve.c +@@ -881,12 +881,14 @@ static netdev_tx_t geneve_xmit(struct sk + info = &geneve->info; + } + ++ rcu_read_lock(); + #if IS_ENABLED(CONFIG_IPV6) + if (info->mode & IP_TUNNEL_INFO_IPV6) + err = geneve6_xmit_skb(skb, dev, geneve, info); + else + #endif + err = geneve_xmit_skb(skb, dev, geneve, info); ++ rcu_read_unlock(); + + if (likely(!err)) + return NETDEV_TX_OK; diff --git a/queue-4.10/ipv4-add-missing-initialization-for-flowi4_uid.patch b/queue-4.10/ipv4-add-missing-initialization-for-flowi4_uid.patch new file mode 100644 index 00000000000..a7edec90590 --- /dev/null +++ b/queue-4.10/ipv4-add-missing-initialization-for-flowi4_uid.patch @@ -0,0 +1,68 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: Julian Anastasov +Date: Sun, 26 Feb 2017 15:50:52 +0200 +Subject: ipv4: add missing initialization for flowi4_uid + +From: Julian Anastasov + + +[ Upstream commit 8bcfd0925ef15f072ba1e7bee2c25e9e1b5fd6ca ] + +Avoid matching of random stack value for uid when rules +are looked up on input route or when RP filter is used. +Problem should affect only setups that use ip rules with +uid range. + +Fixes: 622ec2c9d524 ("net: core: add UID to flows, rules, and routes") +Signed-off-by: Julian Anastasov +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/fib_frontend.c | 6 +++--- + net/ipv4/route.c | 1 + + 2 files changed, 4 insertions(+), 3 deletions(-) + +--- a/net/ipv4/fib_frontend.c ++++ b/net/ipv4/fib_frontend.c +@@ -319,7 +319,7 @@ static int __fib_validate_source(struct + int ret, no_addr; + struct fib_result res; + struct flowi4 fl4; +- struct net *net; ++ struct net *net = dev_net(dev); + bool dev_match; + + fl4.flowi4_oif = 0; +@@ -332,6 +332,7 @@ static int __fib_validate_source(struct + fl4.flowi4_scope = RT_SCOPE_UNIVERSE; + fl4.flowi4_tun_key.tun_id = 0; + fl4.flowi4_flags = 0; ++ fl4.flowi4_uid = sock_net_uid(net, NULL); + + no_addr = idev->ifa_list == NULL; + +@@ -339,13 +340,12 @@ static int __fib_validate_source(struct + + trace_fib_validate_source(dev, &fl4); + +- net = dev_net(dev); + if (fib_lookup(net, &fl4, &res, 0)) + goto last_resort; + if (res.type != RTN_UNICAST && + (res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev))) + goto e_inval; +- if (!rpf && !fib_num_tclassid_users(dev_net(dev)) && ++ if (!rpf && !fib_num_tclassid_users(net) && + (dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev))) + goto last_resort; + fib_combine_itag(itag, &res); +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -1858,6 +1858,7 @@ static int ip_route_input_slow(struct sk + fl4.flowi4_flags = 0; + fl4.daddr = daddr; + fl4.saddr = saddr; ++ fl4.flowi4_uid = sock_net_uid(net, NULL); + err = fib_lookup(net, &fl4, &res, 0); + if (err != 0) { + if (!IN_DEV_FORWARD(in_dev)) diff --git a/queue-4.10/ipv4-mask-tos-for-input-route.patch b/queue-4.10/ipv4-mask-tos-for-input-route.patch new file mode 100644 index 00000000000..7099db61c70 --- /dev/null +++ b/queue-4.10/ipv4-mask-tos-for-input-route.patch @@ -0,0 +1,35 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: Julian Anastasov +Date: Sun, 26 Feb 2017 17:14:35 +0200 +Subject: ipv4: mask tos for input route + +From: Julian Anastasov + + +[ Upstream commit 6e28099d38c0e50d62c1afc054e37e573adf3d21 ] + 
+Restore the lost masking of TOS in input route code to +allow ip rules to match it properly. + +Problem [1] noticed by Shmulik Ladkani + +[1] http://marc.info/?t=137331755300040&r=1&w=2 + +Fixes: 89aef8921bfb ("ipv4: Delete routing cache.") +Signed-off-by: Julian Anastasov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/route.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -1991,6 +1991,7 @@ int ip_route_input_noref(struct sk_buff + { + int res; + ++ tos &= IPTOS_RT_MASK; + rcu_read_lock(); + + /* Multicast recognition logic is moved from route cache to here. diff --git a/queue-4.10/ipv6-avoid-write-to-a-possibly-cloned-skb.patch b/queue-4.10/ipv6-avoid-write-to-a-possibly-cloned-skb.patch new file mode 100644 index 00000000000..6cfd4d58c03 --- /dev/null +++ b/queue-4.10/ipv6-avoid-write-to-a-possibly-cloned-skb.patch @@ -0,0 +1,65 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: Florian Westphal +Date: Mon, 13 Mar 2017 16:24:28 +0100 +Subject: ipv6: avoid write to a possibly cloned skb + +From: Florian Westphal + + +[ Upstream commit 79e49503efe53a8c51d8b695bedc8a346c5e4a87 ] + +ip6_fragment, in case skb has a fraglist, checks if the +skb is cloned. If it is, it will move to the 'slow path' and allocates +new skbs for each fragment. + +However, right before entering the slowpath loop, it updates the +nexthdr value of the last ipv6 extension header to NEXTHDR_FRAGMENT, +to account for the fragment header that will be inserted in the new +ipv6-fragment skbs. + +In case original skb is cloned this munges nexthdr value of another +skb. Avoid this by doing the nexthdr update for each of the new fragment +skbs separately. + +This was observed with tcpdump on a bridge device where netfilter ipv6 +reassembly is active: tcpdump shows malformed fragment headers as +the l4 header (icmpv6, tcp, etc). is decoded as a fragment header. 
+ +Cc: Hannes Frederic Sowa +Reported-by: Andreas Karis +Signed-off-by: Florian Westphal +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_output.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -767,13 +767,14 @@ slow_path: + * Fragment the datagram. + */ + +- *prevhdr = NEXTHDR_FRAGMENT; + troom = rt->dst.dev->needed_tailroom; + + /* + * Keep copying data until we run out. + */ + while (left > 0) { ++ u8 *fragnexthdr_offset; ++ + len = left; + /* IF: it doesn't fit, use 'mtu' - the data space left */ + if (len > mtu) +@@ -818,6 +819,10 @@ slow_path: + */ + skb_copy_from_linear_data(skb, skb_network_header(frag), hlen); + ++ fragnexthdr_offset = skb_network_header(frag); ++ fragnexthdr_offset += prevhdr - skb_network_header(skb); ++ *fragnexthdr_offset = NEXTHDR_FRAGMENT; ++ + /* + * Build fragment header. + */ diff --git a/queue-4.10/ipv6-make-ecmp-route-replacement-less-greedy.patch b/queue-4.10/ipv6-make-ecmp-route-replacement-less-greedy.patch new file mode 100644 index 00000000000..64ed1a7c1e5 --- /dev/null +++ b/queue-4.10/ipv6-make-ecmp-route-replacement-less-greedy.patch @@ -0,0 +1,71 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: Sabrina Dubroca +Date: Mon, 13 Mar 2017 13:28:09 +0100 +Subject: ipv6: make ECMP route replacement less greedy + +From: Sabrina Dubroca + + +[ Upstream commit 67e194007be08d071294456274dd53e0a04fdf90 ] + +Commit 27596472473a ("ipv6: fix ECMP route replacement") introduced a +loop that removes all siblings of an ECMP route that is being +replaced. However, this loop doesn't stop when it has replaced +siblings, and keeps removing other routes with a higher metric. +We also end up triggering the WARN_ON after the loop, because after +this nsiblings < 0. + +Instead, stop the loop when we have taken care of all routes with the +same metric as the route being replaced. 
+ + Reproducer: + =========== + #!/bin/sh + + ip netns add ns1 + ip netns add ns2 + ip -net ns1 link set lo up + + for x in 0 1 2 ; do + ip link add veth$x netns ns2 type veth peer name eth$x netns ns1 + ip -net ns1 link set eth$x up + ip -net ns2 link set veth$x up + done + + ip -net ns1 -6 r a 2000::/64 nexthop via fe80::0 dev eth0 \ + nexthop via fe80::1 dev eth1 nexthop via fe80::2 dev eth2 + ip -net ns1 -6 r a 2000::/64 via fe80::42 dev eth0 metric 256 + ip -net ns1 -6 r a 2000::/64 via fe80::43 dev eth0 metric 2048 + + echo "before replace, 3 routes" + ip -net ns1 -6 r | grep -v '^fe80\|^ff00' + echo + + ip -net ns1 -6 r c 2000::/64 nexthop via fe80::4 dev eth0 \ + nexthop via fe80::5 dev eth1 nexthop via fe80::6 dev eth2 + + echo "after replace, only 2 routes, metric 2048 is gone" + ip -net ns1 -6 r | grep -v '^fe80\|^ff00' + +Fixes: 27596472473a ("ipv6: fix ECMP route replacement") +Signed-off-by: Sabrina Dubroca +Acked-by: Nicolas Dichtel +Reviewed-by: Xin Long +Reviewed-by: Michal Kubecek +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_fib.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/ipv6/ip6_fib.c ++++ b/net/ipv6/ip6_fib.c +@@ -908,6 +908,8 @@ add: + ins = &rt->dst.rt6_next; + iter = *ins; + while (iter) { ++ if (iter->rt6i_metric > rt->rt6i_metric) ++ break; + if (rt6_qualify_for_ecmp(iter)) { + *ins = iter->dst.rt6_next; + fib6_purge_rt(iter, fn, info->nl_net); diff --git a/queue-4.10/ipv6-orphan-skbs-in-reassembly-unit.patch b/queue-4.10/ipv6-orphan-skbs-in-reassembly-unit.patch new file mode 100644 index 00000000000..5c4751ee08a --- /dev/null +++ b/queue-4.10/ipv6-orphan-skbs-in-reassembly-unit.patch @@ -0,0 +1,172 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: Eric Dumazet +Date: Wed, 1 Mar 2017 14:45:06 -0800 +Subject: ipv6: orphan skbs in reassembly unit + +From: Eric Dumazet + + +[ Upstream commit 48cac18ecf1de82f76259a54402c3adb7839ad01 ] + +Andrey reported a use-after-free in IPv6 stack. 
+ +Issue here is that we free the socket while it still has skb +in TX path and in some queues. + +It happens here because IPv6 reassembly unit messes skb->truesize, +breaking skb_set_owner_w() badly. + +We fixed a similar issue for IPV4 in commit 8282f27449bf ("inet: frag: +Always orphan skbs inside ip_defrag()") +Acked-by: Joe Stringer + +================================================================== +BUG: KASAN: use-after-free in sock_wfree+0x118/0x120 +Read of size 8 at addr ffff880062da0060 by task a.out/4140 + +page:ffffea00018b6800 count:1 mapcount:0 mapping: (null) +index:0x0 compound_mapcount: 0 +flags: 0x100000000008100(slab|head) +raw: 0100000000008100 0000000000000000 0000000000000000 0000000180130013 +raw: dead000000000100 dead000000000200 ffff88006741f140 0000000000000000 +page dumped because: kasan: bad access detected + +CPU: 0 PID: 4140 Comm: a.out Not tainted 4.10.0-rc3+ #59 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:15 + dump_stack+0x292/0x398 lib/dump_stack.c:51 + describe_address mm/kasan/report.c:262 + kasan_report_error+0x121/0x560 mm/kasan/report.c:370 + kasan_report mm/kasan/report.c:392 + __asan_report_load8_noabort+0x3e/0x40 mm/kasan/report.c:413 + sock_flag ./arch/x86/include/asm/bitops.h:324 + sock_wfree+0x118/0x120 net/core/sock.c:1631 + skb_release_head_state+0xfc/0x250 net/core/skbuff.c:655 + skb_release_all+0x15/0x60 net/core/skbuff.c:668 + __kfree_skb+0x15/0x20 net/core/skbuff.c:684 + kfree_skb+0x16e/0x4e0 net/core/skbuff.c:705 + inet_frag_destroy+0x121/0x290 net/ipv4/inet_fragment.c:304 + inet_frag_put ./include/net/inet_frag.h:133 + nf_ct_frag6_gather+0x1125/0x38b0 net/ipv6/netfilter/nf_conntrack_reasm.c:617 + ipv6_defrag+0x21b/0x350 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c:68 + nf_hook_entry_hookfn ./include/linux/netfilter.h:102 + nf_hook_slow+0xc3/0x290 net/netfilter/core.c:310 + nf_hook ./include/linux/netfilter.h:212 + 
__ip6_local_out+0x52c/0xaf0 net/ipv6/output_core.c:160 + ip6_local_out+0x2d/0x170 net/ipv6/output_core.c:170 + ip6_send_skb+0xa1/0x340 net/ipv6/ip6_output.c:1722 + ip6_push_pending_frames+0xb3/0xe0 net/ipv6/ip6_output.c:1742 + rawv6_push_pending_frames net/ipv6/raw.c:613 + rawv6_sendmsg+0x2cff/0x4130 net/ipv6/raw.c:927 + inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:744 + sock_sendmsg_nosec net/socket.c:635 + sock_sendmsg+0xca/0x110 net/socket.c:645 + sock_write_iter+0x326/0x620 net/socket.c:848 + new_sync_write fs/read_write.c:499 + __vfs_write+0x483/0x760 fs/read_write.c:512 + vfs_write+0x187/0x530 fs/read_write.c:560 + SYSC_write fs/read_write.c:607 + SyS_write+0xfb/0x230 fs/read_write.c:599 + entry_SYSCALL_64_fastpath+0x1f/0xc2 arch/x86/entry/entry_64.S:203 +RIP: 0033:0x7ff26e6f5b79 +RSP: 002b:00007ff268e0ed98 EFLAGS: 00000206 ORIG_RAX: 0000000000000001 +RAX: ffffffffffffffda RBX: 00007ff268e0f9c0 RCX: 00007ff26e6f5b79 +RDX: 0000000000000010 RSI: 0000000020f50fe1 RDI: 0000000000000003 +RBP: 00007ff26ebc1220 R08: 0000000000000000 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000206 R12: 0000000000000000 +R13: 00007ff268e0f9c0 R14: 00007ff26efec040 R15: 0000000000000003 + +The buggy address belongs to the object at ffff880062da0000 + which belongs to the cache RAWv6 of size 1504 +The buggy address ffff880062da0060 is located 96 bytes inside + of 1504-byte region [ffff880062da0000, ffff880062da05e0) + +Freed by task 4113: + save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:57 + save_stack+0x43/0xd0 mm/kasan/kasan.c:502 + set_track mm/kasan/kasan.c:514 + kasan_slab_free+0x73/0xc0 mm/kasan/kasan.c:578 + slab_free_hook mm/slub.c:1352 + slab_free_freelist_hook mm/slub.c:1374 + slab_free mm/slub.c:2951 + kmem_cache_free+0xb2/0x2c0 mm/slub.c:2973 + sk_prot_free net/core/sock.c:1377 + __sk_destruct+0x49c/0x6e0 net/core/sock.c:1452 + sk_destruct+0x47/0x80 net/core/sock.c:1460 + __sk_free+0x57/0x230 net/core/sock.c:1468 + sk_free+0x23/0x30 
net/core/sock.c:1479 + sock_put ./include/net/sock.h:1638 + sk_common_release+0x31e/0x4e0 net/core/sock.c:2782 + rawv6_close+0x54/0x80 net/ipv6/raw.c:1214 + inet_release+0xed/0x1c0 net/ipv4/af_inet.c:425 + inet6_release+0x50/0x70 net/ipv6/af_inet6.c:431 + sock_release+0x8d/0x1e0 net/socket.c:599 + sock_close+0x16/0x20 net/socket.c:1063 + __fput+0x332/0x7f0 fs/file_table.c:208 + ____fput+0x15/0x20 fs/file_table.c:244 + task_work_run+0x19b/0x270 kernel/task_work.c:116 + exit_task_work ./include/linux/task_work.h:21 + do_exit+0x186b/0x2800 kernel/exit.c:839 + do_group_exit+0x149/0x420 kernel/exit.c:943 + SYSC_exit_group kernel/exit.c:954 + SyS_exit_group+0x1d/0x20 kernel/exit.c:952 + entry_SYSCALL_64_fastpath+0x1f/0xc2 arch/x86/entry/entry_64.S:203 + +Allocated by task 4115: + save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:57 + save_stack+0x43/0xd0 mm/kasan/kasan.c:502 + set_track mm/kasan/kasan.c:514 + kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:605 + kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:544 + slab_post_alloc_hook mm/slab.h:432 + slab_alloc_node mm/slub.c:2708 + slab_alloc mm/slub.c:2716 + kmem_cache_alloc+0x1af/0x250 mm/slub.c:2721 + sk_prot_alloc+0x65/0x2a0 net/core/sock.c:1334 + sk_alloc+0x105/0x1010 net/core/sock.c:1396 + inet6_create+0x44d/0x1150 net/ipv6/af_inet6.c:183 + __sock_create+0x4f6/0x880 net/socket.c:1199 + sock_create net/socket.c:1239 + SYSC_socket net/socket.c:1269 + SyS_socket+0xf9/0x230 net/socket.c:1249 + entry_SYSCALL_64_fastpath+0x1f/0xc2 arch/x86/entry/entry_64.S:203 + +Memory state around the buggy address: + ffff880062d9ff00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc + ffff880062d9ff80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc +>ffff880062da0000: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + ^ + ffff880062da0080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + ffff880062da0100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb +================================================================== + +Reported-by: 
Andrey Konovalov +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/netfilter/nf_conntrack_reasm.c | 1 + + net/openvswitch/conntrack.c | 1 - + 2 files changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv6/netfilter/nf_conntrack_reasm.c ++++ b/net/ipv6/netfilter/nf_conntrack_reasm.c +@@ -589,6 +589,7 @@ int nf_ct_frag6_gather(struct net *net, + hdr = ipv6_hdr(skb); + fhdr = (struct frag_hdr *)skb_transport_header(skb); + ++ skb_orphan(skb); + fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr, + skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr)); + if (fq == NULL) { +--- a/net/openvswitch/conntrack.c ++++ b/net/openvswitch/conntrack.c +@@ -367,7 +367,6 @@ static int handle_fragments(struct net * + } else if (key->eth.type == htons(ETH_P_IPV6)) { + enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone; + +- skb_orphan(skb); + memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); + err = nf_ct_frag6_gather(net, skb, user); + if (err) { diff --git a/queue-4.10/l2tp-avoid-use-after-free-caused-by-l2tp_ip_backlog_recv.patch b/queue-4.10/l2tp-avoid-use-after-free-caused-by-l2tp_ip_backlog_recv.patch new file mode 100644 index 00000000000..e068923e317 --- /dev/null +++ b/queue-4.10/l2tp-avoid-use-after-free-caused-by-l2tp_ip_backlog_recv.patch @@ -0,0 +1,32 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: Paul Hüber +Date: Sun, 26 Feb 2017 17:58:19 +0100 +Subject: l2tp: avoid use-after-free caused by l2tp_ip_backlog_recv + +From: Paul Hüber + + +[ Upstream commit 51fb60eb162ab84c5edf2ae9c63cf0b878e5547e ] + +l2tp_ip_backlog_recv may not return -1 if the packet gets dropped. +The return value is passed up to ip_local_deliver_finish, which treats +negative values as an IP protocol number for resubmission. + +Signed-off-by: Paul Hüber +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/l2tp/l2tp_ip.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/l2tp/l2tp_ip.c ++++ b/net/l2tp/l2tp_ip.c +@@ -381,7 +381,7 @@ static int l2tp_ip_backlog_recv(struct s + drop: + IP_INC_STATS(sock_net(sk), IPSTATS_MIB_INDISCARDS); + kfree_skb(skb); +- return -1; ++ return 0; + } + + /* Userspace will call sendmsg() on the tunnel socket to send L2TP diff --git a/queue-4.10/mlxsw-spectrum_router-avoid-potential-packets-loss.patch b/queue-4.10/mlxsw-spectrum_router-avoid-potential-packets-loss.patch new file mode 100644 index 00000000000..ee45a9a530d --- /dev/null +++ b/queue-4.10/mlxsw-spectrum_router-avoid-potential-packets-loss.patch @@ -0,0 +1,78 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: Ido Schimmel +Date: Tue, 28 Feb 2017 08:55:40 +0100 +Subject: mlxsw: spectrum_router: Avoid potential packets loss + +From: Ido Schimmel + + +[ Upstream commit f7df4923fa986247e93ec2cdff5ca168fff14dcf ] + +When the structure of the LPM tree changes (f.e., due to the addition of +a new prefix), we unbind the old tree and then bind the new one. This +may result in temporary packet loss. + +Instead, overwrite the old binding with the new one. + +Fixes: 6b75c4807db3 ("mlxsw: spectrum_router: Add virtual router management") +Signed-off-by: Ido Schimmel +Signed-off-by: Jiri Pirko +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 30 ++++++++++++------ + 1 file changed, 20 insertions(+), 10 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c ++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +@@ -496,30 +496,40 @@ static int + mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr, + struct mlxsw_sp_prefix_usage *req_prefix_usage) + { +- struct mlxsw_sp_lpm_tree *lpm_tree; ++ struct mlxsw_sp_lpm_tree *lpm_tree = vr->lpm_tree; ++ struct mlxsw_sp_lpm_tree *new_tree; ++ int err; + +- if (mlxsw_sp_prefix_usage_eq(req_prefix_usage, +- &vr->lpm_tree->prefix_usage)) ++ if (mlxsw_sp_prefix_usage_eq(req_prefix_usage, &lpm_tree->prefix_usage)) + return 0; + +- lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, req_prefix_usage, ++ new_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, req_prefix_usage, + vr->proto, false); +- if (IS_ERR(lpm_tree)) { ++ if (IS_ERR(new_tree)) { + /* We failed to get a tree according to the required + * prefix usage. However, the current tree might be still good + * for us if our requirement is subset of the prefixes used + * in the tree. 
+ */ + if (mlxsw_sp_prefix_usage_subset(req_prefix_usage, +- &vr->lpm_tree->prefix_usage)) ++ &lpm_tree->prefix_usage)) + return 0; +- return PTR_ERR(lpm_tree); ++ return PTR_ERR(new_tree); + } + +- mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, vr); +- mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree); ++ /* Prevent packet loss by overwriting existing binding */ ++ vr->lpm_tree = new_tree; ++ err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr); ++ if (err) ++ goto err_tree_bind; ++ mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree); ++ ++ return 0; ++ ++err_tree_bind: + vr->lpm_tree = lpm_tree; +- return mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr); ++ mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree); ++ return err; + } + + static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, diff --git a/queue-4.10/mpls-do-not-decrement-alive-counter-for-unregister-events.patch b/queue-4.10/mpls-do-not-decrement-alive-counter-for-unregister-events.patch new file mode 100644 index 00000000000..c9af7921ad8 --- /dev/null +++ b/queue-4.10/mpls-do-not-decrement-alive-counter-for-unregister-events.patch @@ -0,0 +1,53 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: David Ahern +Date: Fri, 10 Mar 2017 14:11:39 -0800 +Subject: mpls: Do not decrement alive counter for unregister events + +From: David Ahern + + +[ Upstream commit 79099aab38c8f5c746748b066ae74ba984fe2cc8 ] + +Multipath routes can be rendered useless when a device in one of the +paths is deleted. For example: + +$ ip -f mpls ro ls +100 + nexthop as to 200 via inet 172.16.2.2 dev virt12 + nexthop as to 300 via inet 172.16.3.2 dev br0 +101 + nexthop as to 201 via inet6 2000:2::2 dev virt12 + nexthop as to 301 via inet6 2000:3::2 dev br0 + +$ ip li del br0 + +When br0 is deleted the other hop is not considered in +mpls_select_multipath because of the alive check -- rt_nhn_alive +is 0. + +rt_nhn_alive is decremented once in mpls_ifdown when the device is taken +down (NETDEV_DOWN) and again when it is deleted (NETDEV_UNREGISTER). 
For +a 2 hop route, deleting one device drops the alive count to 0. Since +devices are taken down before unregistering, the decrement on +NETDEV_UNREGISTER is redundant. + +Fixes: c89359a42e2a4 ("mpls: support for dead routes") +Signed-off-by: David Ahern +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/mpls/af_mpls.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/mpls/af_mpls.c ++++ b/net/mpls/af_mpls.c +@@ -956,7 +956,8 @@ static void mpls_ifdown(struct net_devic + /* fall through */ + case NETDEV_CHANGE: + nh->nh_flags |= RTNH_F_LINKDOWN; +- ACCESS_ONCE(rt->rt_nhn_alive) = rt->rt_nhn_alive - 1; ++ if (event != NETDEV_UNREGISTER) ++ ACCESS_ONCE(rt->rt_nhn_alive) = rt->rt_nhn_alive - 1; + break; + } + if (event == NETDEV_UNREGISTER) diff --git a/queue-4.10/mpls-send-route-delete-notifications-when-router-module-is-unloaded.patch b/queue-4.10/mpls-send-route-delete-notifications-when-router-module-is-unloaded.patch new file mode 100644 index 00000000000..4499ac6bd41 --- /dev/null +++ b/queue-4.10/mpls-send-route-delete-notifications-when-router-module-is-unloaded.patch @@ -0,0 +1,33 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: David Ahern +Date: Fri, 10 Mar 2017 09:46:15 -0800 +Subject: mpls: Send route delete notifications when router module is unloaded + +From: David Ahern + + +[ Upstream commit e37791ec1ad785b59022ae211f63a16189bacebf ] + +When the mpls_router module is unloaded, mpls routes are deleted but +notifications are not sent to userspace leaving userspace caches +out of sync. Add the call to mpls_notify_route in mpls_net_exit as +routes are freed. + +Fixes: 0189197f44160 ("mpls: Basic routing support") +Signed-off-by: David Ahern +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/mpls/af_mpls.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/mpls/af_mpls.c ++++ b/net/mpls/af_mpls.c +@@ -1696,6 +1696,7 @@ static void mpls_net_exit(struct net *ne + for (index = 0; index < platform_labels; index++) { + struct mpls_route *rt = rtnl_dereference(platform_label[index]); + RCU_INIT_POINTER(platform_label[index], NULL); ++ mpls_notify_route(net, index, rt, NULL, NULL); + mpls_rt_free(rt); + } + rtnl_unlock(); diff --git a/queue-4.10/net-bridge-allow-ipv6-when-multicast-flood-is-disabled.patch b/queue-4.10/net-bridge-allow-ipv6-when-multicast-flood-is-disabled.patch new file mode 100644 index 00000000000..b81379fac9f --- /dev/null +++ b/queue-4.10/net-bridge-allow-ipv6-when-multicast-flood-is-disabled.patch @@ -0,0 +1,37 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: Mike Manning +Date: Wed, 1 Mar 2017 09:55:28 +0000 +Subject: net: bridge: allow IPv6 when multicast flood is disabled + +From: Mike Manning + + +[ Upstream commit 8953de2f02ad7b15e4964c82f9afd60f128e4e98 ] + +Even with multicast flooding turned off, IPv6 ND should still work so +that IPv6 connectivity is provided. Allow this by continuing to flood +multicast traffic originated by us. + +Fixes: b6cb5ac8331b ("net: bridge: add per-port multicast flood flag") +Cc: Nikolay Aleksandrov +Signed-off-by: Mike Manning +Acked-by: Nikolay Aleksandrov +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/br_forward.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/bridge/br_forward.c ++++ b/net/bridge/br_forward.c +@@ -186,8 +186,9 @@ void br_flood(struct net_bridge *br, str + /* Do not flood unicast traffic to ports that turn it off */ + if (pkt_type == BR_PKT_UNICAST && !(p->flags & BR_FLOOD)) + continue; ++ /* Do not flood if mc off, except for traffic we originate */ + if (pkt_type == BR_PKT_MULTICAST && +- !(p->flags & BR_MCAST_FLOOD)) ++ !(p->flags & BR_MCAST_FLOOD) && skb->dev != br->dev) + continue; + + /* Do not flood to ports that enable proxy ARP */ diff --git a/queue-4.10/net-don-t-call-strlen-on-the-user-buffer-in-packet_bind_spkt.patch b/queue-4.10/net-don-t-call-strlen-on-the-user-buffer-in-packet_bind_spkt.patch new file mode 100644 index 00000000000..039d6ecfffe --- /dev/null +++ b/queue-4.10/net-don-t-call-strlen-on-the-user-buffer-in-packet_bind_spkt.patch @@ -0,0 +1,108 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: Alexander Potapenko +Date: Wed, 1 Mar 2017 12:57:20 +0100 +Subject: net: don't call strlen() on the user buffer in packet_bind_spkt() + +From: Alexander Potapenko + + +[ Upstream commit 540e2894f7905538740aaf122bd8e0548e1c34a4 ] + +KMSAN (KernelMemorySanitizer, a new error detection tool) reports use of +uninitialized memory in packet_bind_spkt(): +Acked-by: Eric Dumazet + +================================================================== +BUG: KMSAN: use of unitialized memory +CPU: 0 PID: 1074 Comm: packet Not tainted 4.8.0-rc6+ #1891 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs +01/01/2011 + 0000000000000000 ffff88006b6dfc08 ffffffff82559ae8 ffff88006b6dfb48 + ffffffff818a7c91 ffffffff85b9c870 0000000000000092 ffffffff85b9c550 + 0000000000000000 0000000000000092 00000000ec400911 0000000000000002 +Call Trace: + [< inline >] __dump_stack lib/dump_stack.c:15 + [] dump_stack+0x238/0x290 lib/dump_stack.c:51 + [] 
kmsan_report+0x276/0x2e0 mm/kmsan/kmsan.c:1003 + [] __msan_warning+0x5b/0xb0 +mm/kmsan/kmsan_instr.c:424 + [< inline >] strlen lib/string.c:484 + [] strlcpy+0x9d/0x200 lib/string.c:144 + [] packet_bind_spkt+0x144/0x230 +net/packet/af_packet.c:3132 + [] SYSC_bind+0x40d/0x5f0 net/socket.c:1370 + [] SyS_bind+0x82/0xa0 net/socket.c:1356 + [] entry_SYSCALL_64_fastpath+0x13/0x8f +arch/x86/entry/entry_64.o:? +chained origin: 00000000eba00911 + [] save_stack_trace+0x27/0x50 +arch/x86/kernel/stacktrace.c:67 + [< inline >] kmsan_save_stack_with_flags mm/kmsan/kmsan.c:322 + [< inline >] kmsan_save_stack mm/kmsan/kmsan.c:334 + [] kmsan_internal_chain_origin+0x118/0x1e0 +mm/kmsan/kmsan.c:527 + [] __msan_set_alloca_origin4+0xc3/0x130 +mm/kmsan/kmsan_instr.c:380 + [] SYSC_bind+0x129/0x5f0 net/socket.c:1356 + [] SyS_bind+0x82/0xa0 net/socket.c:1356 + [] entry_SYSCALL_64_fastpath+0x13/0x8f +arch/x86/entry/entry_64.o:? +origin description: ----address@SYSC_bind (origin=00000000eb400911) +================================================================== +(the line numbers are relative to 4.8-rc6, but the bug persists +upstream) + +, when I run the following program as root: + +===================================== + #include + #include + #include + #include + + int main() { + struct sockaddr addr; + memset(&addr, 0xff, sizeof(addr)); + addr.sa_family = AF_PACKET; + int fd = socket(PF_PACKET, SOCK_PACKET, htons(ETH_P_ALL)); + bind(fd, &addr, sizeof(addr)); + return 0; + } +===================================== + +This happens because addr.sa_data copied from the userspace is not +zero-terminated, and copying it with strlcpy() in packet_bind_spkt() +results in calling strlen() on the kernel copy of that non-terminated +buffer. + +Signed-off-by: Alexander Potapenko +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/packet/af_packet.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/net/packet/af_packet.c ++++ b/net/packet/af_packet.c +@@ -3082,7 +3082,7 @@ static int packet_bind_spkt(struct socke + int addr_len) + { + struct sock *sk = sock->sk; +- char name[15]; ++ char name[sizeof(uaddr->sa_data) + 1]; + + /* + * Check legality +@@ -3090,7 +3090,11 @@ static int packet_bind_spkt(struct socke + + if (addr_len != sizeof(struct sockaddr)) + return -EINVAL; +- strlcpy(name, uaddr->sa_data, sizeof(name)); ++ /* uaddr->sa_data comes from the userspace, it's not guaranteed to be ++ * zero-terminated. ++ */ ++ memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data)); ++ name[sizeof(uaddr->sa_data)] = 0; + + return packet_do_bind(sk, name, 0, pkt_sk(sk)->num); + } diff --git a/queue-4.10/net-fix-socket-refcounting-in-skb_complete_tx_timestamp.patch b/queue-4.10/net-fix-socket-refcounting-in-skb_complete_tx_timestamp.patch new file mode 100644 index 00000000000..c624c68cc05 --- /dev/null +++ b/queue-4.10/net-fix-socket-refcounting-in-skb_complete_tx_timestamp.patch @@ -0,0 +1,53 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: Eric Dumazet +Date: Fri, 3 Mar 2017 21:01:03 -0800 +Subject: net: fix socket refcounting in skb_complete_tx_timestamp() + +From: Eric Dumazet + + +[ Upstream commit 9ac25fc063751379cb77434fef9f3b088cd3e2f7 ] + +TX skbs do not necessarily hold a reference on skb->sk->sk_refcnt +By the time TX completion happens, sk_refcnt might be already 0. + +sock_hold()/sock_put() would then corrupt critical state, like +sk_wmem_alloc and lead to leaks or use after free. + +Fixes: 62bccb8cdb69 ("net-timestamp: Make the clone operation stand-alone from phy timestamping") +Signed-off-by: Eric Dumazet +Cc: Alexander Duyck +Cc: Johannes Berg +Cc: Soheil Hassas Yeganeh +Cc: Willem de Bruijn +Acked-by: Soheil Hassas Yeganeh +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/skbuff.c | 15 ++++++++------- + 1 file changed, 8 insertions(+), 7 deletions(-) + +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -3824,13 +3824,14 @@ void skb_complete_tx_timestamp(struct sk + if (!skb_may_tx_timestamp(sk, false)) + return; + +- /* take a reference to prevent skb_orphan() from freeing the socket */ +- sock_hold(sk); +- +- *skb_hwtstamps(skb) = *hwtstamps; +- __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND); +- +- sock_put(sk); ++ /* Take a reference to prevent skb_orphan() from freeing the socket, ++ * but only if the socket refcount is not zero. ++ */ ++ if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) { ++ *skb_hwtstamps(skb) = *hwtstamps; ++ __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND); ++ sock_put(sk); ++ } + } + EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp); + diff --git a/queue-4.10/net-fix-socket-refcounting-in-skb_complete_wifi_ack.patch b/queue-4.10/net-fix-socket-refcounting-in-skb_complete_wifi_ack.patch new file mode 100644 index 00000000000..7df62af4896 --- /dev/null +++ b/queue-4.10/net-fix-socket-refcounting-in-skb_complete_wifi_ack.patch @@ -0,0 +1,62 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: Eric Dumazet +Date: Fri, 3 Mar 2017 21:01:02 -0800 +Subject: net: fix socket refcounting in skb_complete_wifi_ack() + +From: Eric Dumazet + + +[ Upstream commit dd4f10722aeb10f4f582948839f066bebe44e5fb ] + +TX skbs do not necessarily hold a reference on skb->sk->sk_refcnt +By the time TX completion happens, sk_refcnt might be already 0. + +sock_hold()/sock_put() would then corrupt critical state, like +sk_wmem_alloc. + +Fixes: bf7fa551e0ce ("mac80211: Resolve sk_refcnt/sk_wmem_alloc issue in wifi ack path") +Signed-off-by: Eric Dumazet +Cc: Alexander Duyck +Cc: Johannes Berg +Cc: Soheil Hassas Yeganeh +Cc: Willem de Bruijn +Acked-by: Soheil Hassas Yeganeh +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/skbuff.c | 15 ++++++++------- + 1 file changed, 8 insertions(+), 7 deletions(-) + +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -3889,7 +3889,7 @@ void skb_complete_wifi_ack(struct sk_buf + { + struct sock *sk = skb->sk; + struct sock_exterr_skb *serr; +- int err; ++ int err = 1; + + skb->wifi_acked_valid = 1; + skb->wifi_acked = acked; +@@ -3899,14 +3899,15 @@ void skb_complete_wifi_ack(struct sk_buf + serr->ee.ee_errno = ENOMSG; + serr->ee.ee_origin = SO_EE_ORIGIN_TXSTATUS; + +- /* take a reference to prevent skb_orphan() from freeing the socket */ +- sock_hold(sk); +- +- err = sock_queue_err_skb(sk, skb); ++ /* Take a reference to prevent skb_orphan() from freeing the socket, ++ * but only if the socket refcount is not zero. ++ */ ++ if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) { ++ err = sock_queue_err_skb(sk, skb); ++ sock_put(sk); ++ } + if (err) + kfree_skb(skb); +- +- sock_put(sk); + } + EXPORT_SYMBOL_GPL(skb_complete_wifi_ack); + diff --git a/queue-4.10/net-mlx5e-do-not-reduce-lro-wqe-size-when-not-using-build_skb.patch b/queue-4.10/net-mlx5e-do-not-reduce-lro-wqe-size-when-not-using-build_skb.patch new file mode 100644 index 00000000000..1996bbe51e6 --- /dev/null +++ b/queue-4.10/net-mlx5e-do-not-reduce-lro-wqe-size-when-not-using-build_skb.patch @@ -0,0 +1,56 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: Tariq Toukan +Date: Wed, 22 Feb 2017 17:20:13 +0200 +Subject: net/mlx5e: Do not reduce LRO WQE size when not using build_skb + +From: Tariq Toukan + + +[ Upstream commit 4078e637c12f1e0a74293f1ec9563f42bff14a03 ] + +When rq_type is Striding RQ, no room of SKB_RESERVE is needed +as SKB allocation is not done via build_skb. + +Fixes: e4b85508072b ("net/mlx5e: Slightly reduce hardware LRO size") +Signed-off-by: Tariq Toukan +Signed-off-by: Saeed Mahameed +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 11 +++++------ + 1 file changed, 5 insertions(+), 6 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +@@ -81,6 +81,7 @@ static bool mlx5e_check_fragmented_strid + static void mlx5e_set_rq_type_params(struct mlx5e_priv *priv, u8 rq_type) + { + priv->params.rq_wq_type = rq_type; ++ priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; + switch (priv->params.rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW; +@@ -93,6 +94,10 @@ static void mlx5e_set_rq_type_params(str + break; + default: /* MLX5_WQ_TYPE_LINKED_LIST */ + priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; ++ ++ /* Extra room needed for build_skb */ ++ priv->params.lro_wqe_sz -= MLX5_RX_HEADROOM + ++ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + } + priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type, + BIT(priv->params.log_rq_size)); +@@ -3517,12 +3522,6 @@ static void mlx5e_build_nic_netdev_priv( + mlx5e_build_default_indir_rqt(mdev, priv->params.indirection_rqt, + MLX5E_INDIR_RQT_SIZE, profile->max_nch(mdev)); + +- priv->params.lro_wqe_sz = +- MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ - +- /* Extra room needed for build_skb */ +- MLX5_RX_HEADROOM - +- SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); +- + /* Initialize pflags */ + MLX5E_SET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_BASED_MODER, + priv->params.rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE); diff --git a/queue-4.10/net-mlx5e-fix-broken-cqe-compression-initialization.patch b/queue-4.10/net-mlx5e-fix-broken-cqe-compression-initialization.patch new file mode 100644 index 00000000000..ae62164c71c --- /dev/null +++ b/queue-4.10/net-mlx5e-fix-broken-cqe-compression-initialization.patch @@ -0,0 +1,58 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: Tariq Toukan 
+Date: Wed, 22 Feb 2017 17:20:14 +0200 +Subject: net/mlx5e: Fix broken CQE compression initialization + +From: Tariq Toukan + + +[ Upstream commit b0d4660b4cc52e6477ca3a43435351d565dfcedc ] + +Some of RQ type parameters are derived from CQE compression state flag, +CQE compression flag was initialized only after RQ type parameters +setup. This leads to load RQ with stride size smaller than what we +want for when CQE compression is on. + +This bug introduces no functional damage, it only makes CQE compression +occur less often, since in ConnectX4-LX CQE compression is performed +only on packets smaller than stride size. + +Fix this by marking default status of CQE compression in PFLAG prior to +calling mlx5e_set_rq_priv_params(), as it inits some fields based on it. + +Tested: + load driver on systems where rx CQE compress will be on (MH) + pktgen with 64 < pkt size < 256 and netperf TCP_STREAM (IPv4/IPv6) + verify `ethtool -S ethxx | grep compress` are advancing more often + (rapidly) + +Fixes: 2fc4bfb7250d ("net/mlx5e: Dynamic RQ type infrastructure") +Signed-off-by: Tariq Toukan +Cc: kernel-team@fb.com +Signed-off-by: Saeed Mahameed +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +@@ -3500,6 +3500,9 @@ static void mlx5e_build_nic_netdev_priv( + cqe_compress_heuristic(link_speed, pci_bw); + } + ++ MLX5E_SET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS, ++ priv->params.rx_cqe_compress_def); ++ + mlx5e_set_rq_priv_params(priv); + if (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) + priv->params.lro_en = true; +@@ -3525,7 +3528,6 @@ static void mlx5e_build_nic_netdev_priv( + /* Initialize pflags */ + MLX5E_SET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_BASED_MODER, + priv->params.rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE); +- MLX5E_SET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS, priv->params.rx_cqe_compress_def); + + mutex_init(&priv->state_lock); + diff --git a/queue-4.10/net-mlx5e-fix-wrong-cqe-decompression.patch b/queue-4.10/net-mlx5e-fix-wrong-cqe-decompression.patch new file mode 100644 index 00000000000..8ec86dd8d5b --- /dev/null +++ b/queue-4.10/net-mlx5e-fix-wrong-cqe-decompression.patch @@ -0,0 +1,71 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: Tariq Toukan +Date: Wed, 22 Feb 2017 17:20:16 +0200 +Subject: net/mlx5e: Fix wrong CQE decompression + +From: Tariq Toukan + + +[ Upstream commit 36154be40a28e4afaa0416da2681d80b7e2ca319 ] + +In cqe compression with striding RQ, the decompression of the CQE field +wqe_counter was done with a wrong wraparound value. +This caused handling cqes with a wrong pointer to wqe (rx descriptor) +and creating SKBs with wrong data, pointing to wrong (and already consumed) +strides/pages. + +The meaning of the CQE field wqe_counter in striding RQ holds the +stride index instead of the WQE index. Hence, when decompressing +a CQE, wqe_counter should have wrapped-around the number of strides +in a single multi-packet WQE. 
+ +We dropped this wrap-around mask altogether in CQE decompression of striding +RQ. It is not needed as in such cases the CQE compression session would +break because of a different value of the wqe_id field, starting a new +compression session. + +Tested: + ethtool -K ethxx lro off/on + ethtool --set-priv-flags ethxx rx_cqe_compress on + super_netperf 16 {ipv4,ipv6} -t TCP_STREAM -m 50 -D + verified no csum errors and no page refcount issues. + +Fixes: 7219ab34f184 ("net/mlx5e: CQE compression") +Signed-off-by: Tariq Toukan +Reported-by: Tom Herbert +Cc: kernel-team@fb.com +Signed-off-by: Saeed Mahameed +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 13 ++++++------- + 1 file changed, 6 insertions(+), 7 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +@@ -92,19 +92,18 @@ static inline void mlx5e_cqes_update_own + static inline void mlx5e_decompress_cqe(struct mlx5e_rq *rq, + struct mlx5e_cq *cq, u32 cqcc) + { +- u16 wqe_cnt_step; +- + cq->title.byte_cnt = cq->mini_arr[cq->mini_arr_idx].byte_cnt; + cq->title.check_sum = cq->mini_arr[cq->mini_arr_idx].checksum; + cq->title.op_own &= 0xf0; + cq->title.op_own |= 0x01 & (cqcc >> cq->wq.log_sz); + cq->title.wqe_counter = cpu_to_be16(cq->decmprs_wqe_counter); + +- wqe_cnt_step = +- rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ? 
+- mpwrq_get_cqe_consumed_strides(&cq->title) : 1; +- cq->decmprs_wqe_counter = +- (cq->decmprs_wqe_counter + wqe_cnt_step) & rq->wq.sz_m1; ++ if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) ++ cq->decmprs_wqe_counter += ++ mpwrq_get_cqe_consumed_strides(&cq->title); ++ else ++ cq->decmprs_wqe_counter = ++ (cq->decmprs_wqe_counter + 1) & rq->wq.sz_m1; + } + + static inline void mlx5e_decompress_cqe_no_hash(struct mlx5e_rq *rq, diff --git a/queue-4.10/net-mlx5e-register-unregister-vport-representors-on-interface-attach-detach.patch b/queue-4.10/net-mlx5e-register-unregister-vport-representors-on-interface-attach-detach.patch new file mode 100644 index 00000000000..6e144fd2ccb --- /dev/null +++ b/queue-4.10/net-mlx5e-register-unregister-vport-representors-on-interface-attach-detach.patch @@ -0,0 +1,90 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: Saeed Mahameed +Date: Wed, 22 Feb 2017 17:20:12 +0200 +Subject: net/mlx5e: Register/unregister vport representors on interface attach/detach + +From: Saeed Mahameed + + +[ Upstream commit 6f08a22c5fb2b9aefb8ecd8496758e7a677c1fde ] + +Currently vport representors are added only on driver load and removed on +driver unload. Apparently we forgot to handle them when we added the +seamless reset flow feature. This caused to leave the representors +netdevs alive and active with open HW resources on pci shutdown and on +error reset flows. + +To overcome this we move their handling to interface attach/detach, so +they would be cleaned up on shutdown and recreated on reset flows. + +Fixes: 26e59d8077a3 ("net/mlx5e: Implement mlx5e interface attach/detach callbacks") +Signed-off-by: Saeed Mahameed +Reviewed-by: Hadar Hen Zion +Reviewed-by: Roi Dayan +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 23 ++++++++++++++-------- + 1 file changed, 15 insertions(+), 8 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +@@ -3940,6 +3940,19 @@ static void mlx5e_register_vport_rep(str + } + } + ++static void mlx5e_unregister_vport_rep(struct mlx5_core_dev *mdev) ++{ ++ struct mlx5_eswitch *esw = mdev->priv.eswitch; ++ int total_vfs = MLX5_TOTAL_VPORTS(mdev); ++ int vport; ++ ++ if (!MLX5_CAP_GEN(mdev, vport_group_manager)) ++ return; ++ ++ for (vport = 1; vport < total_vfs; vport++) ++ mlx5_eswitch_unregister_vport_rep(esw, vport); ++} ++ + void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev) + { + struct mlx5e_priv *priv = netdev_priv(netdev); +@@ -3986,6 +3999,7 @@ static int mlx5e_attach(struct mlx5_core + return err; + } + ++ mlx5e_register_vport_rep(mdev); + return 0; + } + +@@ -3997,6 +4011,7 @@ static void mlx5e_detach(struct mlx5_cor + if (!netif_device_present(netdev)) + return; + ++ mlx5e_unregister_vport_rep(mdev); + mlx5e_detach_netdev(mdev, netdev); + mlx5e_destroy_mdev_resources(mdev); + } +@@ -4015,8 +4030,6 @@ static void *mlx5e_add(struct mlx5_core_ + if (err) + return NULL; + +- mlx5e_register_vport_rep(mdev); +- + if (MLX5_CAP_GEN(mdev, vport_group_manager)) + ppriv = &esw->offloads.vport_reps[0]; + +@@ -4068,13 +4081,7 @@ void mlx5e_destroy_netdev(struct mlx5_co + + static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv) + { +- struct mlx5_eswitch *esw = mdev->priv.eswitch; +- int total_vfs = MLX5_TOTAL_VPORTS(mdev); + struct mlx5e_priv *priv = vpriv; +- int vport; +- +- for (vport = 1; vport < total_vfs; vport++) +- mlx5_eswitch_unregister_vport_rep(esw, vport); + + unregister_netdev(priv->netdev); + mlx5e_detach(mdev, vpriv); diff --git a/queue-4.10/net-mlx5e-update-mpwqe-stride-size-when-modifying-cqe-compress-state.patch 
b/queue-4.10/net-mlx5e-update-mpwqe-stride-size-when-modifying-cqe-compress-state.patch new file mode 100644 index 00000000000..27e3db1bdb1 --- /dev/null +++ b/queue-4.10/net-mlx5e-update-mpwqe-stride-size-when-modifying-cqe-compress-state.patch @@ -0,0 +1,84 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: Saeed Mahameed +Date: Wed, 22 Feb 2017 17:20:15 +0200 +Subject: net/mlx5e: Update MPWQE stride size when modifying CQE compress state + +From: Saeed Mahameed + + +[ Upstream commit 6dc4b54e77282caf17f0ff72aa32dd296037fbc0 ] + +When the admin enables/disables cqe compression, updating +mpwqe stride size is required: + CQE compress ON ==> stride size = 256B + CQE compress OFF ==> stride size = 64B + +This is already done on driver load via mlx5e_set_rq_type_params, all we +need is just to call it on arbitrary admin changes of cqe compression +state via priv flags or when changing timestamping state +(as it is mutually exclusive with cqe compression). + +This bug introduces no functional damage, it only makes cqe compression +occur less often, since in ConnectX4-LX CQE compression is performed +only on packets smaller than stride size. + +Tested: + ethtool --set-priv-flags ethxx rx_cqe_compress on + pktgen with 64 < pkt size < 256 and netperf TCP_STREAM (IPv4/IPv6) + verify `ethtool -S ethxx | grep compress` are advancing more often + (rapidly) + +Fixes: 7219ab34f184 ("net/mlx5e: CQE compression") +Signed-off-by: Saeed Mahameed +Reviewed-by: Tariq Toukan +Cc: kernel-team@fb.com +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + + drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 1 + + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- + drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 1 + + 4 files changed, 4 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h +@@ -803,6 +803,7 @@ int mlx5e_get_max_linkspeed(struct mlx5_ + + void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, + u8 cq_period_mode); ++void mlx5e_set_rq_type_params(struct mlx5e_priv *priv, u8 rq_type); + + static inline void mlx5e_tx_notify_hw(struct mlx5e_sq *sq, + struct mlx5_wqe_ctrl_seg *ctrl, int bf_sz) +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +@@ -1477,6 +1477,7 @@ static int set_pflag_rx_cqe_compress(str + + MLX5E_SET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS, enable); + priv->params.rx_cqe_compress_def = enable; ++ mlx5e_set_rq_type_params(priv, priv->params.rq_wq_type); + + if (reset) + err = mlx5e_open_locked(netdev); +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +@@ -78,7 +78,7 @@ static bool mlx5e_check_fragmented_strid + MLX5_CAP_ETH(mdev, reg_umr_sq); + } + +-static void mlx5e_set_rq_type_params(struct mlx5e_priv *priv, u8 rq_type) ++void mlx5e_set_rq_type_params(struct mlx5e_priv *priv, u8 rq_type) + { + priv->params.rq_wq_type = rq_type; + priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +@@ -172,6 +172,7 @@ void mlx5e_modify_rx_cqe_compression(str + mlx5e_close_locked(priv->netdev); + + MLX5E_SET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS, val); ++ mlx5e_set_rq_type_params(priv, priv->params.rq_wq_type); + + if (was_opened) + 
mlx5e_open_locked(priv->netdev); diff --git a/queue-4.10/net-net_enable_timestamp-can-be-called-from-irq-contexts.patch b/queue-4.10/net-net_enable_timestamp-can-be-called-from-irq-contexts.patch new file mode 100644 index 00000000000..e9b2f282dbb --- /dev/null +++ b/queue-4.10/net-net_enable_timestamp-can-be-called-from-irq-contexts.patch @@ -0,0 +1,96 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: Eric Dumazet +Date: Wed, 1 Mar 2017 14:28:39 -0800 +Subject: net: net_enable_timestamp() can be called from irq contexts + +From: Eric Dumazet + + +[ Upstream commit 13baa00ad01bb3a9f893e3a08cbc2d072fc0c15d ] + +It is now very clear that silly TCP listeners might play with +enabling/disabling timestamping while new children are added +to their accept queue. + +Meaning net_enable_timestamp() can be called from BH context +while current state of the static key is not enabled. + +Let's play it safe and allow all contexts. + +The work queue is scheduled only under the problematic cases, +which are the static key enable/disable transition, to not slow down +critical paths. + +This extends and improves what we did in commit 5fa8bbda38c6 ("net: use +a work queue to defer net_disable_timestamp() work") + +Fixes: b90e5794c5bd ("net: dont call jump_label_dec from irq context") +Signed-off-by: Eric Dumazet +Reported-by: Dmitry Vyukov +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/dev.c | 35 +++++++++++++++++++++++++++++++---- + 1 file changed, 31 insertions(+), 4 deletions(-) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -1696,27 +1696,54 @@ EXPORT_SYMBOL_GPL(net_dec_egress_queue); + static struct static_key netstamp_needed __read_mostly; + #ifdef HAVE_JUMP_LABEL + static atomic_t netstamp_needed_deferred; ++static atomic_t netstamp_wanted; + static void netstamp_clear(struct work_struct *work) + { + int deferred = atomic_xchg(&netstamp_needed_deferred, 0); ++ int wanted; + +- while (deferred--) +- static_key_slow_dec(&netstamp_needed); ++ wanted = atomic_add_return(deferred, &netstamp_wanted); ++ if (wanted > 0) ++ static_key_enable(&netstamp_needed); ++ else ++ static_key_disable(&netstamp_needed); + } + static DECLARE_WORK(netstamp_work, netstamp_clear); + #endif + + void net_enable_timestamp(void) + { ++#ifdef HAVE_JUMP_LABEL ++ int wanted; ++ ++ while (1) { ++ wanted = atomic_read(&netstamp_wanted); ++ if (wanted <= 0) ++ break; ++ if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted + 1) == wanted) ++ return; ++ } ++ atomic_inc(&netstamp_needed_deferred); ++ schedule_work(&netstamp_work); ++#else + static_key_slow_inc(&netstamp_needed); ++#endif + } + EXPORT_SYMBOL(net_enable_timestamp); + + void net_disable_timestamp(void) + { + #ifdef HAVE_JUMP_LABEL +- /* net_disable_timestamp() can be called from non process context */ +- atomic_inc(&netstamp_needed_deferred); ++ int wanted; ++ ++ while (1) { ++ wanted = atomic_read(&netstamp_wanted); ++ if (wanted <= 1) ++ break; ++ if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted - 1) == wanted) ++ return; ++ } ++ atomic_dec(&netstamp_needed_deferred); + schedule_work(&netstamp_work); + #else + static_key_slow_dec(&netstamp_needed); diff --git a/queue-4.10/net-sched-act_skbmod-remove-unneeded-rcu_read_unlock-in-tcf_skbmod_dump.patch b/queue-4.10/net-sched-act_skbmod-remove-unneeded-rcu_read_unlock-in-tcf_skbmod_dump.patch new file 
mode 100644 index 00000000000..75caaf8252c --- /dev/null +++ b/queue-4.10/net-sched-act_skbmod-remove-unneeded-rcu_read_unlock-in-tcf_skbmod_dump.patch @@ -0,0 +1,30 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: Alexey Khoroshilov +Date: Sun, 5 Mar 2017 03:01:55 +0300 +Subject: net/sched: act_skbmod: remove unneeded rcu_read_unlock in tcf_skbmod_dump + +From: Alexey Khoroshilov + + +[ Upstream commit 6c4dc75c251721f517e9daeb5370ea606b5b35ce ] + +Found by Linux Driver Verification project (linuxtesting.org). + +Signed-off-by: Alexey Khoroshilov +Acked-by: Jamal Hadi Salim +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/act_skbmod.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/net/sched/act_skbmod.c ++++ b/net/sched/act_skbmod.c +@@ -228,7 +228,6 @@ static int tcf_skbmod_dump(struct sk_buf + + return skb->len; + nla_put_failure: +- rcu_read_unlock(); + nlmsg_trim(skb, b); + return -1; + } diff --git a/queue-4.10/net-sched-actions-decrement-module-reference-count-after-table-flush.patch b/queue-4.10/net-sched-actions-decrement-module-reference-count-after-table-flush.patch new file mode 100644 index 00000000000..29571db4b90 --- /dev/null +++ b/queue-4.10/net-sched-actions-decrement-module-reference-count-after-table-flush.patch @@ -0,0 +1,94 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: Roman Mashak +Date: Fri, 24 Feb 2017 11:00:32 -0500 +Subject: net sched actions: decrement module reference count after table flush. + +From: Roman Mashak + + +[ Upstream commit edb9d1bff4bbe19b8ae0e71b1f38732591a9eeb2 ] + +When tc actions are loaded as a module and no actions have been installed, +flushing them would result in actions removed from the memory, but modules +reference count not being decremented, so that the modules would not be +unloaded. 
+ +Following is example with GACT action: + +% sudo modprobe act_gact +% lsmod +Module Size Used by +act_gact 16384 0 +% +% sudo tc actions ls action gact +% +% sudo tc actions flush action gact +% lsmod +Module Size Used by +act_gact 16384 1 +% sudo tc actions flush action gact +% lsmod +Module Size Used by +act_gact 16384 2 +% sudo rmmod act_gact +rmmod: ERROR: Module act_gact is in use +.... + +After the fix: +% lsmod +Module Size Used by +act_gact 16384 0 +% +% sudo tc actions add action pass index 1 +% sudo tc actions add action pass index 2 +% sudo tc actions add action pass index 3 +% lsmod +Module Size Used by +act_gact 16384 3 +% +% sudo tc actions flush action gact +% lsmod +Module Size Used by +act_gact 16384 0 +% +% sudo tc actions flush action gact +% lsmod +Module Size Used by +act_gact 16384 0 +% sudo rmmod act_gact +% lsmod +Module Size Used by +% + +Fixes: f97017cdefef ("net-sched: Fix actions flushing") +Signed-off-by: Roman Mashak +Signed-off-by: Jamal Hadi Salim +Acked-by: Cong Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/act_api.c | 5 +---- + 1 file changed, 1 insertion(+), 4 deletions(-) + +--- a/net/sched/act_api.c ++++ b/net/sched/act_api.c +@@ -817,10 +817,8 @@ static int tca_action_flush(struct net * + goto out_module_put; + + err = ops->walk(net, skb, &dcb, RTM_DELACTION, ops); +- if (err < 0) ++ if (err <= 0) + goto out_module_put; +- if (err == 0) +- goto noflush_out; + + nla_nest_end(skb, nest); + +@@ -837,7 +835,6 @@ static int tca_action_flush(struct net * + out_module_put: + module_put(ops->owner); + err_out: +-noflush_out: + kfree_skb(skb); + return err; + } diff --git a/queue-4.10/net-tunnel-set-inner-protocol-in-network-gro-hooks.patch b/queue-4.10/net-tunnel-set-inner-protocol-in-network-gro-hooks.patch new file mode 100644 index 00000000000..daf6a58ea4c --- /dev/null +++ b/queue-4.10/net-tunnel-set-inner-protocol-in-network-gro-hooks.patch @@ -0,0 +1,70 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: Paolo Abeni +Date: Tue, 7 Mar 2017 18:33:31 +0100 +Subject: net/tunnel: set inner protocol in network gro hooks + +From: Paolo Abeni + + +[ Upstream commit 294acf1c01bace5cea5d30b510504238bf5f7c25 ] + +The gso code of several tunnels type (gre and udp tunnels) +takes for granted that the skb->inner_protocol is properly +initialized and drops the packet elsewhere. + +On the forwarding path no one is initializing such field, +so gro encapsulated packets are dropped on forward. + +Since commit 38720352412a ("gre: Use inner_proto to obtain +inner header protocol"), this can be reproduced when the +encapsulated packets use gre as the tunneling protocol. + +The issue happens also with vxlan and geneve tunnels since +commit 8bce6d7d0d1e ("udp: Generalize skb_udp_segment"), if the +forwarding host's ingress nic has h/w offload for such tunnel +and a vxlan/geneve device is configured on top of it, regardless +of the configured peer address and vni. 
+ +To address the issue, this change initializes the inner_protocol +field for encapsulated packets in both ipv4 and ipv6 gro complete +callbacks. + +Fixes: 38720352412a ("gre: Use inner_proto to obtain inner header protocol") +Fixes: 8bce6d7d0d1e ("udp: Generalize skb_udp_segment") +Signed-off-by: Paolo Abeni +Acked-by: Alexander Duyck +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/af_inet.c | 4 +++- + net/ipv6/ip6_offload.c | 4 +++- + 2 files changed, 6 insertions(+), 2 deletions(-) + +--- a/net/ipv4/af_inet.c ++++ b/net/ipv4/af_inet.c +@@ -1470,8 +1470,10 @@ int inet_gro_complete(struct sk_buff *sk + int proto = iph->protocol; + int err = -ENOSYS; + +- if (skb->encapsulation) ++ if (skb->encapsulation) { ++ skb_set_inner_protocol(skb, cpu_to_be16(ETH_P_IP)); + skb_set_inner_network_header(skb, nhoff); ++ } + + csum_replace2(&iph->check, iph->tot_len, newlen); + iph->tot_len = newlen; +--- a/net/ipv6/ip6_offload.c ++++ b/net/ipv6/ip6_offload.c +@@ -294,8 +294,10 @@ static int ipv6_gro_complete(struct sk_b + struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + nhoff); + int err = -ENOSYS; + +- if (skb->encapsulation) ++ if (skb->encapsulation) { ++ skb_set_inner_protocol(skb, cpu_to_be16(ETH_P_IPV6)); + skb_set_inner_network_header(skb, nhoff); ++ } + + iph->payload_len = htons(skb->len - nhoff - sizeof(*iph)); + diff --git a/queue-4.10/net-use-net-count-to-check-whether-a-netns-is-alive-or-not.patch b/queue-4.10/net-use-net-count-to-check-whether-a-netns-is-alive-or-not.patch new file mode 100644 index 00000000000..7e6d185e309 --- /dev/null +++ b/queue-4.10/net-use-net-count-to-check-whether-a-netns-is-alive-or-not.patch @@ -0,0 +1,54 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: Andrey Vagin +Date: Sun, 12 Mar 2017 21:36:18 -0700 +Subject: net: use net->count to check whether a netns is alive or not + +From: Andrey Vagin + + +[ Upstream commit 91864f5852f9996210fad400cf70fb85af091243 ] + +The previous idea was to check
whether a net namespace is in +net_exit_list or not. It doesn't work, because net->exit_list is used in +__register_pernet_operations and __unregister_pernet_operations where +all namespaces are added to a temporary list to make cleanup in an error +case, so list_empty(&net->exit_list) always returns false. + +Reported-by: Mantas Mikulėnas +Fixes: 002d8a1a6c11 ("net: skip genenerating uevents for network namespaces that are exiting") +Signed-off-by: Andrei Vagin +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/net-sysfs.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/net/core/net-sysfs.c ++++ b/net/core/net-sysfs.c +@@ -952,7 +952,7 @@ net_rx_queue_update_kobjects(struct net_ + while (--i >= new_num) { + struct kobject *kobj = &dev->_rx[i].kobj; + +- if (!list_empty(&dev_net(dev)->exit_list)) ++ if (!atomic_read(&dev_net(dev)->count)) + kobj->uevent_suppress = 1; + if (dev->sysfs_rx_queue_group) + sysfs_remove_group(kobj, dev->sysfs_rx_queue_group); +@@ -1370,7 +1370,7 @@ netdev_queue_update_kobjects(struct net_ + while (--i >= new_num) { + struct netdev_queue *queue = dev->_tx + i; + +- if (!list_empty(&dev_net(dev)->exit_list)) ++ if (!atomic_read(&dev_net(dev)->count)) + queue->kobj.uevent_suppress = 1; + #ifdef CONFIG_BQL + sysfs_remove_group(&queue->kobj, &dql_group); +@@ -1557,7 +1557,7 @@ void netdev_unregister_kobject(struct ne + { + struct device *dev = &(ndev->dev); + +- if (!list_empty(&dev_net(ndev)->exit_list)) ++ if (!atomic_read(&dev_net(ndev)->count)) + dev_set_uevent_suppress(dev, 1); + + kobject_get(&dev->kobj); diff --git a/queue-4.10/sctp-deny-peeloff-operation-on-asocs-with-threads-sleeping-on-it.patch b/queue-4.10/sctp-deny-peeloff-operation-on-asocs-with-threads-sleeping-on-it.patch new file mode 100644 index 00000000000..b1358137463 --- /dev/null +++ b/queue-4.10/sctp-deny-peeloff-operation-on-asocs-with-threads-sleeping-on-it.patch @@ -0,0 +1,66 @@ +From foo@baz Sat Mar 18 22:03:53
CST 2017 +From: Marcelo Ricardo Leitner +Date: Thu, 23 Feb 2017 09:31:18 -0300 +Subject: sctp: deny peeloff operation on asocs with threads sleeping on it + +From: Marcelo Ricardo Leitner + + +[ Upstream commit dfcb9f4f99f1e9a49e43398a7bfbf56927544af1 ] + +commit 2dcab5984841 ("sctp: avoid BUG_ON on sctp_wait_for_sndbuf") +attempted to avoid a BUG_ON call when the association being used for a +sendmsg() is blocked waiting for more sndbuf and another thread did a +peeloff operation on such asoc, moving it to another socket. + +As Ben Hutchings noticed, then in such case it would return without +locking back the socket and would cause two unlocks in a row. + +Further analysis also revealed that it could allow a double free if the +application managed to peeloff the asoc that is created during the +sendmsg call, because then sctp_sendmsg() would try to free the asoc +that was created only for that call. + +This patch takes another approach. It will deny the peeloff operation +if there is a thread sleeping on the asoc, so this situation doesn't +exist anymore. This avoids the issues described above and also honors +the syscalls that are already being handled (it can be multiple sendmsg +calls). + +Joint work with Xin Long. + +Fixes: 2dcab5984841 ("sctp: avoid BUG_ON on sctp_wait_for_sndbuf") +Cc: Alexander Popov +Cc: Ben Hutchings +Signed-off-by: Marcelo Ricardo Leitner +Signed-off-by: Xin Long +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/socket.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/net/sctp/socket.c ++++ b/net/sctp/socket.c +@@ -4734,6 +4734,12 @@ int sctp_do_peeloff(struct sock *sk, sct + if (!asoc) + return -EINVAL; + ++ /* If there is a thread waiting on more sndbuf space for ++ * sending on this asoc, it cannot be peeled. 
++ */ ++ if (waitqueue_active(&asoc->wait)) ++ return -EBUSY; ++ + /* An association cannot be branched off from an already peeled-off + * socket, nor is this supported for tcp style sockets. + */ +@@ -7426,8 +7432,6 @@ static int sctp_wait_for_sndbuf(struct s + */ + release_sock(sk); + current_timeo = schedule_timeout(current_timeo); +- if (sk != asoc->base.sk) +- goto do_error; + lock_sock(sk); + + *timeo_p = current_timeo; diff --git a/queue-4.10/sctp-set-sin_port-for-addr-param-when-checking-duplicate-address.patch b/queue-4.10/sctp-set-sin_port-for-addr-param-when-checking-duplicate-address.patch new file mode 100644 index 00000000000..7f848f392b2 --- /dev/null +++ b/queue-4.10/sctp-set-sin_port-for-addr-param-when-checking-duplicate-address.patch @@ -0,0 +1,53 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: Xin Long +Date: Fri, 24 Feb 2017 15:18:46 +0800 +Subject: sctp: set sin_port for addr param when checking duplicate address + +From: Xin Long + + +[ Upstream commit 2e3ce5bc2aa938653c3866aa7f4901a1f199b1c8 ] + +Commit b8607805dd15 ("sctp: not copying duplicate addrs to the assoc's +bind address list") tried to check for duplicate address before copying +to asoc's bind_addr list from global addr list. + +But all the addrs' sin_ports in global addr list are 0 while the addrs' +sin_ports are bp->port in asoc's bind_addr list. It means even if it's +a duplicate address, af->cmp_addr will still return 0 as their +sin_ports are different. + +This patch is to fix it by setting the sin_port for addr param with +bp->port before comparing the addrs. + +Fixes: b8607805dd15 ("sctp: not copying duplicate addrs to the assoc's bind address list") +Reported-by: Wei Chen +Signed-off-by: Xin Long +Signed-off-by: David S.
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/protocol.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/net/sctp/protocol.c ++++ b/net/sctp/protocol.c +@@ -199,6 +199,7 @@ int sctp_copy_local_addr_list(struct net + sctp_scope_t scope, gfp_t gfp, int copy_flags) + { + struct sctp_sockaddr_entry *addr; ++ union sctp_addr laddr; + int error = 0; + + rcu_read_lock(); +@@ -220,7 +221,10 @@ int sctp_copy_local_addr_list(struct net + !(copy_flags & SCTP_ADDR6_PEERSUPP))) + continue; + +- if (sctp_bind_addr_state(bp, &addr->a) != -1) ++ laddr = addr->a; ++ /* also works for setting ipv6 address port */ ++ laddr.v4.sin_port = htons(bp->port); ++ if (sctp_bind_addr_state(bp, &laddr) != -1) + continue; + + error = sctp_add_bind_addr(bp, &addr->a, sizeof(addr->a), diff --git a/queue-4.10/series b/queue-4.10/series new file mode 100644 index 00000000000..31ece9e0e7f --- /dev/null +++ b/queue-4.10/series @@ -0,0 +1,48 @@ +net-mlx5e-register-unregister-vport-representors-on-interface-attach-detach.patch +net-mlx5e-do-not-reduce-lro-wqe-size-when-not-using-build_skb.patch +net-mlx5e-fix-broken-cqe-compression-initialization.patch +net-mlx5e-update-mpwqe-stride-size-when-modifying-cqe-compress-state.patch +net-mlx5e-fix-wrong-cqe-decompression.patch +sctp-deny-peeloff-operation-on-asocs-with-threads-sleeping-on-it.patch +vxlan-correctly-validate-vxlan-id-against-vxlan_n_vid.patch +vti6-return-gre_key-for-vti6.patch +vxlan-don-t-allow-overwrite-of-config-src-addr.patch +ipv4-add-missing-initialization-for-flowi4_uid.patch +ipv4-mask-tos-for-input-route.patch +sctp-set-sin_port-for-addr-param-when-checking-duplicate-address.patch +net-sched-actions-decrement-module-reference-count-after-table-flush.patch +l2tp-avoid-use-after-free-caused-by-l2tp_ip_backlog_recv.patch +vxlan-lock-rcu-on-tx-path.patch +geneve-lock-rcu-on-tx-path.patch +mlxsw-spectrum_router-avoid-potential-packets-loss.patch +tcp-dccp-block-bh-for-syn-processing.patch 
+net-bridge-allow-ipv6-when-multicast-flood-is-disabled.patch +net-don-t-call-strlen-on-the-user-buffer-in-packet_bind_spkt.patch +net-net_enable_timestamp-can-be-called-from-irq-contexts.patch +ipv6-orphan-skbs-in-reassembly-unit.patch +dccp-unlock-sock-before-calling-sk_free.patch +amd-xgbe-stop-the-phy-before-releasing-interrupts.patch +amd-xgbe-be-sure-to-set-mdio-modes-on-device-re-start.patch +amd-xgbe-don-t-overwrite-sfp-phy-mod_absent-settings.patch +bonding-use-eth_max_mtu-as-max-mtu.patch +strparser-destroy-workqueue-on-module-exit.patch +tcp-fix-various-issues-for-sockets-morphing-to-listen-state.patch +net-fix-socket-refcounting-in-skb_complete_wifi_ack.patch +net-fix-socket-refcounting-in-skb_complete_tx_timestamp.patch +net-sched-act_skbmod-remove-unneeded-rcu_read_unlock-in-tcf_skbmod_dump.patch +dccp-fix-use-after-free-in-dccp_feat_activate_values.patch +team-use-eth_max_mtu-as-max-mtu.patch +vrf-fix-use-after-free-in-vrf_xmit.patch +net-tunnel-set-inner-protocol-in-network-gro-hooks.patch +uapi-fix-linux-packet_diag.h-userspace-compilation-error.patch +amd-xgbe-enable-irqs-only-if-napi_complete_done-is-true.patch +act_connmark-avoid-crashing-on-malformed-nlattrs-with-null-parms.patch +mpls-send-route-delete-notifications-when-router-module-is-unloaded.patch +mpls-do-not-decrement-alive-counter-for-unregister-events.patch +ipv6-make-ecmp-route-replacement-less-greedy.patch +ipv6-avoid-write-to-a-possibly-cloned-skb.patch +bridge-drop-netfilter-fake-rtable-unconditionally.patch +net-use-net-count-to-check-whether-a-netns-is-alive-or-not.patch +dccp-tcp-fix-routing-redirect-race.patch +tun-fix-premature-pollout-notification-on-tun-devices.patch +dccp-fix-memory-leak-during-tear-down-of-unsuccessful-connection-request.patch diff --git a/queue-4.10/strparser-destroy-workqueue-on-module-exit.patch b/queue-4.10/strparser-destroy-workqueue-on-module-exit.patch new file mode 100644 index 00000000000..eaa87c61160 --- /dev/null +++ 
b/queue-4.10/strparser-destroy-workqueue-on-module-exit.patch @@ -0,0 +1,29 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: WANG Cong +Date: Fri, 3 Mar 2017 12:21:14 -0800 +Subject: strparser: destroy workqueue on module exit + +From: WANG Cong + + +[ Upstream commit f78ef7cd9a0686b979679d0de061c6dbfd8d649e ] + +Fixes: 43a0c6751a32 ("strparser: Stream parser for messages") +Cc: Tom Herbert +Signed-off-by: Cong Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/strparser/strparser.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/strparser/strparser.c ++++ b/net/strparser/strparser.c +@@ -504,6 +504,7 @@ static int __init strp_mod_init(void) + + static void __exit strp_mod_exit(void) + { ++ destroy_workqueue(strp_wq); + } + module_init(strp_mod_init); + module_exit(strp_mod_exit); diff --git a/queue-4.10/tcp-dccp-block-bh-for-syn-processing.patch b/queue-4.10/tcp-dccp-block-bh-for-syn-processing.patch new file mode 100644 index 00000000000..2e2493bbaad --- /dev/null +++ b/queue-4.10/tcp-dccp-block-bh-for-syn-processing.patch @@ -0,0 +1,206 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: Eric Dumazet +Date: Wed, 1 Mar 2017 08:39:49 -0800 +Subject: tcp/dccp: block BH for SYN processing + +From: Eric Dumazet + + +[ Upstream commit 449809a66c1d0b1563dee84493e14bf3104d2d7e ] + +SYN processing really was meant to be handled from BH. + +When I got rid of BH blocking while processing socket backlog +in commit 5413d1babe8f ("net: do not block BH while processing socket +backlog"), I forgot that a malicious user could transition to TCP_LISTEN +from a state that allowed (SYN) packets to be parked in the socket +backlog while socket is owned by the thread doing the listen() call. + +Sure enough syzkaller found this and reported the bug ;) + +================================= +[ INFO: inconsistent lock state ] +4.10.0+ #60 Not tainted +--------------------------------- +inconsistent {IN-SOFTIRQ-W} -> {SOFTIRQ-ON-W} usage. 
+syz-executor0/5090 [HC0[0]:SC0[0]:HE1:SE1] takes: + (&(&hashinfo->ehash_locks[i])->rlock){+.?...}, at: +[] spin_lock include/linux/spinlock.h:299 [inline] + (&(&hashinfo->ehash_locks[i])->rlock){+.?...}, at: +[] inet_ehash_insert+0x240/0xad0 +net/ipv4/inet_hashtables.c:407 +{IN-SOFTIRQ-W} state was registered at: + mark_irqflags kernel/locking/lockdep.c:2923 [inline] + __lock_acquire+0xbcf/0x3270 kernel/locking/lockdep.c:3295 + lock_acquire+0x241/0x580 kernel/locking/lockdep.c:3753 + __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline] + _raw_spin_lock+0x33/0x50 kernel/locking/spinlock.c:151 + spin_lock include/linux/spinlock.h:299 [inline] + inet_ehash_insert+0x240/0xad0 net/ipv4/inet_hashtables.c:407 + reqsk_queue_hash_req net/ipv4/inet_connection_sock.c:753 [inline] + inet_csk_reqsk_queue_hash_add+0x1b7/0x2a0 net/ipv4/inet_connection_sock.c:764 + tcp_conn_request+0x25cc/0x3310 net/ipv4/tcp_input.c:6399 + tcp_v4_conn_request+0x157/0x220 net/ipv4/tcp_ipv4.c:1262 + tcp_rcv_state_process+0x802/0x4130 net/ipv4/tcp_input.c:5889 + tcp_v4_do_rcv+0x56b/0x940 net/ipv4/tcp_ipv4.c:1433 + tcp_v4_rcv+0x2e12/0x3210 net/ipv4/tcp_ipv4.c:1711 + ip_local_deliver_finish+0x4ce/0xc40 net/ipv4/ip_input.c:216 + NF_HOOK include/linux/netfilter.h:257 [inline] + ip_local_deliver+0x1ce/0x710 net/ipv4/ip_input.c:257 + dst_input include/net/dst.h:492 [inline] + ip_rcv_finish+0xb1d/0x2110 net/ipv4/ip_input.c:396 + NF_HOOK include/linux/netfilter.h:257 [inline] + ip_rcv+0xd90/0x19c0 net/ipv4/ip_input.c:487 + __netif_receive_skb_core+0x1ad1/0x3400 net/core/dev.c:4179 + __netif_receive_skb+0x2a/0x170 net/core/dev.c:4217 + netif_receive_skb_internal+0x1d6/0x430 net/core/dev.c:4245 + napi_skb_finish net/core/dev.c:4602 [inline] + napi_gro_receive+0x4e6/0x680 net/core/dev.c:4636 + e1000_receive_skb drivers/net/ethernet/intel/e1000/e1000_main.c:4033 [inline] + e1000_clean_rx_irq+0x5e0/0x1490 +drivers/net/ethernet/intel/e1000/e1000_main.c:4489 + e1000_clean+0xb9a/0x2910 
drivers/net/ethernet/intel/e1000/e1000_main.c:3834 + napi_poll net/core/dev.c:5171 [inline] + net_rx_action+0xe70/0x1900 net/core/dev.c:5236 + __do_softirq+0x2fb/0xb7d kernel/softirq.c:284 + invoke_softirq kernel/softirq.c:364 [inline] + irq_exit+0x19e/0x1d0 kernel/softirq.c:405 + exiting_irq arch/x86/include/asm/apic.h:658 [inline] + do_IRQ+0x81/0x1a0 arch/x86/kernel/irq.c:250 + ret_from_intr+0x0/0x20 + native_safe_halt+0x6/0x10 arch/x86/include/asm/irqflags.h:53 + arch_safe_halt arch/x86/include/asm/paravirt.h:98 [inline] + default_idle+0x8f/0x410 arch/x86/kernel/process.c:271 + arch_cpu_idle+0xa/0x10 arch/x86/kernel/process.c:262 + default_idle_call+0x36/0x60 kernel/sched/idle.c:96 + cpuidle_idle_call kernel/sched/idle.c:154 [inline] + do_idle+0x348/0x440 kernel/sched/idle.c:243 + cpu_startup_entry+0x18/0x20 kernel/sched/idle.c:345 + start_secondary+0x344/0x440 arch/x86/kernel/smpboot.c:272 + verify_cpu+0x0/0xfc +irq event stamp: 1741 +hardirqs last enabled at (1741): [] +__raw_spin_unlock_irqrestore include/linux/spinlock_api_smp.h:160 +[inline] +hardirqs last enabled at (1741): [] +_raw_spin_unlock_irqrestore+0xf7/0x1a0 kernel/locking/spinlock.c:191 +hardirqs last disabled at (1740): [] +__raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:108 [inline] +hardirqs last disabled at (1740): [] +_raw_spin_lock_irqsave+0xa2/0x110 kernel/locking/spinlock.c:159 +softirqs last enabled at (1738): [] +__do_softirq+0x7cf/0xb7d kernel/softirq.c:310 +softirqs last disabled at (1571): [] +do_softirq_own_stack+0x1c/0x30 arch/x86/entry/entry_64.S:902 + +other info that might help us debug this: + Possible unsafe locking scenario: + + CPU0 + ---- + lock(&(&hashinfo->ehash_locks[i])->rlock); + + lock(&(&hashinfo->ehash_locks[i])->rlock); + + *** DEADLOCK *** + +1 lock held by syz-executor0/5090: + #0: (sk_lock-AF_INET6){+.+.+.}, at: [] lock_sock +include/net/sock.h:1460 [inline] + #0: (sk_lock-AF_INET6){+.+.+.}, at: [] +sock_setsockopt+0x233/0x1e40 net/core/sock.c:683 + 
+stack backtrace: +CPU: 1 PID: 5090 Comm: syz-executor0 Not tainted 4.10.0+ #60 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:15 [inline] + dump_stack+0x292/0x398 lib/dump_stack.c:51 + print_usage_bug+0x3ef/0x450 kernel/locking/lockdep.c:2387 + valid_state kernel/locking/lockdep.c:2400 [inline] + mark_lock_irq kernel/locking/lockdep.c:2602 [inline] + mark_lock+0xf30/0x1410 kernel/locking/lockdep.c:3065 + mark_irqflags kernel/locking/lockdep.c:2941 [inline] + __lock_acquire+0x6dc/0x3270 kernel/locking/lockdep.c:3295 + lock_acquire+0x241/0x580 kernel/locking/lockdep.c:3753 + __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline] + _raw_spin_lock+0x33/0x50 kernel/locking/spinlock.c:151 + spin_lock include/linux/spinlock.h:299 [inline] + inet_ehash_insert+0x240/0xad0 net/ipv4/inet_hashtables.c:407 + reqsk_queue_hash_req net/ipv4/inet_connection_sock.c:753 [inline] + inet_csk_reqsk_queue_hash_add+0x1b7/0x2a0 net/ipv4/inet_connection_sock.c:764 + dccp_v6_conn_request+0xada/0x11b0 net/dccp/ipv6.c:380 + dccp_rcv_state_process+0x51e/0x1660 net/dccp/input.c:606 + dccp_v6_do_rcv+0x213/0x350 net/dccp/ipv6.c:632 + sk_backlog_rcv include/net/sock.h:896 [inline] + __release_sock+0x127/0x3a0 net/core/sock.c:2052 + release_sock+0xa5/0x2b0 net/core/sock.c:2539 + sock_setsockopt+0x60f/0x1e40 net/core/sock.c:1016 + SYSC_setsockopt net/socket.c:1782 [inline] + SyS_setsockopt+0x2fb/0x3a0 net/socket.c:1765 + entry_SYSCALL_64_fastpath+0x1f/0xc2 +RIP: 0033:0x4458b9 +RSP: 002b:00007fe8b26c2b58 EFLAGS: 00000292 ORIG_RAX: 0000000000000036 +RAX: ffffffffffffffda RBX: 0000000000000006 RCX: 00000000004458b9 +RDX: 000000000000001a RSI: 0000000000000001 RDI: 0000000000000006 +RBP: 00000000006e2110 R08: 0000000000000010 R09: 0000000000000000 +R10: 00000000208c3000 R11: 0000000000000292 R12: 0000000000708000 +R13: 0000000020000000 R14: 0000000000001000 R15: 0000000000000000 + +Fixes: 5413d1babe8f ("net: do not block 
BH while processing socket backlog") +Signed-off-by: Eric Dumazet +Reported-by: Andrey Konovalov +Acked-by: Soheil Hassas Yeganeh +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/dccp/input.c | 10 ++++++++-- + net/ipv4/tcp_input.c | 10 ++++++++-- + 2 files changed, 16 insertions(+), 4 deletions(-) + +--- a/net/dccp/input.c ++++ b/net/dccp/input.c +@@ -577,6 +577,7 @@ int dccp_rcv_state_process(struct sock * + struct dccp_sock *dp = dccp_sk(sk); + struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); + const int old_state = sk->sk_state; ++ bool acceptable; + int queued = 0; + + /* +@@ -603,8 +604,13 @@ int dccp_rcv_state_process(struct sock * + */ + if (sk->sk_state == DCCP_LISTEN) { + if (dh->dccph_type == DCCP_PKT_REQUEST) { +- if (inet_csk(sk)->icsk_af_ops->conn_request(sk, +- skb) < 0) ++ /* It is possible that we process SYN packets from backlog, ++ * so we need to make sure to disable BH right there. ++ */ ++ local_bh_disable(); ++ acceptable = inet_csk(sk)->icsk_af_ops->conn_request(sk, skb) >= 0; ++ local_bh_enable(); ++ if (!acceptable) + return 1; + consume_skb(skb); + return 0; +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -5916,9 +5916,15 @@ int tcp_rcv_state_process(struct sock *s + if (th->syn) { + if (th->fin) + goto discard; +- if (icsk->icsk_af_ops->conn_request(sk, skb) < 0) +- return 1; ++ /* It is possible that we process SYN packets from backlog, ++ * so we need to make sure to disable BH right there. 
++ */ ++ local_bh_disable(); ++ acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0; ++ local_bh_enable(); + ++ if (!acceptable) ++ return 1; + consume_skb(skb); + return 0; + } diff --git a/queue-4.10/tcp-fix-various-issues-for-sockets-morphing-to-listen-state.patch b/queue-4.10/tcp-fix-various-issues-for-sockets-morphing-to-listen-state.patch new file mode 100644 index 00000000000..57c7ad48e43 --- /dev/null +++ b/queue-4.10/tcp-fix-various-issues-for-sockets-morphing-to-listen-state.patch @@ -0,0 +1,74 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: Eric Dumazet +Date: Fri, 3 Mar 2017 14:08:21 -0800 +Subject: tcp: fix various issues for sockets morphing to listen state + +From: Eric Dumazet + + +[ Upstream commit 02b2faaf0af1d85585f6d6980e286d53612acfc2 ] + +Dmitry Vyukov reported a divide by 0 triggered by syzkaller, exploiting +tcp_disconnect() path that was never really considered and/or used +before syzkaller ;) + +I was not able to reproduce the bug, but it seems issues here are the +three possible actions that assumed they would never trigger on a +listener. + +1) tcp_write_timer_handler +2) tcp_delack_timer_handler +3) MTU reduction + +Only IPv6 MTU reduction was properly testing TCP_CLOSE and TCP_LISTEN + states from tcp_v6_mtu_reduced() + +Signed-off-by: Eric Dumazet +Reported-by: Dmitry Vyukov +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_ipv4.c | 7 +++++-- + net/ipv4/tcp_timer.c | 6 ++++-- + 2 files changed, 9 insertions(+), 4 deletions(-) + +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -269,10 +269,13 @@ EXPORT_SYMBOL(tcp_v4_connect); + */ + void tcp_v4_mtu_reduced(struct sock *sk) + { +- struct dst_entry *dst; + struct inet_sock *inet = inet_sk(sk); +- u32 mtu = tcp_sk(sk)->mtu_info; ++ struct dst_entry *dst; ++ u32 mtu; + ++ if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) ++ return; ++ mtu = tcp_sk(sk)->mtu_info; + dst = inet_csk_update_pmtu(sk, mtu); + if (!dst) + return; +--- a/net/ipv4/tcp_timer.c ++++ b/net/ipv4/tcp_timer.c +@@ -249,7 +249,8 @@ void tcp_delack_timer_handler(struct soc + + sk_mem_reclaim_partial(sk); + +- if (sk->sk_state == TCP_CLOSE || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) ++ if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) || ++ !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) + goto out; + + if (time_after(icsk->icsk_ack.timeout, jiffies)) { +@@ -552,7 +553,8 @@ void tcp_write_timer_handler(struct sock + struct inet_connection_sock *icsk = inet_csk(sk); + int event; + +- if (sk->sk_state == TCP_CLOSE || !icsk->icsk_pending) ++ if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) || ++ !icsk->icsk_pending) + goto out; + + if (time_after(icsk->icsk_timeout, jiffies)) { diff --git a/queue-4.10/team-use-eth_max_mtu-as-max-mtu.patch b/queue-4.10/team-use-eth_max_mtu-as-max-mtu.patch new file mode 100644 index 00000000000..21d91f0ad2c --- /dev/null +++ b/queue-4.10/team-use-eth_max_mtu-as-max-mtu.patch @@ -0,0 +1,38 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: Jarod Wilson +Date: Mon, 6 Mar 2017 08:48:58 -0500 +Subject: team: use ETH_MAX_MTU as max mtu + +From: Jarod Wilson + + +[ Upstream commit 3331aa378e9bcbd0d16de9034b0c20f4050e26b4 ] + +This restores the ability to set a team device's mtu to anything higher +than 1500. 
Similar to the reported issue with bonding, the team driver +calls ether_setup(), which sets an initial max_mtu of 1500, while the +underlying hardware can handle something much larger. Just set it to +ETH_MAX_MTU to support all possible values, and the limitations of the +underlying devices will prevent setting anything too large. + +Fixes: 91572088e3fd ("net: use core MTU range checking in core net infra") +CC: Cong Wang +CC: Jiri Pirko +CC: netdev@vger.kernel.org +Signed-off-by: Jarod Wilson +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/team/team.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/team/team.c ++++ b/drivers/net/team/team.c +@@ -2075,6 +2075,7 @@ static int team_dev_type_check_change(st + static void team_setup(struct net_device *dev) + { + ether_setup(dev); ++ dev->max_mtu = ETH_MAX_MTU; + + dev->netdev_ops = &team_netdev_ops; + dev->ethtool_ops = &team_ethtool_ops; diff --git a/queue-4.10/tun-fix-premature-pollout-notification-on-tun-devices.patch b/queue-4.10/tun-fix-premature-pollout-notification-on-tun-devices.patch new file mode 100644 index 00000000000..bc16be3a4b7 --- /dev/null +++ b/queue-4.10/tun-fix-premature-pollout-notification-on-tun-devices.patch @@ -0,0 +1,78 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: Hannes Frederic Sowa +Date: Mon, 13 Mar 2017 00:00:26 +0100 +Subject: tun: fix premature POLLOUT notification on tun devices + +From: Hannes Frederic Sowa + + +[ Upstream commit b20e2d54789c6acbf6bd0efdbec2cf5fa4d90ef1 ] + +aszlig observed failing ssh tunnels (-w) during initialization since +commit cc9da6cc4f56e0 ("ipv6: addrconf: use stable address generator for +ARPHRD_NONE"). We already had reports that the mentioned commit breaks +Juniper VPN connections. I can't clearly say that the Juniper VPN client +has the same problem, but it is worth a try to hint to this patch. 
+ +Because of the early generation of link local addresses, the kernel now +can start asking for routers on the local subnet much earlier than usual. +Those router solicitation packets arrive inside the ssh channels and +should be transmitted to the tun fd before the configuration scripts +might have upped the interface and made it ready for transmission. + +ssh polls on the interface and receives back a POLL_OUT. It tries to send +the early router solicitation packet to the tun interface. Unfortunately +it hasn't been up'ed yet by config scripts, thus failing with -EIO. ssh +doesn't retry again and considers the tun interface broken forever. + +Link: https://bugzilla.kernel.org/show_bug.cgi?id=121131 +Fixes: cc9da6cc4f56 ("ipv6: addrconf: use stable address generator for ARPHRD_NONE") +Cc: Bjørn Mork +Reported-by: Valdis Kletnieks +Cc: Valdis Kletnieks +Reported-by: Jonas Lippuner +Cc: Jonas Lippuner +Reported-by: aszlig +Cc: aszlig +Signed-off-by: Hannes Frederic Sowa +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/tun.c | 18 +++++++++++++++--- + 1 file changed, 15 insertions(+), 3 deletions(-) + +--- a/drivers/net/tun.c ++++ b/drivers/net/tun.c +@@ -819,7 +819,18 @@ static void tun_net_uninit(struct net_de + /* Net device open.
*/ + static int tun_net_open(struct net_device *dev) + { ++ struct tun_struct *tun = netdev_priv(dev); ++ int i; ++ + netif_tx_start_all_queues(dev); ++ ++ for (i = 0; i < tun->numqueues; i++) { ++ struct tun_file *tfile; ++ ++ tfile = rtnl_dereference(tun->tfiles[i]); ++ tfile->socket.sk->sk_write_space(tfile->socket.sk); ++ } ++ + return 0; + } + +@@ -1101,9 +1112,10 @@ static unsigned int tun_chr_poll(struct + if (!skb_array_empty(&tfile->tx_array)) + mask |= POLLIN | POLLRDNORM; + +- if (sock_writeable(sk) || +- (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) && +- sock_writeable(sk))) ++ if (tun->dev->flags & IFF_UP && ++ (sock_writeable(sk) || ++ (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) && ++ sock_writeable(sk)))) + mask |= POLLOUT | POLLWRNORM; + + if (tun->dev->reg_state != NETREG_REGISTERED) diff --git a/queue-4.10/uapi-fix-linux-packet_diag.h-userspace-compilation-error.patch b/queue-4.10/uapi-fix-linux-packet_diag.h-userspace-compilation-error.patch new file mode 100644 index 00000000000..ea99e9f9291 --- /dev/null +++ b/queue-4.10/uapi-fix-linux-packet_diag.h-userspace-compilation-error.patch @@ -0,0 +1,44 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: "Dmitry V. Levin" +Date: Tue, 7 Mar 2017 23:50:50 +0300 +Subject: uapi: fix linux/packet_diag.h userspace compilation error + +From: "Dmitry V. 
Levin" + + +[ Upstream commit 745cb7f8a5de0805cade3de3991b7a95317c7c73 ] + +Replace MAX_ADDR_LEN with its numeric value to fix the following +linux/packet_diag.h userspace compilation error: + +/usr/include/linux/packet_diag.h:67:17: error: 'MAX_ADDR_LEN' undeclared here (not in a function) + __u8 pdmc_addr[MAX_ADDR_LEN]; + +This is not the first case in the UAPI where the numeric value +of MAX_ADDR_LEN is used instead of symbolic one, uapi/linux/if_link.h +already does the same: + +$ grep MAX_ADDR_LEN include/uapi/linux/if_link.h + __u8 mac[32]; /* MAX_ADDR_LEN */ + +There are no UAPI headers besides these two that use MAX_ADDR_LEN. + +Signed-off-by: Dmitry V. Levin +Acked-by: Pavel Emelyanov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/uapi/linux/packet_diag.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/include/uapi/linux/packet_diag.h ++++ b/include/uapi/linux/packet_diag.h +@@ -64,7 +64,7 @@ struct packet_diag_mclist { + __u32 pdmc_count; + __u16 pdmc_type; + __u16 pdmc_alen; +- __u8 pdmc_addr[MAX_ADDR_LEN]; ++ __u8 pdmc_addr[32]; /* MAX_ADDR_LEN */ + }; + + struct packet_diag_ring { diff --git a/queue-4.10/vrf-fix-use-after-free-in-vrf_xmit.patch b/queue-4.10/vrf-fix-use-after-free-in-vrf_xmit.patch new file mode 100644 index 00000000000..7de16b9f58e --- /dev/null +++ b/queue-4.10/vrf-fix-use-after-free-in-vrf_xmit.patch @@ -0,0 +1,56 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: David Ahern +Date: Mon, 6 Mar 2017 08:53:04 -0800 +Subject: vrf: Fix use-after-free in vrf_xmit + +From: David Ahern + + +[ Upstream commit f7887d40e541f74402df0684a1463c0a0bb68c68 ] + +KASAN detected a use-after-free: + +[ 269.467067] BUG: KASAN: use-after-free in vrf_xmit+0x7f1/0x827 [vrf] at addr ffff8800350a21c0 +[ 269.467067] Read of size 4 by task ssh/1879 +[ 269.467067] CPU: 1 PID: 1879 Comm: ssh Not tainted 4.10.0+ #249 +[ 269.467067] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
1.7.5-20140531_083030-gandalf 04/01/2014 +[ 269.467067] Call Trace: +[ 269.467067] dump_stack+0x81/0xb6 +[ 269.467067] kasan_object_err+0x21/0x78 +[ 269.467067] kasan_report+0x2f7/0x450 +[ 269.467067] ? vrf_xmit+0x7f1/0x827 [vrf] +[ 269.467067] ? ip_output+0xa4/0xdb +[ 269.467067] __asan_load4+0x6b/0x6d +[ 269.467067] vrf_xmit+0x7f1/0x827 [vrf] +... + +Which corresponds to the skb access after xmit handling. Fix by saving +skb->len and using the saved value to update stats. + +Fixes: 193125dbd8eb2 ("net: Introduce VRF device driver") +Signed-off-by: David Ahern +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/vrf.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/net/vrf.c ++++ b/drivers/net/vrf.c +@@ -341,6 +341,7 @@ static netdev_tx_t is_ip_tx_frame(struct + + static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev) + { ++ int len = skb->len; + netdev_tx_t ret = is_ip_tx_frame(skb, dev); + + if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { +@@ -348,7 +349,7 @@ static netdev_tx_t vrf_xmit(struct sk_bu + + u64_stats_update_begin(&dstats->syncp); + dstats->tx_pkts++; +- dstats->tx_bytes += skb->len; ++ dstats->tx_bytes += len; + u64_stats_update_end(&dstats->syncp); + } else { + this_cpu_inc(dev->dstats->tx_drps); diff --git a/queue-4.10/vti6-return-gre_key-for-vti6.patch b/queue-4.10/vti6-return-gre_key-for-vti6.patch new file mode 100644 index 00000000000..aed267e1a4a --- /dev/null +++ b/queue-4.10/vti6-return-gre_key-for-vti6.patch @@ -0,0 +1,33 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: David Forster +Date: Fri, 24 Feb 2017 14:20:32 +0000 +Subject: vti6: return GRE_KEY for vti6 + +From: David Forster + + +[ Upstream commit 7dcdf941cdc96692ab99fd790c8cc68945514851 ] + +Align vti6 with vti by returning GRE_KEY flag. This enables iproute2 +to display tunnel keys on "ip -6 tunnel show" + +Signed-off-by: David Forster +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_vti.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/net/ipv6/ip6_vti.c ++++ b/net/ipv6/ip6_vti.c +@@ -692,6 +692,10 @@ vti6_parm_to_user(struct ip6_tnl_parm2 * + u->link = p->link; + u->i_key = p->i_key; + u->o_key = p->o_key; ++ if (u->i_key) ++ u->i_flags |= GRE_KEY; ++ if (u->o_key) ++ u->o_flags |= GRE_KEY; + u->proto = p->proto; + + memcpy(u->name, p->name, sizeof(u->name)); diff --git a/queue-4.10/vxlan-correctly-validate-vxlan-id-against-vxlan_n_vid.patch b/queue-4.10/vxlan-correctly-validate-vxlan-id-against-vxlan_n_vid.patch new file mode 100644 index 00000000000..f5462549153 --- /dev/null +++ b/queue-4.10/vxlan-correctly-validate-vxlan-id-against-vxlan_n_vid.patch @@ -0,0 +1,33 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: Matthias Schiffer +Date: Thu, 23 Feb 2017 17:19:41 +0100 +Subject: vxlan: correctly validate VXLAN ID against VXLAN_N_VID + +From: Matthias Schiffer + + +[ Upstream commit 4e37d6911f36545b286d15073f6f2222f840e81c ] + +The incorrect check caused an off-by-one error: the maximum VID 0xffffff +was unusable. + +Fixes: d342894c5d2f ("vxlan: virtual extensible lan") +Signed-off-by: Matthias Schiffer +Acked-by: Jiri Benc +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/vxlan.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/vxlan.c ++++ b/drivers/net/vxlan.c +@@ -2626,7 +2626,7 @@ static int vxlan_validate(struct nlattr + + if (data[IFLA_VXLAN_ID]) { + __u32 id = nla_get_u32(data[IFLA_VXLAN_ID]); +- if (id >= VXLAN_VID_MASK) ++ if (id >= VXLAN_N_VID) + return -ERANGE; + } + diff --git a/queue-4.10/vxlan-don-t-allow-overwrite-of-config-src-addr.patch b/queue-4.10/vxlan-don-t-allow-overwrite-of-config-src-addr.patch new file mode 100644 index 00000000000..cc8a07665d6 --- /dev/null +++ b/queue-4.10/vxlan-don-t-allow-overwrite-of-config-src-addr.patch @@ -0,0 +1,94 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: Brian Russell +Date: Fri, 24 Feb 2017 17:47:11 +0000 +Subject: vxlan: don't allow overwrite of config src addr + +From: Brian Russell + + +[ Upstream commit 1158632b5a2dcce0786c1b1b99654e81cc867981 ] + +When using IPv6 transport and a default dst, a pointer to the configured +source address is passed into the route lookup. If no source address is +configured, then the value is overwritten. + +IPv6 route lookup ignores egress ifindex match if the source address is set, +so if egress ifindex match is desired, the source address must be passed +as any. The overwrite breaks this for subsequent lookups. + +Avoid this by copying the configured address to an existing stack variable +and pass a pointer to that instead. + +Fixes: 272d96a5ab10 ("net: vxlan: lwt: Use source ip address during route lookup.") + +Signed-off-by: Brian Russell +Acked-by: Jiri Benc +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/vxlan.c | 12 +++++------- + 1 file changed, 5 insertions(+), 7 deletions(-) + +--- a/drivers/net/vxlan.c ++++ b/drivers/net/vxlan.c +@@ -1992,7 +1992,6 @@ static void vxlan_xmit_one(struct sk_buf + const struct iphdr *old_iph = ip_hdr(skb); + union vxlan_addr *dst; + union vxlan_addr remote_ip, local_ip; +- union vxlan_addr *src; + struct vxlan_metadata _md; + struct vxlan_metadata *md = &_md; + __be16 src_port = 0, dst_port; +@@ -2019,7 +2018,7 @@ static void vxlan_xmit_one(struct sk_buf + + dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port; + vni = rdst->remote_vni; +- src = &vxlan->cfg.saddr; ++ local_ip = vxlan->cfg.saddr; + dst_cache = &rdst->dst_cache; + md->gbp = skb->mark; + ttl = vxlan->cfg.ttl; +@@ -2052,7 +2051,6 @@ static void vxlan_xmit_one(struct sk_buf + dst = &remote_ip; + dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port; + vni = tunnel_id_to_key32(info->key.tun_id); +- src = &local_ip; + dst_cache = &info->dst_cache; + if (info->options_len) + md = ip_tunnel_info_opts(info); +@@ -2072,7 +2070,7 @@ static void vxlan_xmit_one(struct sk_buf + rt = vxlan_get_route(vxlan, dev, sock4, skb, + rdst ? rdst->remote_ifindex : 0, tos, + dst->sin.sin_addr.s_addr, +- &src->sin.sin_addr.s_addr, ++ &local_ip.sin.sin_addr.s_addr, + dst_port, src_port, + dst_cache, info); + if (IS_ERR(rt)) { +@@ -2099,7 +2097,7 @@ static void vxlan_xmit_one(struct sk_buf + if (err < 0) + goto tx_error; + +- udp_tunnel_xmit_skb(rt, sock4->sock->sk, skb, src->sin.sin_addr.s_addr, ++ udp_tunnel_xmit_skb(rt, sock4->sock->sk, skb, local_ip.sin.sin_addr.s_addr, + dst->sin.sin_addr.s_addr, tos, ttl, df, + src_port, dst_port, xnet, !udp_sum); + #if IS_ENABLED(CONFIG_IPV6) +@@ -2109,7 +2107,7 @@ static void vxlan_xmit_one(struct sk_buf + ndst = vxlan6_get_route(vxlan, dev, sock6, skb, + rdst ? 
rdst->remote_ifindex : 0, tos, + label, &dst->sin6.sin6_addr, +- &src->sin6.sin6_addr, ++ &local_ip.sin6.sin6_addr, + dst_port, src_port, + dst_cache, info); + if (IS_ERR(ndst)) { +@@ -2137,7 +2135,7 @@ static void vxlan_xmit_one(struct sk_buf + goto tx_error; + + udp_tunnel6_xmit_skb(ndst, sock6->sock->sk, skb, dev, +- &src->sin6.sin6_addr, ++ &local_ip.sin6.sin6_addr, + &dst->sin6.sin6_addr, tos, ttl, + label, src_port, dst_port, !udp_sum); + #endif diff --git a/queue-4.10/vxlan-lock-rcu-on-tx-path.patch b/queue-4.10/vxlan-lock-rcu-on-tx-path.patch new file mode 100644 index 00000000000..e20b71fd83a --- /dev/null +++ b/queue-4.10/vxlan-lock-rcu-on-tx-path.patch @@ -0,0 +1,66 @@ +From foo@baz Sat Mar 18 22:03:53 CST 2017 +From: Jakub Kicinski +Date: Fri, 24 Feb 2017 11:43:36 -0800 +Subject: vxlan: lock RCU on TX path + +From: Jakub Kicinski + + +[ Upstream commit 56de859e9967c070464a9a9f4f18d73f9447298e ] + +There are no guarantees that callers of the TX path will hold +the RCU lock. Grab it explicitly. + +Fixes: c6fcc4fc5f8b ("vxlan: avoid using stale vxlan socket.") +Signed-off-by: Jakub Kicinski +Signed-off-by: David S.
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/vxlan.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/drivers/net/vxlan.c ++++ b/drivers/net/vxlan.c +@@ -2062,6 +2062,7 @@ static void vxlan_xmit_one(struct sk_buf + src_port = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min, + vxlan->cfg.port_max, true); + ++ rcu_read_lock(); + if (dst->sa.sa_family == AF_INET) { + struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock); + struct rtable *rt; +@@ -2084,7 +2085,7 @@ static void vxlan_xmit_one(struct sk_buf + dst_port, vni, &rt->dst, + rt->rt_flags); + if (err) +- return; ++ goto out_unlock; + } else if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT) { + df = htons(IP_DF); + } +@@ -2123,7 +2124,7 @@ static void vxlan_xmit_one(struct sk_buf + dst_port, vni, ndst, + rt6i_flags); + if (err) +- return; ++ goto out_unlock; + } + + tos = ip_tunnel_ecn_encap(tos, old_iph, skb); +@@ -2140,6 +2141,8 @@ static void vxlan_xmit_one(struct sk_buf + label, src_port, dst_port, !udp_sum); + #endif + } ++out_unlock: ++ rcu_read_unlock(); + return; + + drop: +@@ -2148,6 +2151,7 @@ drop: + return; + + tx_error: ++ rcu_read_unlock(); + if (err == -ELOOP) + dev->stats.collisions++; + else if (err == -ENETUNREACH) diff --git a/queue-4.9/series b/queue-4.9/series new file mode 100644 index 00000000000..3253da6f618 --- /dev/null +++ b/queue-4.9/series @@ -0,0 +1,41 @@ +net-mlx5e-register-unregister-vport-representors-on-interface-attach-detach.patch +net-mlx5e-do-not-reduce-lro-wqe-size-when-not-using-build_skb.patch +net-mlx5e-fix-wrong-cqe-decompression.patch +vxlan-correctly-validate-vxlan-id-against-vxlan_n_vid.patch +vti6-return-gre_key-for-vti6.patch +vxlan-don-t-allow-overwrite-of-config-src-addr.patch +ipv4-mask-tos-for-input-route.patch +net-sched-actions-decrement-module-reference-count-after-table-flush.patch +l2tp-avoid-use-after-free-caused-by-l2tp_ip_backlog_recv.patch +net-phy-avoid-deadlock-during-phy_error.patch 
+vxlan-lock-rcu-on-tx-path.patch +geneve-lock-rcu-on-tx-path.patch +mlxsw-spectrum_router-avoid-potential-packets-loss.patch +tcp-dccp-block-bh-for-syn-processing.patch +net-bridge-allow-ipv6-when-multicast-flood-is-disabled.patch +net-don-t-call-strlen-on-the-user-buffer-in-packet_bind_spkt.patch +net-net_enable_timestamp-can-be-called-from-irq-contexts.patch +ipv6-orphan-skbs-in-reassembly-unit.patch +dccp-unlock-sock-before-calling-sk_free.patch +strparser-destroy-workqueue-on-module-exit.patch +tcp-fix-various-issues-for-sockets-morphing-to-listen-state.patch +net-fix-socket-refcounting-in-skb_complete_wifi_ack.patch +net-fix-socket-refcounting-in-skb_complete_tx_timestamp.patch +net-sched-act_skbmod-remove-unneeded-rcu_read_unlock-in-tcf_skbmod_dump.patch +dccp-fix-use-after-free-in-dccp_feat_activate_values.patch +vrf-fix-use-after-free-in-vrf_xmit.patch +net-tunnel-set-inner-protocol-in-network-gro-hooks.patch +uapi-fix-linux-packet_diag.h-userspace-compilation-error.patch +act_connmark-avoid-crashing-on-malformed-nlattrs-with-null-parms.patch +mpls-send-route-delete-notifications-when-router-module-is-unloaded.patch +mpls-do-not-decrement-alive-counter-for-unregister-events.patch +ipv6-make-ecmp-route-replacement-less-greedy.patch +ipv6-avoid-write-to-a-possibly-cloned-skb.patch +bridge-drop-netfilter-fake-rtable-unconditionally.patch +dccp-tcp-fix-routing-redirect-race.patch +tun-fix-premature-pollout-notification-on-tun-devices.patch +dccp-fix-memory-leak-during-tear-down-of-unsuccessful-connection-request.patch +bpf-detect-identical-ptr_to_map_value_or_null-registers.patch +bpf-fix-state-equivalence.patch +bpf-fix-regression-on-verifier-pruning-wrt-map-lookups.patch +bpf-fix-mark_reg_unknown_value-for-spilled-regs-on-map-value-marking.patch -- 2.47.3