From: Greg Kroah-Hartman Date: Sun, 8 Sep 2019 08:07:48 +0000 (+0100) Subject: 5.2-stable patches X-Git-Tag: v4.4.192~9 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=e14dfe63096c9a1d9076921dc4caccfaccc529ae;p=thirdparty%2Fkernel%2Fstable-queue.git 5.2-stable patches added patches: add-genphy_c45_config_aneg-function-to-phy-c45.c.patch mld-fix-memory-leak-in-mld_del_delrec.patch net-dsa-tag_8021q-future-proof-the-reserved-fields-in-the-custom-vid.patch net-fix-skb-use-after-free-in-netpoll.patch net-rds-fix-info-leak-in-rds6_inc_info_copy.patch net-sched-act_sample-fix-psample-group-handling-on-overwrite.patch net-sched-cbs-set-default-link-speed-to-10-mbps-in-cbs_set_port_rate.patch net-sched-pfifo_fast-fix-wrong-dereference-in-pfifo_fast_enqueue.patch net-sched-pfifo_fast-fix-wrong-dereference-when-qdisc-is-reset.patch net-stmmac-dwmac-rk-don-t-fail-if-phy-regulator-is-absent.patch net_sched-fix-a-null-pointer-deref-in-ipt-action.patch nfp-flower-handle-neighbour-events-on-internal-ports.patch nfp-flower-prevent-ingress-block-binds-on-internal-ports.patch r8152-remove-calling-netif_napi_del.patch revert-r8152-napi-hangup-fix-after-disconnect.patch taprio-fix-kernel-panic-in-taprio_destroy.patch taprio-set-default-link-speed-to-10-mbps-in-taprio_set_picos_per_byte.patch tcp-inherit-timestamp-on-mtu-probe.patch tcp-remove-empty-skb-from-write-queue-in-error-cases.patch --- diff --git a/queue-5.2/add-genphy_c45_config_aneg-function-to-phy-c45.c.patch b/queue-5.2/add-genphy_c45_config_aneg-function-to-phy-c45.c.patch new file mode 100644 index 00000000000..3bc2968ed53 --- /dev/null +++ b/queue-5.2/add-genphy_c45_config_aneg-function-to-phy-c45.c.patch @@ -0,0 +1,91 @@ +From foo@baz Sun 08 Sep 2019 09:06:34 AM WEST +From: Marco Hartmann +Date: Wed, 21 Aug 2019 11:00:46 +0000 +Subject: Add genphy_c45_config_aneg() function to phy-c45.c + +From: Marco Hartmann + +[ Upstream commit 2ebb991641d3f64b70fec0156e2b6933810177e9 ] + +Commit 34786005eca3 ("net: phy: 
prevent PHYs w/o Clause 22 regs from calling +genphy_config_aneg") introduced a check that aborts phy_config_aneg() +if the phy is a C45 phy. +This causes phy_state_machine() to call phy_error() so that the phy +ends up in PHY_HALTED state. + +Instead of returning -EOPNOTSUPP, call genphy_c45_config_aneg() +(analogous to the C22 case) so that the state machine can run +correctly. + +genphy_c45_config_aneg() closely resembles mv3310_config_aneg() +in drivers/net/phy/marvell10g.c, excluding vendor specific +configurations for 1000BaseT. + +Fixes: 22b56e827093 ("net: phy: replace genphy_10g_driver with genphy_c45_driver") + +Signed-off-by: Marco Hartmann +Reviewed-by: Andrew Lunn +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/phy/phy-c45.c | 26 ++++++++++++++++++++++++++ + drivers/net/phy/phy.c | 2 +- + include/linux/phy.h | 1 + + 3 files changed, 28 insertions(+), 1 deletion(-) + +--- a/drivers/net/phy/phy-c45.c ++++ b/drivers/net/phy/phy-c45.c +@@ -523,6 +523,32 @@ int genphy_c45_read_status(struct phy_de + } + EXPORT_SYMBOL_GPL(genphy_c45_read_status); + ++/** ++ * genphy_c45_config_aneg - restart auto-negotiation or forced setup ++ * @phydev: target phy_device struct ++ * ++ * Description: If auto-negotiation is enabled, we configure the ++ * advertising, and then restart auto-negotiation. If it is not ++ * enabled, then we force a configuration. 
++ */ ++int genphy_c45_config_aneg(struct phy_device *phydev) ++{ ++ bool changed = false; ++ int ret; ++ ++ if (phydev->autoneg == AUTONEG_DISABLE) ++ return genphy_c45_pma_setup_forced(phydev); ++ ++ ret = genphy_c45_an_config_aneg(phydev); ++ if (ret < 0) ++ return ret; ++ if (ret > 0) ++ changed = true; ++ ++ return genphy_c45_check_and_restart_aneg(phydev, changed); ++} ++EXPORT_SYMBOL_GPL(genphy_c45_config_aneg); ++ + /* The gen10g_* functions are the old Clause 45 stub */ + + int gen10g_config_aneg(struct phy_device *phydev) +--- a/drivers/net/phy/phy.c ++++ b/drivers/net/phy/phy.c +@@ -499,7 +499,7 @@ static int phy_config_aneg(struct phy_de + * allowed to call genphy_config_aneg() + */ + if (phydev->is_c45 && !(phydev->c45_ids.devices_in_package & BIT(0))) +- return -EOPNOTSUPP; ++ return genphy_c45_config_aneg(phydev); + + return genphy_config_aneg(phydev); + } +--- a/include/linux/phy.h ++++ b/include/linux/phy.h +@@ -1108,6 +1108,7 @@ int genphy_c45_an_disable_aneg(struct ph + int genphy_c45_read_mdix(struct phy_device *phydev); + int genphy_c45_pma_read_abilities(struct phy_device *phydev); + int genphy_c45_read_status(struct phy_device *phydev); ++int genphy_c45_config_aneg(struct phy_device *phydev); + + /* The gen10g_* functions are the old Clause 45 stub */ + int gen10g_config_aneg(struct phy_device *phydev); diff --git a/queue-5.2/mld-fix-memory-leak-in-mld_del_delrec.patch b/queue-5.2/mld-fix-memory-leak-in-mld_del_delrec.patch new file mode 100644 index 00000000000..3f04cc0d949 --- /dev/null +++ b/queue-5.2/mld-fix-memory-leak-in-mld_del_delrec.patch @@ -0,0 +1,73 @@ +From foo@baz Sun 08 Sep 2019 09:06:33 AM WEST +From: Eric Dumazet +Date: Tue, 27 Aug 2019 03:33:12 -0700 +Subject: mld: fix memory leak in mld_del_delrec() + +From: Eric Dumazet + +[ Upstream commit a84d016479896b5526a2cc54784e6ffc41c9d6f6 ] + +Similar to the fix done for IPv4 in commit e5b1c6c6277d +("igmp: fix memory leak in igmpv3_del_delrec()"), we need to +make sure mca_tomb 
and mca_sources are not blindly overwritten. + +Using swap() then a call to ip6_mc_clear_src() will take care +of the missing free. + +BUG: memory leak +unreferenced object 0xffff888117d9db00 (size 64): + comm "syz-executor247", pid 6918, jiffies 4294943989 (age 25.350s) + hex dump (first 32 bytes): + 00 00 00 00 00 00 00 00 fe 88 00 00 00 00 00 00 ................ + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ + backtrace: + [<000000005b463030>] kmemleak_alloc_recursive include/linux/kmemleak.h:43 [inline] + [<000000005b463030>] slab_post_alloc_hook mm/slab.h:522 [inline] + [<000000005b463030>] slab_alloc mm/slab.c:3319 [inline] + [<000000005b463030>] kmem_cache_alloc_trace+0x145/0x2c0 mm/slab.c:3548 + [<00000000939cbf94>] kmalloc include/linux/slab.h:552 [inline] + [<00000000939cbf94>] kzalloc include/linux/slab.h:748 [inline] + [<00000000939cbf94>] ip6_mc_add1_src net/ipv6/mcast.c:2236 [inline] + [<00000000939cbf94>] ip6_mc_add_src+0x31f/0x420 net/ipv6/mcast.c:2356 + [<00000000d8972221>] ip6_mc_source+0x4a8/0x600 net/ipv6/mcast.c:449 + [<000000002b203d0d>] do_ipv6_setsockopt.isra.0+0x1b92/0x1dd0 net/ipv6/ipv6_sockglue.c:748 + [<000000001f1e2d54>] ipv6_setsockopt+0x89/0xd0 net/ipv6/ipv6_sockglue.c:944 + [<00000000c8f7bdf9>] udpv6_setsockopt+0x4e/0x90 net/ipv6/udp.c:1558 + [<000000005a9a0c5e>] sock_common_setsockopt+0x38/0x50 net/core/sock.c:3139 + [<00000000910b37b2>] __sys_setsockopt+0x10f/0x220 net/socket.c:2084 + [<00000000e9108023>] __do_sys_setsockopt net/socket.c:2100 [inline] + [<00000000e9108023>] __se_sys_setsockopt net/socket.c:2097 [inline] + [<00000000e9108023>] __x64_sys_setsockopt+0x26/0x30 net/socket.c:2097 + [<00000000f4818160>] do_syscall_64+0x76/0x1a0 arch/x86/entry/common.c:296 + [<000000008d367e8f>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 + +Fixes: 1666d49e1d41 ("mld: do not remove mld souce list info when set link down") +Fixes: 9c8bb163ae78 ("igmp, mld: Fix memory leak in igmpv3/mld_del_delrec()") +Signed-off-by: Eric 
Dumazet +Reported-by: syzbot +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/mcast.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/net/ipv6/mcast.c ++++ b/net/ipv6/mcast.c +@@ -787,14 +787,15 @@ static void mld_del_delrec(struct inet6_ + if (pmc) { + im->idev = pmc->idev; + if (im->mca_sfmode == MCAST_INCLUDE) { +- im->mca_tomb = pmc->mca_tomb; +- im->mca_sources = pmc->mca_sources; ++ swap(im->mca_tomb, pmc->mca_tomb); ++ swap(im->mca_sources, pmc->mca_sources); + for (psf = im->mca_sources; psf; psf = psf->sf_next) + psf->sf_crcount = idev->mc_qrv; + } else { + im->mca_crcount = idev->mc_qrv; + } + in6_dev_put(pmc->idev); ++ ip6_mc_clear_src(pmc); + kfree(pmc); + } + spin_unlock_bh(&im->mca_lock); diff --git a/queue-5.2/net-dsa-tag_8021q-future-proof-the-reserved-fields-in-the-custom-vid.patch b/queue-5.2/net-dsa-tag_8021q-future-proof-the-reserved-fields-in-the-custom-vid.patch new file mode 100644 index 00000000000..ffc9980f47f --- /dev/null +++ b/queue-5.2/net-dsa-tag_8021q-future-proof-the-reserved-fields-in-the-custom-vid.patch @@ -0,0 +1,47 @@ +From foo@baz Sun 08 Sep 2019 09:06:34 AM WEST +From: Vladimir Oltean +Date: Sun, 25 Aug 2019 21:32:12 +0300 +Subject: net: dsa: tag_8021q: Future-proof the reserved fields in the custom VID + +From: Vladimir Oltean + +[ Upstream commit bcccb0a535bb99616e4b992568371efab1ab14e8 ] + +After witnessing the discussion in https://lkml.org/lkml/2019/8/14/151 +w.r.t. ioctl extensibility, it became clear that such an issue might +prevent that the 3 RSV bits inside the DSA 802.1Q tag might also suffer +the same fate and be useless for further extension. + +So clearly specify that the reserved bits should currently be +transmitted as zero and ignored on receive. The DSA tagger already does +this (and has always did), and is the only known user so far (no +Wireshark dissection plugin, etc). So there should be no incompatibility +to speak of. 
+ +Fixes: 0471dd429cea ("net: dsa: tag_8021q: Create a stable binary format") +Signed-off-by: Vladimir Oltean +Reviewed-by: Florian Fainelli +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/dsa/tag_8021q.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/dsa/tag_8021q.c ++++ b/net/dsa/tag_8021q.c +@@ -28,6 +28,7 @@ + * + * RSV - VID[9]: + * To be used for further expansion of SWITCH_ID or for other purposes. ++ * Must be transmitted as zero and ignored on receive. + * + * SWITCH_ID - VID[8:6]: + * Index of switch within DSA tree. Must be between 0 and +@@ -35,6 +36,7 @@ + * + * RSV - VID[5:4]: + * To be used for further expansion of PORT or for other purposes. ++ * Must be transmitted as zero and ignored on receive. + * + * PORT - VID[3:0]: + * Index of switch port. Must be between 0 and DSA_MAX_PORTS - 1. diff --git a/queue-5.2/net-fix-skb-use-after-free-in-netpoll.patch b/queue-5.2/net-fix-skb-use-after-free-in-netpoll.patch new file mode 100644 index 00000000000..de0538a0945 --- /dev/null +++ b/queue-5.2/net-fix-skb-use-after-free-in-netpoll.patch @@ -0,0 +1,90 @@ +From foo@baz Sun 08 Sep 2019 09:06:33 AM WEST +From: Feng Sun +Date: Mon, 26 Aug 2019 14:46:04 +0800 +Subject: net: fix skb use after free in netpoll + +From: Feng Sun + +[ Upstream commit 2c1644cf6d46a8267d79ed95cb9b563839346562 ] + +After commit baeababb5b85d5c4e6c917efe2a1504179438d3b +("tun: return NET_XMIT_DROP for dropped packets"), +when tun_net_xmit drop packets, it will free skb and return NET_XMIT_DROP, +netpoll_send_skb_on_dev will run into following use after free cases: +1. retry netpoll_start_xmit with freed skb; +2. queue freed skb in npinfo->txq. +queue_process will also run into use after free case. + +hit netpoll_send_skb_on_dev first case with following kernel log: + +[ 117.864773] kernel BUG at mm/slub.c:306! 
+[ 117.864773] invalid opcode: 0000 [#1] SMP PTI +[ 117.864774] CPU: 3 PID: 2627 Comm: loop_printmsg Kdump: loaded Tainted: P OE 5.3.0-050300rc5-generic #201908182231 +[ 117.864775] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 +[ 117.864775] RIP: 0010:kmem_cache_free+0x28d/0x2b0 +[ 117.864781] Call Trace: +[ 117.864781] ? tun_net_xmit+0x21c/0x460 +[ 117.864781] kfree_skbmem+0x4e/0x60 +[ 117.864782] kfree_skb+0x3a/0xa0 +[ 117.864782] tun_net_xmit+0x21c/0x460 +[ 117.864782] netpoll_start_xmit+0x11d/0x1b0 +[ 117.864788] netpoll_send_skb_on_dev+0x1b8/0x200 +[ 117.864789] __br_forward+0x1b9/0x1e0 [bridge] +[ 117.864789] ? skb_clone+0x53/0xd0 +[ 117.864790] ? __skb_clone+0x2e/0x120 +[ 117.864790] deliver_clone+0x37/0x50 [bridge] +[ 117.864790] maybe_deliver+0x89/0xc0 [bridge] +[ 117.864791] br_flood+0x6c/0x130 [bridge] +[ 117.864791] br_dev_xmit+0x315/0x3c0 [bridge] +[ 117.864792] netpoll_start_xmit+0x11d/0x1b0 +[ 117.864792] netpoll_send_skb_on_dev+0x1b8/0x200 +[ 117.864792] netpoll_send_udp+0x2c6/0x3e8 +[ 117.864793] write_msg+0xd9/0xf0 [netconsole] +[ 117.864793] console_unlock+0x386/0x4e0 +[ 117.864793] vprintk_emit+0x17e/0x280 +[ 117.864794] vprintk_default+0x29/0x50 +[ 117.864794] vprintk_func+0x4c/0xbc +[ 117.864794] printk+0x58/0x6f +[ 117.864795] loop_fun+0x24/0x41 [printmsg_loop] +[ 117.864795] kthread+0x104/0x140 +[ 117.864795] ? 0xffffffffc05b1000 +[ 117.864796] ? kthread_park+0x80/0x80 +[ 117.864796] ret_from_fork+0x35/0x40 + +Signed-off-by: Feng Sun +Signed-off-by: Xiaojun Zhao +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/netpoll.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/net/core/netpoll.c ++++ b/net/core/netpoll.c +@@ -122,7 +122,7 @@ static void queue_process(struct work_st + txq = netdev_get_tx_queue(dev, q_index); + HARD_TX_LOCK(dev, txq, smp_processor_id()); + if (netif_xmit_frozen_or_stopped(txq) || +- netpoll_start_xmit(skb, dev, txq) != NETDEV_TX_OK) { ++ !dev_xmit_complete(netpoll_start_xmit(skb, dev, txq))) { + skb_queue_head(&npinfo->txq, skb); + HARD_TX_UNLOCK(dev, txq); + local_irq_restore(flags); +@@ -335,7 +335,7 @@ void netpoll_send_skb_on_dev(struct netp + + HARD_TX_UNLOCK(dev, txq); + +- if (status == NETDEV_TX_OK) ++ if (dev_xmit_complete(status)) + break; + + } +@@ -352,7 +352,7 @@ void netpoll_send_skb_on_dev(struct netp + + } + +- if (status != NETDEV_TX_OK) { ++ if (!dev_xmit_complete(status)) { + skb_queue_tail(&npinfo->txq, skb); + schedule_delayed_work(&npinfo->tx_work,0); + } diff --git a/queue-5.2/net-rds-fix-info-leak-in-rds6_inc_info_copy.patch b/queue-5.2/net-rds-fix-info-leak-in-rds6_inc_info_copy.patch new file mode 100644 index 00000000000..dfc2482fe92 --- /dev/null +++ b/queue-5.2/net-rds-fix-info-leak-in-rds6_inc_info_copy.patch @@ -0,0 +1,51 @@ +From foo@baz Sun 08 Sep 2019 09:06:34 AM WEST +From: Ka-Cheong Poon +Date: Mon, 26 Aug 2019 02:39:12 -0700 +Subject: net/rds: Fix info leak in rds6_inc_info_copy() + +From: Ka-Cheong Poon + +[ Upstream commit 7d0a06586b2686ba80c4a2da5f91cb10ffbea736 ] + +The rds6_inc_info_copy() function has a couple struct members which +are leaking stack information. The ->tos field should hold actual +information and the ->flags field needs to be zeroed out. + +Fixes: 3eb450367d08 ("rds: add type of service(tos) infrastructure") +Fixes: b7ff8b1036f0 ("rds: Extend RDS API for IPv6 support") +Reported-by: 黄ID蝴蝶 +Signed-off-by: Dan Carpenter +Signed-off-by: Ka-Cheong Poon +Acked-by: Santosh Shilimkar +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/rds/recv.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/net/rds/recv.c ++++ b/net/rds/recv.c +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU +@@ -811,6 +811,7 @@ void rds6_inc_info_copy(struct rds_incom + + minfo6.seq = be64_to_cpu(inc->i_hdr.h_sequence); + minfo6.len = be32_to_cpu(inc->i_hdr.h_len); ++ minfo6.tos = inc->i_conn->c_tos; + + if (flip) { + minfo6.laddr = *daddr; +@@ -824,6 +825,8 @@ void rds6_inc_info_copy(struct rds_incom + minfo6.fport = inc->i_hdr.h_dport; + } + ++ minfo6.flags = 0; ++ + rds_info_copy(iter, &minfo6, sizeof(minfo6)); + } + #endif diff --git a/queue-5.2/net-sched-act_sample-fix-psample-group-handling-on-overwrite.patch b/queue-5.2/net-sched-act_sample-fix-psample-group-handling-on-overwrite.patch new file mode 100644 index 00000000000..dd1eb5c2a3f --- /dev/null +++ b/queue-5.2/net-sched-act_sample-fix-psample-group-handling-on-overwrite.patch @@ -0,0 +1,78 @@ +From foo@baz Sun 08 Sep 2019 09:06:33 AM WEST +From: Vlad Buslov +Date: Tue, 27 Aug 2019 21:49:38 +0300 +Subject: net: sched: act_sample: fix psample group handling on overwrite + +From: Vlad Buslov + +[ Upstream commit dbf47a2a094edf58983265e323ca4bdcdb58b5ee ] + +Action sample doesn't properly handle psample_group pointer in overwrite +case. Following issues need to be fixed: + +- In tcf_sample_init() function RCU_INIT_POINTER() is used to set + s->psample_group, even though we neither setting the pointer to NULL, nor + preventing concurrent readers from accessing the pointer in some way. + Use rcu_swap_protected() instead to safely reset the pointer. 
+ +- Old value of s->psample_group is not released or deallocated in any way, + which results resource leak. Use psample_group_put() on non-NULL value + obtained with rcu_swap_protected(). + +- The function psample_group_put() that released reference to struct + psample_group pointed by rcu-pointer s->psample_group doesn't respect rcu + grace period when deallocating it. Extend struct psample_group with rcu + head and use kfree_rcu when freeing it. + +Fixes: 5c5670fae430 ("net/sched: Introduce sample tc action") +Signed-off-by: Vlad Buslov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/psample.h | 1 + + net/psample/psample.c | 2 +- + net/sched/act_sample.c | 6 +++++- + 3 files changed, 7 insertions(+), 2 deletions(-) + +--- a/include/net/psample.h ++++ b/include/net/psample.h +@@ -11,6 +11,7 @@ struct psample_group { + u32 group_num; + u32 refcount; + u32 seq; ++ struct rcu_head rcu; + }; + + struct psample_group *psample_group_get(struct net *net, u32 group_num); +--- a/net/psample/psample.c ++++ b/net/psample/psample.c +@@ -154,7 +154,7 @@ static void psample_group_destroy(struct + { + psample_group_notify(group, PSAMPLE_CMD_DEL_GROUP); + list_del(&group->list); +- kfree(group); ++ kfree_rcu(group, rcu); + } + + static struct psample_group * +--- a/net/sched/act_sample.c ++++ b/net/sched/act_sample.c +@@ -102,13 +102,17 @@ static int tcf_sample_init(struct net *n + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); + s->rate = rate; + s->psample_group_num = psample_group_num; +- RCU_INIT_POINTER(s->psample_group, psample_group); ++ rcu_swap_protected(s->psample_group, psample_group, ++ lockdep_is_held(&s->tcf_lock)); + + if (tb[TCA_SAMPLE_TRUNC_SIZE]) { + s->truncate = true; + s->trunc_size = nla_get_u32(tb[TCA_SAMPLE_TRUNC_SIZE]); + } + spin_unlock_bh(&s->tcf_lock); ++ ++ if (psample_group) ++ psample_group_put(psample_group); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); + diff --git 
a/queue-5.2/net-sched-cbs-set-default-link-speed-to-10-mbps-in-cbs_set_port_rate.patch b/queue-5.2/net-sched-cbs-set-default-link-speed-to-10-mbps-in-cbs_set_port_rate.patch new file mode 100644 index 00000000000..84fc8bf42a1 --- /dev/null +++ b/queue-5.2/net-sched-cbs-set-default-link-speed-to-10-mbps-in-cbs_set_port_rate.patch @@ -0,0 +1,58 @@ +From foo@baz Sun 08 Sep 2019 09:06:33 AM WEST +From: Vladimir Oltean +Date: Fri, 30 Aug 2019 04:07:23 +0300 +Subject: net/sched: cbs: Set default link speed to 10 Mbps in cbs_set_port_rate + +From: Vladimir Oltean + +The discussion to be made is absolutely the same as in the case of +previous patch ("taprio: Set default link speed to 10 Mbps in +taprio_set_picos_per_byte"). Nothing is lost when setting a default. + +Cc: Leandro Dorileo +Fixes: e0a7683d30e9 ("net/sched: cbs: fix port_rate miscalculation") +Acked-by: Vinicius Costa Gomes +Signed-off-by: Vladimir Oltean +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_cbs.c | 19 +++++++++++-------- + 1 file changed, 11 insertions(+), 8 deletions(-) + +--- a/net/sched/sch_cbs.c ++++ b/net/sched/sch_cbs.c +@@ -181,11 +181,6 @@ static struct sk_buff *cbs_dequeue_soft( + s64 credits; + int len; + +- if (atomic64_read(&q->port_rate) == -1) { +- WARN_ONCE(1, "cbs: dequeue() called with unknown port rate."); +- return NULL; +- } +- + if (q->credits < 0) { + credits = timediff_to_credits(now - q->last, q->idleslope); + +@@ -303,11 +298,19 @@ static int cbs_enable_offload(struct net + static void cbs_set_port_rate(struct net_device *dev, struct cbs_sched_data *q) + { + struct ethtool_link_ksettings ecmd; ++ int speed = SPEED_10; + int port_rate = -1; ++ int err; ++ ++ err = __ethtool_get_link_ksettings(dev, &ecmd); ++ if (err < 0) ++ goto skip; ++ ++ if (ecmd.base.speed != SPEED_UNKNOWN) ++ speed = ecmd.base.speed; + +- if (!__ethtool_get_link_ksettings(dev, &ecmd) && +- ecmd.base.speed != SPEED_UNKNOWN) +- port_rate = ecmd.base.speed * 1000 * 
BYTES_PER_KBIT; ++skip: ++ port_rate = speed * 1000 * BYTES_PER_KBIT; + + atomic64_set(&q->port_rate, port_rate); + netdev_dbg(dev, "cbs: set %s's port_rate to: %lld, linkspeed: %d\n", diff --git a/queue-5.2/net-sched-pfifo_fast-fix-wrong-dereference-in-pfifo_fast_enqueue.patch b/queue-5.2/net-sched-pfifo_fast-fix-wrong-dereference-in-pfifo_fast_enqueue.patch new file mode 100644 index 00000000000..2b6fd839a18 --- /dev/null +++ b/queue-5.2/net-sched-pfifo_fast-fix-wrong-dereference-in-pfifo_fast_enqueue.patch @@ -0,0 +1,99 @@ +From foo@baz Sun 08 Sep 2019 09:06:34 AM WEST +From: Davide Caratti +Date: Tue, 27 Aug 2019 23:18:53 +0200 +Subject: net/sched: pfifo_fast: fix wrong dereference in pfifo_fast_enqueue + +From: Davide Caratti + +[ Upstream commit 092e22e586236bba106a82113826a68080a03506 ] + +Now that 'TCQ_F_CPUSTATS' bit can be cleared, depending on the value of +'TCQ_F_NOLOCK' bit in the parent qdisc, we can't assume anymore that +per-cpu counters are there in the error path of skb_array_produce(). +Otherwise, the following splat can be seen: + + Unable to handle kernel paging request at virtual address 0000600dea430008 + Mem abort info: + ESR = 0x96000005 + Exception class = DABT (current EL), IL = 32 bits + SET = 0, FnV = 0 + EA = 0, S1PTW = 0 + Data abort info: + ISV = 0, ISS = 0x00000005 + CM = 0, WnR = 0 + user pgtable: 64k pages, 48-bit VAs, pgdp = 000000007b97530e + [0000600dea430008] pgd=0000000000000000, pud=0000000000000000 + Internal error: Oops: 96000005 [#1] SMP +[...] 
+ pstate: 10000005 (nzcV daif -PAN -UAO) + pc : pfifo_fast_enqueue+0x524/0x6e8 + lr : pfifo_fast_enqueue+0x46c/0x6e8 + sp : ffff800d39376fe0 + x29: ffff800d39376fe0 x28: 1ffff001a07d1e40 + x27: ffff800d03e8f188 x26: ffff800d03e8f200 + x25: 0000000000000062 x24: ffff800d393772f0 + x23: 0000000000000000 x22: 0000000000000403 + x21: ffff800cca569a00 x20: ffff800d03e8ee00 + x19: ffff800cca569a10 x18: 00000000000000bf + x17: 0000000000000000 x16: 0000000000000000 + x15: 0000000000000000 x14: ffff1001a726edd0 + x13: 1fffe4000276a9a4 x12: 0000000000000000 + x11: dfff200000000000 x10: ffff800d03e8f1a0 + x9 : 0000000000000003 x8 : 0000000000000000 + x7 : 00000000f1f1f1f1 x6 : ffff1001a726edea + x5 : ffff800cca56a53c x4 : 1ffff001bf9a8003 + x3 : 1ffff001bf9a8003 x2 : 1ffff001a07d1dcb + x1 : 0000600dea430000 x0 : 0000600dea430008 + Process ping (pid: 6067, stack limit = 0x00000000dc0aa557) + Call trace: + pfifo_fast_enqueue+0x524/0x6e8 + htb_enqueue+0x660/0x10e0 [sch_htb] + __dev_queue_xmit+0x123c/0x2de0 + dev_queue_xmit+0x24/0x30 + ip_finish_output2+0xc48/0x1720 + ip_finish_output+0x548/0x9d8 + ip_output+0x334/0x788 + ip_local_out+0x90/0x138 + ip_send_skb+0x44/0x1d0 + ip_push_pending_frames+0x5c/0x78 + raw_sendmsg+0xed8/0x28d0 + inet_sendmsg+0xc4/0x5c0 + sock_sendmsg+0xac/0x108 + __sys_sendto+0x1ac/0x2a0 + __arm64_sys_sendto+0xc4/0x138 + el0_svc_handler+0x13c/0x298 + el0_svc+0x8/0xc + Code: f9402e80 d538d081 91002000 8b010000 (885f7c03) + +Fix this by testing the value of 'TCQ_F_CPUSTATS' bit in 'qdisc->flags', +before dereferencing 'qdisc->cpu_qstats'. + +Fixes: 8a53e616de29 ("net: sched: when clearing NOLOCK, clear TCQ_F_CPUSTATS, too") +CC: Paolo Abeni +CC: Stefano Brivio +Reported-by: Li Shuang +Signed-off-by: Davide Caratti +Acked-by: Paolo Abeni +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_generic.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/net/sched/sch_generic.c ++++ b/net/sched/sch_generic.c +@@ -624,8 +624,12 @@ static int pfifo_fast_enqueue(struct sk_ + + err = skb_array_produce(q, skb); + +- if (unlikely(err)) +- return qdisc_drop_cpu(skb, qdisc, to_free); ++ if (unlikely(err)) { ++ if (qdisc_is_percpu_stats(qdisc)) ++ return qdisc_drop_cpu(skb, qdisc, to_free); ++ else ++ return qdisc_drop(skb, qdisc, to_free); ++ } + + qdisc_update_stats_at_enqueue(qdisc, pkt_len); + return NET_XMIT_SUCCESS; diff --git a/queue-5.2/net-sched-pfifo_fast-fix-wrong-dereference-when-qdisc-is-reset.patch b/queue-5.2/net-sched-pfifo_fast-fix-wrong-dereference-when-qdisc-is-reset.patch new file mode 100644 index 00000000000..f48e9d0e47e --- /dev/null +++ b/queue-5.2/net-sched-pfifo_fast-fix-wrong-dereference-when-qdisc-is-reset.patch @@ -0,0 +1,119 @@ +From foo@baz Sun 08 Sep 2019 09:06:34 AM WEST +From: Davide Caratti +Date: Tue, 27 Aug 2019 12:29:09 +0200 +Subject: net/sched: pfifo_fast: fix wrong dereference when qdisc is reset + +From: Davide Caratti + +[ Upstream commit 04d37cf46a773910f75fefaa9f9488f42bfe1fe2 ] + +Now that 'TCQ_F_CPUSTATS' bit can be cleared, depending on the value of +'TCQ_F_NOLOCK' bit in the parent qdisc, we need to be sure that per-cpu +counters are present when 'reset()' is called for pfifo_fast qdiscs. +Otherwise, the following script: + + # tc q a dev lo handle 1: root htb default 100 + # tc c a dev lo parent 1: classid 1:100 htb \ + > rate 95Mbit ceil 100Mbit burst 64k + [...] + # tc f a dev lo parent 1: protocol arp basic classid 1:100 + [...] + # tc q a dev lo parent 1:100 handle 100: pfifo_fast + [...] 
+ # tc q d dev lo root + +can generate the following splat: + + Unable to handle kernel paging request at virtual address dfff2c01bd148000 + Mem abort info: + ESR = 0x96000004 + Exception class = DABT (current EL), IL = 32 bits + SET = 0, FnV = 0 + EA = 0, S1PTW = 0 + Data abort info: + ISV = 0, ISS = 0x00000004 + CM = 0, WnR = 0 + [dfff2c01bd148000] address between user and kernel address ranges + Internal error: Oops: 96000004 [#1] SMP + [...] + pstate: 80000005 (Nzcv daif -PAN -UAO) + pc : pfifo_fast_reset+0x280/0x4d8 + lr : pfifo_fast_reset+0x21c/0x4d8 + sp : ffff800d09676fa0 + x29: ffff800d09676fa0 x28: ffff200012ee22e4 + x27: dfff200000000000 x26: 0000000000000000 + x25: ffff800ca0799958 x24: ffff1001940f332b + x23: 0000000000000007 x22: ffff200012ee1ab8 + x21: 0000600de8a40000 x20: 0000000000000000 + x19: ffff800ca0799900 x18: 0000000000000000 + x17: 0000000000000002 x16: 0000000000000000 + x15: 0000000000000000 x14: 0000000000000000 + x13: 0000000000000000 x12: ffff1001b922e6e2 + x11: 1ffff001b922e6e1 x10: 0000000000000000 + x9 : 1ffff001b922e6e1 x8 : dfff200000000000 + x7 : 0000000000000000 x6 : 0000000000000000 + x5 : 1fffe400025dc45c x4 : 1fffe400025dc357 + x3 : 00000c01bd148000 x2 : 0000600de8a40000 + x1 : 0000000000000007 x0 : 0000600de8a40004 + Call trace: + pfifo_fast_reset+0x280/0x4d8 + qdisc_reset+0x6c/0x370 + htb_reset+0x150/0x3b8 [sch_htb] + qdisc_reset+0x6c/0x370 + dev_deactivate_queue.constprop.5+0xe0/0x1a8 + dev_deactivate_many+0xd8/0x908 + dev_deactivate+0xe4/0x190 + qdisc_graft+0x88c/0xbd0 + tc_get_qdisc+0x418/0x8a8 + rtnetlink_rcv_msg+0x3a8/0xa78 + netlink_rcv_skb+0x18c/0x328 + rtnetlink_rcv+0x28/0x38 + netlink_unicast+0x3c4/0x538 + netlink_sendmsg+0x538/0x9a0 + sock_sendmsg+0xac/0xf8 + ___sys_sendmsg+0x53c/0x658 + __sys_sendmsg+0xc8/0x140 + __arm64_sys_sendmsg+0x74/0xa8 + el0_svc_handler+0x164/0x468 + el0_svc+0x10/0x14 + Code: 910012a0 92400801 d343fc03 11000c21 (38fb6863) + +Fix this by testing the value of 'TCQ_F_CPUSTATS' bit in 
'qdisc->flags', +before dereferencing 'qdisc->cpu_qstats'. + +Changes since v1: + - coding style improvements, thanks to Stefano Brivio + +Fixes: 8a53e616de29 ("net: sched: when clearing NOLOCK, clear TCQ_F_CPUSTATS, too") +CC: Paolo Abeni +Reported-by: Li Shuang +Signed-off-by: Davide Caratti +Acked-by: Paolo Abeni +Reviewed-by: Stefano Brivio +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_generic.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +--- a/net/sched/sch_generic.c ++++ b/net/sched/sch_generic.c +@@ -692,11 +692,14 @@ static void pfifo_fast_reset(struct Qdis + kfree_skb(skb); + } + +- for_each_possible_cpu(i) { +- struct gnet_stats_queue *q = per_cpu_ptr(qdisc->cpu_qstats, i); ++ if (qdisc_is_percpu_stats(qdisc)) { ++ for_each_possible_cpu(i) { ++ struct gnet_stats_queue *q; + +- q->backlog = 0; +- q->qlen = 0; ++ q = per_cpu_ptr(qdisc->cpu_qstats, i); ++ q->backlog = 0; ++ q->qlen = 0; ++ } + } + } + diff --git a/queue-5.2/net-stmmac-dwmac-rk-don-t-fail-if-phy-regulator-is-absent.patch b/queue-5.2/net-stmmac-dwmac-rk-don-t-fail-if-phy-regulator-is-absent.patch new file mode 100644 index 00000000000..e4ba907804b --- /dev/null +++ b/queue-5.2/net-stmmac-dwmac-rk-don-t-fail-if-phy-regulator-is-absent.patch @@ -0,0 +1,40 @@ +From foo@baz Sun 08 Sep 2019 09:06:33 AM WEST +From: Chen-Yu Tsai +Date: Thu, 29 Aug 2019 11:17:24 +0800 +Subject: net: stmmac: dwmac-rk: Don't fail if phy regulator is absent + +From: Chen-Yu Tsai + +[ Upstream commit 3b25528e1e355c803e73aa326ce657b5606cda73 ] + +The devicetree binding lists the phy phy as optional. As such, the +driver should not bail out if it can't find a regulator. Instead it +should just skip the remaining regulator related code and continue +on normally. + +Skip the remainder of phy_power_on() if a regulator supply isn't +available. This also gets rid of the bogus return code. 
+ +Fixes: 2e12f536635f ("net: stmmac: dwmac-rk: Use standard devicetree property for phy regulator") +Signed-off-by: Chen-Yu Tsai +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c ++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +@@ -1194,10 +1194,8 @@ static int phy_power_on(struct rk_priv_d + int ret; + struct device *dev = &bsp_priv->pdev->dev; + +- if (!ldo) { +- dev_err(dev, "no regulator found\n"); +- return -1; +- } ++ if (!ldo) ++ return 0; + + if (enable) { + ret = regulator_enable(ldo); diff --git a/queue-5.2/net_sched-fix-a-null-pointer-deref-in-ipt-action.patch b/queue-5.2/net_sched-fix-a-null-pointer-deref-in-ipt-action.patch new file mode 100644 index 00000000000..48d6c082d83 --- /dev/null +++ b/queue-5.2/net_sched-fix-a-null-pointer-deref-in-ipt-action.patch @@ -0,0 +1,289 @@ +From foo@baz Sun 08 Sep 2019 09:06:33 AM WEST +From: Cong Wang +Date: Sun, 25 Aug 2019 10:01:32 -0700 +Subject: net_sched: fix a NULL pointer deref in ipt action + +From: Cong Wang + +[ Upstream commit 981471bd3abf4d572097645d765391533aac327d ] + +The net pointer in struct xt_tgdtor_param is not explicitly +initialized therefore is still NULL when dereferencing it. +So we have to find a way to pass the correct net pointer to +ipt_destroy_target(). + +The best way I find is just saving the net pointer inside the per +netns struct tcf_idrinfo, which could make this patch smaller. + +Fixes: 0c66dc1ea3f0 ("netfilter: conntrack: register hooks in netns when needed by ruleset") +Reported-and-tested-by: itugrok@yahoo.com +Cc: Jamal Hadi Salim +Cc: Jiri Pirko +Signed-off-by: Cong Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/act_api.h | 4 +++- + net/sched/act_bpf.c | 2 +- + net/sched/act_connmark.c | 2 +- + net/sched/act_csum.c | 2 +- + net/sched/act_gact.c | 2 +- + net/sched/act_ife.c | 2 +- + net/sched/act_ipt.c | 11 ++++++----- + net/sched/act_mirred.c | 2 +- + net/sched/act_nat.c | 2 +- + net/sched/act_pedit.c | 2 +- + net/sched/act_police.c | 2 +- + net/sched/act_sample.c | 2 +- + net/sched/act_simple.c | 2 +- + net/sched/act_skbedit.c | 2 +- + net/sched/act_skbmod.c | 2 +- + net/sched/act_tunnel_key.c | 2 +- + net/sched/act_vlan.c | 2 +- + 17 files changed, 24 insertions(+), 21 deletions(-) + +--- a/include/net/act_api.h ++++ b/include/net/act_api.h +@@ -15,6 +15,7 @@ + struct tcf_idrinfo { + struct mutex lock; + struct idr action_idr; ++ struct net *net; + }; + + struct tc_action_ops; +@@ -108,7 +109,7 @@ struct tc_action_net { + }; + + static inline +-int tc_action_net_init(struct tc_action_net *tn, ++int tc_action_net_init(struct net *net, struct tc_action_net *tn, + const struct tc_action_ops *ops) + { + int err = 0; +@@ -117,6 +118,7 @@ int tc_action_net_init(struct tc_action_ + if (!tn->idrinfo) + return -ENOMEM; + tn->ops = ops; ++ tn->idrinfo->net = net; + mutex_init(&tn->idrinfo->lock); + idr_init(&tn->idrinfo->action_idr); + return err; +--- a/net/sched/act_bpf.c ++++ b/net/sched/act_bpf.c +@@ -422,7 +422,7 @@ static __net_init int bpf_init_net(struc + { + struct tc_action_net *tn = net_generic(net, bpf_net_id); + +- return tc_action_net_init(tn, &act_bpf_ops); ++ return tc_action_net_init(net, tn, &act_bpf_ops); + } + + static void __net_exit bpf_exit_net(struct list_head *net_list) +--- a/net/sched/act_connmark.c ++++ b/net/sched/act_connmark.c +@@ -231,7 +231,7 @@ static __net_init int connmark_init_net( + { + struct tc_action_net *tn = net_generic(net, connmark_net_id); + +- return tc_action_net_init(tn, &act_connmark_ops); ++ return tc_action_net_init(net, tn, &act_connmark_ops); + } + + static void 
__net_exit connmark_exit_net(struct list_head *net_list) +--- a/net/sched/act_csum.c ++++ b/net/sched/act_csum.c +@@ -714,7 +714,7 @@ static __net_init int csum_init_net(stru + { + struct tc_action_net *tn = net_generic(net, csum_net_id); + +- return tc_action_net_init(tn, &act_csum_ops); ++ return tc_action_net_init(net, tn, &act_csum_ops); + } + + static void __net_exit csum_exit_net(struct list_head *net_list) +--- a/net/sched/act_gact.c ++++ b/net/sched/act_gact.c +@@ -278,7 +278,7 @@ static __net_init int gact_init_net(stru + { + struct tc_action_net *tn = net_generic(net, gact_net_id); + +- return tc_action_net_init(tn, &act_gact_ops); ++ return tc_action_net_init(net, tn, &act_gact_ops); + } + + static void __net_exit gact_exit_net(struct list_head *net_list) +--- a/net/sched/act_ife.c ++++ b/net/sched/act_ife.c +@@ -890,7 +890,7 @@ static __net_init int ife_init_net(struc + { + struct tc_action_net *tn = net_generic(net, ife_net_id); + +- return tc_action_net_init(tn, &act_ife_ops); ++ return tc_action_net_init(net, tn, &act_ife_ops); + } + + static void __net_exit ife_exit_net(struct list_head *net_list) +--- a/net/sched/act_ipt.c ++++ b/net/sched/act_ipt.c +@@ -61,12 +61,13 @@ static int ipt_init_target(struct net *n + return 0; + } + +-static void ipt_destroy_target(struct xt_entry_target *t) ++static void ipt_destroy_target(struct xt_entry_target *t, struct net *net) + { + struct xt_tgdtor_param par = { + .target = t->u.kernel.target, + .targinfo = t->data, + .family = NFPROTO_IPV4, ++ .net = net, + }; + if (par.target->destroy != NULL) + par.target->destroy(&par); +@@ -78,7 +79,7 @@ static void tcf_ipt_release(struct tc_ac + struct tcf_ipt *ipt = to_ipt(a); + + if (ipt->tcfi_t) { +- ipt_destroy_target(ipt->tcfi_t); ++ ipt_destroy_target(ipt->tcfi_t, a->idrinfo->net); + kfree(ipt->tcfi_t); + } + kfree(ipt->tcfi_tname); +@@ -180,7 +181,7 @@ static int __tcf_ipt_init(struct net *ne + + spin_lock_bh(&ipt->tcf_lock); + if (ret != ACT_P_CREATED) { +- 
ipt_destroy_target(ipt->tcfi_t); ++ ipt_destroy_target(ipt->tcfi_t, net); + kfree(ipt->tcfi_tname); + kfree(ipt->tcfi_t); + } +@@ -350,7 +351,7 @@ static __net_init int ipt_init_net(struc + { + struct tc_action_net *tn = net_generic(net, ipt_net_id); + +- return tc_action_net_init(tn, &act_ipt_ops); ++ return tc_action_net_init(net, tn, &act_ipt_ops); + } + + static void __net_exit ipt_exit_net(struct list_head *net_list) +@@ -399,7 +400,7 @@ static __net_init int xt_init_net(struct + { + struct tc_action_net *tn = net_generic(net, xt_net_id); + +- return tc_action_net_init(tn, &act_xt_ops); ++ return tc_action_net_init(net, tn, &act_xt_ops); + } + + static void __net_exit xt_exit_net(struct list_head *net_list) +--- a/net/sched/act_mirred.c ++++ b/net/sched/act_mirred.c +@@ -432,7 +432,7 @@ static __net_init int mirred_init_net(st + { + struct tc_action_net *tn = net_generic(net, mirred_net_id); + +- return tc_action_net_init(tn, &act_mirred_ops); ++ return tc_action_net_init(net, tn, &act_mirred_ops); + } + + static void __net_exit mirred_exit_net(struct list_head *net_list) +--- a/net/sched/act_nat.c ++++ b/net/sched/act_nat.c +@@ -327,7 +327,7 @@ static __net_init int nat_init_net(struc + { + struct tc_action_net *tn = net_generic(net, nat_net_id); + +- return tc_action_net_init(tn, &act_nat_ops); ++ return tc_action_net_init(net, tn, &act_nat_ops); + } + + static void __net_exit nat_exit_net(struct list_head *net_list) +--- a/net/sched/act_pedit.c ++++ b/net/sched/act_pedit.c +@@ -498,7 +498,7 @@ static __net_init int pedit_init_net(str + { + struct tc_action_net *tn = net_generic(net, pedit_net_id); + +- return tc_action_net_init(tn, &act_pedit_ops); ++ return tc_action_net_init(net, tn, &act_pedit_ops); + } + + static void __net_exit pedit_exit_net(struct list_head *net_list) +--- a/net/sched/act_police.c ++++ b/net/sched/act_police.c +@@ -371,7 +371,7 @@ static __net_init int police_init_net(st + { + struct tc_action_net *tn = net_generic(net, 
police_net_id); + +- return tc_action_net_init(tn, &act_police_ops); ++ return tc_action_net_init(net, tn, &act_police_ops); + } + + static void __net_exit police_exit_net(struct list_head *net_list) +--- a/net/sched/act_sample.c ++++ b/net/sched/act_sample.c +@@ -269,7 +269,7 @@ static __net_init int sample_init_net(st + { + struct tc_action_net *tn = net_generic(net, sample_net_id); + +- return tc_action_net_init(tn, &act_sample_ops); ++ return tc_action_net_init(net, tn, &act_sample_ops); + } + + static void __net_exit sample_exit_net(struct list_head *net_list) +--- a/net/sched/act_simple.c ++++ b/net/sched/act_simple.c +@@ -232,7 +232,7 @@ static __net_init int simp_init_net(stru + { + struct tc_action_net *tn = net_generic(net, simp_net_id); + +- return tc_action_net_init(tn, &act_simp_ops); ++ return tc_action_net_init(net, tn, &act_simp_ops); + } + + static void __net_exit simp_exit_net(struct list_head *net_list) +--- a/net/sched/act_skbedit.c ++++ b/net/sched/act_skbedit.c +@@ -336,7 +336,7 @@ static __net_init int skbedit_init_net(s + { + struct tc_action_net *tn = net_generic(net, skbedit_net_id); + +- return tc_action_net_init(tn, &act_skbedit_ops); ++ return tc_action_net_init(net, tn, &act_skbedit_ops); + } + + static void __net_exit skbedit_exit_net(struct list_head *net_list) +--- a/net/sched/act_skbmod.c ++++ b/net/sched/act_skbmod.c +@@ -287,7 +287,7 @@ static __net_init int skbmod_init_net(st + { + struct tc_action_net *tn = net_generic(net, skbmod_net_id); + +- return tc_action_net_init(tn, &act_skbmod_ops); ++ return tc_action_net_init(net, tn, &act_skbmod_ops); + } + + static void __net_exit skbmod_exit_net(struct list_head *net_list) +--- a/net/sched/act_tunnel_key.c ++++ b/net/sched/act_tunnel_key.c +@@ -600,7 +600,7 @@ static __net_init int tunnel_key_init_ne + { + struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); + +- return tc_action_net_init(tn, &act_tunnel_key_ops); ++ return tc_action_net_init(net, tn, 
&act_tunnel_key_ops); + } + + static void __net_exit tunnel_key_exit_net(struct list_head *net_list) +--- a/net/sched/act_vlan.c ++++ b/net/sched/act_vlan.c +@@ -334,7 +334,7 @@ static __net_init int vlan_init_net(stru + { + struct tc_action_net *tn = net_generic(net, vlan_net_id); + +- return tc_action_net_init(tn, &act_vlan_ops); ++ return tc_action_net_init(net, tn, &act_vlan_ops); + } + + static void __net_exit vlan_exit_net(struct list_head *net_list) diff --git a/queue-5.2/nfp-flower-handle-neighbour-events-on-internal-ports.patch b/queue-5.2/nfp-flower-handle-neighbour-events-on-internal-ports.patch new file mode 100644 index 00000000000..7cf5ba62eeb --- /dev/null +++ b/queue-5.2/nfp-flower-handle-neighbour-events-on-internal-ports.patch @@ -0,0 +1,51 @@ +From foo@baz Sun 08 Sep 2019 09:06:33 AM WEST +From: John Hurley +Date: Tue, 27 Aug 2019 22:56:30 -0700 +Subject: nfp: flower: handle neighbour events on internal ports + +From: John Hurley + +[ Upstream commit e8024cb483abb2b0290b3ef5e34c736e9de2492f ] + +Recent code changes to NFP allowed the offload of neighbour entries to FW +when the next hop device was an internal port. This allows for offload of +tunnel encap when the end-point IP address is applied to such a port. + +Unfortunately, the neighbour event handler still rejects events that are +not associated with a repr dev and so the firmware neighbour table may get +out of sync for internal ports. + +Fix this by allowing internal port neighbour events to be correctly +processed. + +Fixes: 45756dfedab5 ("nfp: flower: allow tunnels to output to internal port") +Signed-off-by: John Hurley +Reviewed-by: Simon Horman +Reviewed-by: Jakub Kicinski +Signed-off-by: Jakub Kicinski +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c ++++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c +@@ -329,13 +329,13 @@ nfp_tun_neigh_event_handler(struct notif + + flow.daddr = *(__be32 *)n->primary_key; + +- /* Only concerned with route changes for representors. */ +- if (!nfp_netdev_is_nfp_repr(n->dev)) +- return NOTIFY_DONE; +- + app_priv = container_of(nb, struct nfp_flower_priv, tun.neigh_nb); + app = app_priv->app; + ++ if (!nfp_netdev_is_nfp_repr(n->dev) && ++ !nfp_flower_internal_port_can_offload(app, n->dev)) ++ return NOTIFY_DONE; ++ + /* Only concerned with changes to routes already added to NFP. */ + if (!nfp_tun_has_route(app, flow.daddr)) + return NOTIFY_DONE; diff --git a/queue-5.2/nfp-flower-prevent-ingress-block-binds-on-internal-ports.patch b/queue-5.2/nfp-flower-prevent-ingress-block-binds-on-internal-ports.patch new file mode 100644 index 00000000000..0f7594b0ee8 --- /dev/null +++ b/queue-5.2/nfp-flower-prevent-ingress-block-binds-on-internal-ports.patch @@ -0,0 +1,46 @@ +From foo@baz Sun 08 Sep 2019 09:06:33 AM WEST +From: John Hurley +Date: Tue, 27 Aug 2019 22:56:29 -0700 +Subject: nfp: flower: prevent ingress block binds on internal ports + +From: John Hurley + +[ Upstream commit 739d7c5752b255e89ddbb1b0474f3b88ef5cd343 ] + +Internal port TC offload is implemented through user-space applications +(such as OvS) by adding filters at egress via TC clsact qdiscs. Indirect +block offload support in the NFP driver accepts both ingress qdisc binds +and egress binds if the device is an internal port. However, clsact sends +bind notification for both ingress and egress block binds which can lead +to the driver registering multiple callbacks and receiving multiple +notifications of new filters. 
+ +Fix this by rejecting ingress block bind callbacks when the port is +internal and only adding filter callbacks for egress binds. + +Fixes: 4d12ba42787b ("nfp: flower: allow offloading of matches on 'internal' ports") +Signed-off-by: John Hurley +Reviewed-by: Jakub Kicinski +Signed-off-by: Jakub Kicinski +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/netronome/nfp/flower/offload.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c ++++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c +@@ -1280,9 +1280,10 @@ nfp_flower_setup_indr_tc_block(struct ne + struct nfp_flower_priv *priv = app->priv; + int err; + +- if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS && +- !(f->binder_type == TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS && +- nfp_flower_internal_port_can_offload(app, netdev))) ++ if ((f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS && ++ !nfp_flower_internal_port_can_offload(app, netdev)) || ++ (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS && ++ nfp_flower_internal_port_can_offload(app, netdev))) + return -EOPNOTSUPP; + + switch (f->command) { diff --git a/queue-5.2/r8152-remove-calling-netif_napi_del.patch b/queue-5.2/r8152-remove-calling-netif_napi_del.patch new file mode 100644 index 00000000000..035c09d37b9 --- /dev/null +++ b/queue-5.2/r8152-remove-calling-netif_napi_del.patch @@ -0,0 +1,37 @@ +From foo@baz Sun 08 Sep 2019 09:06:33 AM WEST +From: Hayes Wang +Date: Wed, 28 Aug 2019 09:51:42 +0800 +Subject: r8152: remove calling netif_napi_del + +From: Hayes Wang + +[ Upstream commit 973dc6cfc0e2c43ff29ca5645ceaf1ae694ea110 ] + +Remove unnecessary use of netif_napi_del. This also avoids to call +napi_disable() after netif_napi_del(). + +Signed-off-by: Hayes Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/usb/r8152.c | 2 -- + 1 file changed, 2 deletions(-) + +--- a/drivers/net/usb/r8152.c ++++ b/drivers/net/usb/r8152.c +@@ -5309,7 +5309,6 @@ static int rtl8152_probe(struct usb_inte + return 0; + + out1: +- netif_napi_del(&tp->napi); + usb_set_intfdata(intf, NULL); + out: + free_netdev(netdev); +@@ -5327,7 +5326,6 @@ static void rtl8152_disconnect(struct us + if (udev->state == USB_STATE_NOTATTACHED) + set_bit(RTL8152_UNPLUG, &tp->flags); + +- netif_napi_del(&tp->napi); + unregister_netdev(tp->netdev); + cancel_delayed_work_sync(&tp->hw_phy_work); + tp->rtl_ops.unload(tp); diff --git a/queue-5.2/revert-r8152-napi-hangup-fix-after-disconnect.patch b/queue-5.2/revert-r8152-napi-hangup-fix-after-disconnect.patch new file mode 100644 index 00000000000..afe9c19096c --- /dev/null +++ b/queue-5.2/revert-r8152-napi-hangup-fix-after-disconnect.patch @@ -0,0 +1,43 @@ +From foo@baz Sun 08 Sep 2019 09:06:33 AM WEST +From: Hayes Wang +Date: Wed, 28 Aug 2019 09:51:41 +0800 +Subject: Revert "r8152: napi hangup fix after disconnect" + +From: Hayes Wang + +[ Upstream commit 49d4b14113cae1410eb4654ada5b9583bad971c4 ] + +This reverts commit 0ee1f4734967af8321ecebaf9c74221ace34f2d5. + +The commit 0ee1f4734967 ("r8152: napi hangup fix after +disconnect") adds a check about RTL8152_UNPLUG to determine +if calling napi_disable() is invalid in rtl8152_close(), +when rtl8152_disconnect() is called. This avoids to use +napi_disable() after calling netif_napi_del(). + +However, commit ffa9fec30ca0 ("r8152: set RTL8152_UNPLUG +only for real disconnection") causes that RTL8152_UNPLUG +is not always set when calling rtl8152_disconnect(). +Therefore, I have to revert commit 0ee1f4734967 ("r8152: +napi hangup fix after disconnect"), first. And submit +another patch to fix it. + +Signed-off-by: Hayes Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/usb/r8152.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/drivers/net/usb/r8152.c ++++ b/drivers/net/usb/r8152.c +@@ -3987,8 +3987,7 @@ static int rtl8152_close(struct net_devi + #ifdef CONFIG_PM_SLEEP + unregister_pm_notifier(&tp->pm_notifier); + #endif +- if (!test_bit(RTL8152_UNPLUG, &tp->flags)) +- napi_disable(&tp->napi); ++ napi_disable(&tp->napi); + clear_bit(WORK_ENABLE, &tp->flags); + usb_kill_urb(tp->intr_urb); + cancel_delayed_work_sync(&tp->schedule); diff --git a/queue-5.2/series b/queue-5.2/series index ecebd932e5d..c7ae25d8572 100644 --- a/queue-5.2/series +++ b/queue-5.2/series @@ -1,3 +1,22 @@ +mld-fix-memory-leak-in-mld_del_delrec.patch +net-fix-skb-use-after-free-in-netpoll.patch +net-sched-act_sample-fix-psample-group-handling-on-overwrite.patch +net_sched-fix-a-null-pointer-deref-in-ipt-action.patch +net-stmmac-dwmac-rk-don-t-fail-if-phy-regulator-is-absent.patch +tcp-inherit-timestamp-on-mtu-probe.patch +tcp-remove-empty-skb-from-write-queue-in-error-cases.patch +nfp-flower-prevent-ingress-block-binds-on-internal-ports.patch +nfp-flower-handle-neighbour-events-on-internal-ports.patch +revert-r8152-napi-hangup-fix-after-disconnect.patch +r8152-remove-calling-netif_napi_del.patch +taprio-fix-kernel-panic-in-taprio_destroy.patch +taprio-set-default-link-speed-to-10-mbps-in-taprio_set_picos_per_byte.patch +net-sched-cbs-set-default-link-speed-to-10-mbps-in-cbs_set_port_rate.patch +add-genphy_c45_config_aneg-function-to-phy-c45.c.patch +net-dsa-tag_8021q-future-proof-the-reserved-fields-in-the-custom-vid.patch +net-sched-pfifo_fast-fix-wrong-dereference-in-pfifo_fast_enqueue.patch +net-sched-pfifo_fast-fix-wrong-dereference-when-qdisc-is-reset.patch +net-rds-fix-info-leak-in-rds6_inc_info_copy.patch batman-adv-fix-netlink-dumping-of-all-mcast_flags-bu.patch libbpf-fix-erroneous-multi-closing-of-btf-fd.patch libbpf-set-btf-fd-for-prog-only-when-there-is-suppor.patch 
diff --git a/queue-5.2/taprio-fix-kernel-panic-in-taprio_destroy.patch b/queue-5.2/taprio-fix-kernel-panic-in-taprio_destroy.patch new file mode 100644 index 00000000000..9b1c37edaa2 --- /dev/null +++ b/queue-5.2/taprio-fix-kernel-panic-in-taprio_destroy.patch @@ -0,0 +1,54 @@ +From foo@baz Sun 08 Sep 2019 09:06:33 AM WEST +From: Vladimir Oltean +Date: Fri, 30 Aug 2019 04:07:21 +0300 +Subject: taprio: Fix kernel panic in taprio_destroy + +From: Vladimir Oltean + +taprio_init may fail earlier than this line: + + list_add(&q->taprio_list, &taprio_list); + +i.e. due to the net device not being multi queue. + +Attempting to remove q from the global taprio_list when it is not part +of it will result in a kernel panic. + +Fix it by matching list_add and list_del better to one another in the +order of operations. This way we can keep the deletion unconditional +and with lower complexity - O(1). + +Cc: Leandro Dorileo +Fixes: 7b9eba7ba0c1 ("net/sched: taprio: fix picos_per_byte miscalculation") +Signed-off-by: Vladimir Oltean +Acked-by: Vinicius Costa Gomes +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_taprio.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/net/sched/sch_taprio.c ++++ b/net/sched/sch_taprio.c +@@ -903,6 +903,10 @@ static int taprio_init(struct Qdisc *sch + */ + q->clockid = -1; + ++ spin_lock(&taprio_list_lock); ++ list_add(&q->taprio_list, &taprio_list); ++ spin_unlock(&taprio_list_lock); ++ + if (sch->parent != TC_H_ROOT) + return -EOPNOTSUPP; + +@@ -920,10 +924,6 @@ static int taprio_init(struct Qdisc *sch + if (!opt) + return -EINVAL; + +- spin_lock(&taprio_list_lock); +- list_add(&q->taprio_list, &taprio_list); +- spin_unlock(&taprio_list_lock); +- + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *dev_queue; + struct Qdisc *qdisc; diff --git a/queue-5.2/taprio-set-default-link-speed-to-10-mbps-in-taprio_set_picos_per_byte.patch b/queue-5.2/taprio-set-default-link-speed-to-10-mbps-in-taprio_set_picos_per_byte.patch new file mode 100644 index 00000000000..5d1b02accae --- /dev/null +++ b/queue-5.2/taprio-set-default-link-speed-to-10-mbps-in-taprio_set_picos_per_byte.patch @@ -0,0 +1,118 @@ +From foo@baz Sun 08 Sep 2019 09:06:33 AM WEST +From: Vladimir Oltean +Date: Fri, 30 Aug 2019 04:07:22 +0300 +Subject: taprio: Set default link speed to 10 Mbps in taprio_set_picos_per_byte + +From: Vladimir Oltean + +The taprio budget needs to be adapted at runtime according to interface +link speed. But that handling is problematic. + +For one thing, installing a qdisc on an interface that doesn't have +carrier is not illegal. But taprio prints the following stack trace: + +[ 31.851373] ------------[ cut here ]------------ +[ 31.856024] WARNING: CPU: 1 PID: 207 at net/sched/sch_taprio.c:481 taprio_dequeue+0x1a8/0x2d4 +[ 31.864566] taprio: dequeue() called with unknown picos per byte. 
+[ 31.864570] Modules linked in: +[ 31.873701] CPU: 1 PID: 207 Comm: tc Not tainted 5.3.0-rc5-01199-g8838fe023cd6 #1689 +[ 31.881398] Hardware name: Freescale LS1021A +[ 31.885661] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) +[ 31.893368] [] (show_stack) from [] (dump_stack+0xb4/0xc8) +[ 31.900555] [] (dump_stack) from [] (__warn+0xe0/0xf8) +[ 31.907395] [] (__warn) from [] (warn_slowpath_fmt+0x48/0x6c) +[ 31.914841] [] (warn_slowpath_fmt) from [] (taprio_dequeue+0x1a8/0x2d4) +[ 31.923150] [] (taprio_dequeue) from [] (__qdisc_run+0x90/0x61c) +[ 31.930856] [] (__qdisc_run) from [] (net_tx_action+0x12c/0x2bc) +[ 31.938560] [] (net_tx_action) from [] (__do_softirq+0x130/0x3c8) +[ 31.946350] [] (__do_softirq) from [] (irq_exit+0xbc/0xd8) +[ 31.953536] [] (irq_exit) from [] (__handle_domain_irq+0x60/0xb4) +[ 31.961328] [] (__handle_domain_irq) from [] (gic_handle_irq+0x58/0x9c) +[ 31.969638] [] (gic_handle_irq) from [] (__irq_svc+0x6c/0x90) +[ 31.977076] Exception stack(0xe8167b20 to 0xe8167b68) +[ 31.982100] 7b20: e9d4bd80 00000cc0 000000cf 00000000 e9d4bd80 c1f38958 00000cc0 c1f38960 +[ 31.990234] 7b40: 00000001 000000cf 00000004 e9dc0800 00000000 e8167b70 c0f478ec c0f46d94 +[ 31.998363] 7b60: 60070013 ffffffff +[ 32.001833] [] (__irq_svc) from [] (netlink_trim+0x18/0xd8) +[ 32.009104] [] (netlink_trim) from [] (netlink_broadcast_filtered+0x34/0x414) +[ 32.017930] [] (netlink_broadcast_filtered) from [] (netlink_broadcast+0x20/0x28) +[ 32.027102] [] (netlink_broadcast) from [] (rtnetlink_send+0x34/0x88) +[ 32.035238] [] (rtnetlink_send) from [] (notify_and_destroy+0x2c/0x44) +[ 32.043461] [] (notify_and_destroy) from [] (qdisc_graft+0x398/0x470) +[ 32.051595] [] (qdisc_graft) from [] (tc_modify_qdisc+0x3a4/0x724) +[ 32.059470] [] (tc_modify_qdisc) from [] (rtnetlink_rcv_msg+0x260/0x2ec) +[ 32.067864] [] (rtnetlink_rcv_msg) from [] (netlink_rcv_skb+0xb8/0x110) +[ 32.076172] [] (netlink_rcv_skb) from [] (netlink_unicast+0x1b4/0x22c) +[ 32.084392] [] 
(netlink_unicast) from [] (netlink_sendmsg+0x33c/0x380) +[ 32.092614] [] (netlink_sendmsg) from [] (sock_sendmsg+0x14/0x24) +[ 32.100403] [] (sock_sendmsg) from [] (___sys_sendmsg+0x214/0x228) +[ 32.108279] [] (___sys_sendmsg) from [] (__sys_sendmsg+0x50/0x8c) +[ 32.116068] [] (__sys_sendmsg) from [] (ret_fast_syscall+0x0/0x54) +[ 32.123938] Exception stack(0xe8167fa8 to 0xe8167ff0) +[ 32.128960] 7fa0: b6fa68c8 000000f8 00000003 bea142d0 00000000 00000000 +[ 32.137093] 7fc0: b6fa68c8 000000f8 0052154c 00000128 5d6468a2 00000000 00000028 00558c9c +[ 32.145224] 7fe0: 00000070 bea14278 00530d64 b6e17e64 +[ 32.150659] ---[ end trace 2139c9827c3e5177 ]--- + +This happens because the qdisc ->dequeue callback gets called. Which +again is not illegal, the qdisc will dequeue even when the interface is +up but doesn't have carrier (and hence SPEED_UNKNOWN), and the frames +will be dropped further down the stack in dev_direct_xmit(). + +And, at the end of the day, for what? For calculating the initial budget +of an interface which is non-operational at the moment and where frames +will get dropped anyway. + +So if we can't figure out the link speed, default to SPEED_10 and move +along. We can also remove the runtime check now. + +Cc: Leandro Dorileo +Fixes: 7b9eba7ba0c1 ("net/sched: taprio: fix picos_per_byte miscalculation") +Acked-by: Vinicius Costa Gomes +Signed-off-by: Vladimir Oltean +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_taprio.c | 23 +++++++++++++---------- + 1 file changed, 13 insertions(+), 10 deletions(-) + +--- a/net/sched/sch_taprio.c ++++ b/net/sched/sch_taprio.c +@@ -205,11 +205,6 @@ static struct sk_buff *taprio_dequeue(st + u32 gate_mask; + int i; + +- if (atomic64_read(&q->picos_per_byte) == -1) { +- WARN_ONCE(1, "taprio: dequeue() called with unknown picos per byte."); +- return NULL; +- } +- + rcu_read_lock(); + entry = rcu_dereference(q->current_entry); + /* if there's no entry, it means that the schedule didn't +@@ -665,12 +660,20 @@ static void taprio_set_picos_per_byte(st + struct taprio_sched *q) + { + struct ethtool_link_ksettings ecmd; +- int picos_per_byte = -1; ++ int speed = SPEED_10; ++ int picos_per_byte; ++ int err; ++ ++ err = __ethtool_get_link_ksettings(dev, &ecmd); ++ if (err < 0) ++ goto skip; ++ ++ if (ecmd.base.speed != SPEED_UNKNOWN) ++ speed = ecmd.base.speed; + +- if (!__ethtool_get_link_ksettings(dev, &ecmd) && +- ecmd.base.speed != SPEED_UNKNOWN) +- picos_per_byte = div64_s64(NSEC_PER_SEC * 1000LL * 8, +- ecmd.base.speed * 1000 * 1000); ++skip: ++ picos_per_byte = div64_s64(NSEC_PER_SEC * 1000LL * 8, ++ speed * 1000 * 1000); + + atomic64_set(&q->picos_per_byte, picos_per_byte); + netdev_dbg(dev, "taprio: set %s's picos_per_byte to: %lld, linkspeed: %d\n", diff --git a/queue-5.2/tcp-inherit-timestamp-on-mtu-probe.patch b/queue-5.2/tcp-inherit-timestamp-on-mtu-probe.patch new file mode 100644 index 00000000000..0ccb92352a7 --- /dev/null +++ b/queue-5.2/tcp-inherit-timestamp-on-mtu-probe.patch @@ -0,0 +1,47 @@ +From foo@baz Sun 08 Sep 2019 09:06:33 AM WEST +From: Willem de Bruijn +Date: Tue, 27 Aug 2019 15:09:33 -0400 +Subject: tcp: inherit timestamp on mtu probe + +From: Willem de Bruijn + +[ Upstream commit 888a5c53c0d8be6e98bc85b677f179f77a647873 ] + +TCP associates tx timestamp requests with a byte in the bytestream. +If merging skbs in tcp_mtu_probe, migrate the tstamp request. 
+ +Similar to MSG_EOR, do not allow moving a timestamp from any segment +in the probe but the last. This to avoid merging multiple timestamps. + +Tested with the packetdrill script at +https://github.com/wdebruij/packetdrill/commits/mtu_probe-1 + +Link: http://patchwork.ozlabs.org/patch/1143278/#2232897 +Fixes: 4ed2d765dfac ("net-timestamp: TCP timestamping") +Signed-off-by: Willem de Bruijn +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_output.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -2051,7 +2051,7 @@ static bool tcp_can_coalesce_send_queue_ + if (len <= skb->len) + break; + +- if (unlikely(TCP_SKB_CB(skb)->eor)) ++ if (unlikely(TCP_SKB_CB(skb)->eor) || tcp_has_tx_tstamp(skb)) + return false; + + len -= skb->len; +@@ -2168,6 +2168,7 @@ static int tcp_mtu_probe(struct sock *sk + * we need to propagate it to the new skb. + */ + TCP_SKB_CB(nskb)->eor = TCP_SKB_CB(skb)->eor; ++ tcp_skb_collapse_tstamp(nskb, skb); + tcp_unlink_write_queue(skb, sk); + sk_wmem_free_skb(sk, skb); + } else { diff --git a/queue-5.2/tcp-remove-empty-skb-from-write-queue-in-error-cases.patch b/queue-5.2/tcp-remove-empty-skb-from-write-queue-in-error-cases.patch new file mode 100644 index 00000000000..9ab1b174f32 --- /dev/null +++ b/queue-5.2/tcp-remove-empty-skb-from-write-queue-in-error-cases.patch @@ -0,0 +1,90 @@ +From foo@baz Sun 08 Sep 2019 09:06:33 AM WEST +From: Eric Dumazet +Date: Mon, 26 Aug 2019 09:19:15 -0700 +Subject: tcp: remove empty skb from write queue in error cases + +From: Eric Dumazet + +[ Upstream commit fdfc5c8594c24c5df883583ebd286321a80e0a67 ] + +Vladimir Rutsky reported stuck TCP sessions after memory pressure +events. Edge Trigger epoll() user would never receive an EPOLLOUT +notification allowing them to retry a sendmsg(). 
+ +Jason tested the case of sk_stream_alloc_skb() returning NULL, +but there are other paths that could lead both sendmsg() and sendpage() +to return -1 (EAGAIN), with an empty skb queued on the write queue. + +This patch makes sure we remove this empty skb so that +Jason code can detect that the queue is empty, and +call sk->sk_write_space(sk) accordingly. + +Fixes: ce5ec440994b ("tcp: ensure epoll edge trigger wakeup when write queue is empty") +Signed-off-by: Eric Dumazet +Cc: Jason Baron +Reported-by: Vladimir Rutsky +Cc: Soheil Hassas Yeganeh +Cc: Neal Cardwell +Acked-by: Soheil Hassas Yeganeh +Acked-by: Neal Cardwell +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp.c | 30 ++++++++++++++++++++---------- + 1 file changed, 20 insertions(+), 10 deletions(-) + +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -935,6 +935,22 @@ static int tcp_send_mss(struct sock *sk, + return mss_now; + } + ++/* In some cases, both sendpage() and sendmsg() could have added ++ * an skb to the write queue, but failed adding payload on it. ++ * We need to remove it to consume less memory, but more ++ * importantly be able to generate EPOLLOUT for Edge Trigger epoll() ++ * users. 
++ */ ++static void tcp_remove_empty_skb(struct sock *sk, struct sk_buff *skb) ++{ ++ if (skb && !skb->len) { ++ tcp_unlink_write_queue(skb, sk); ++ if (tcp_write_queue_empty(sk)) ++ tcp_chrono_stop(sk, TCP_CHRONO_BUSY); ++ sk_wmem_free_skb(sk, skb); ++ } ++} ++ + ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, + size_t size, int flags) + { +@@ -1064,6 +1080,7 @@ out: + return copied; + + do_error: ++ tcp_remove_empty_skb(sk, tcp_write_queue_tail(sk)); + if (copied) + goto out; + out_err: +@@ -1388,18 +1405,11 @@ out_nopush: + sock_zerocopy_put(uarg); + return copied + copied_syn; + ++do_error: ++ skb = tcp_write_queue_tail(sk); + do_fault: +- if (!skb->len) { +- tcp_unlink_write_queue(skb, sk); +- /* It is the one place in all of TCP, except connection +- * reset, where we can be unlinking the send_head. +- */ +- if (tcp_write_queue_empty(sk)) +- tcp_chrono_stop(sk, TCP_CHRONO_BUSY); +- sk_wmem_free_skb(sk, skb); +- } ++ tcp_remove_empty_skb(sk, skb); + +-do_error: + if (copied + copied_syn) + goto out; + out_err: