From: Greg Kroah-Hartman Date: Tue, 11 Sep 2018 10:08:20 +0000 (+0200) Subject: 4.9-stable patches X-Git-Tag: v4.4.156~37 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=0e1d889092e021474b714314ab3f0c1b61aa0314;p=thirdparty%2Fkernel%2Fstable-queue.git 4.9-stable patches added patches: act_ife-fix-a-potential-deadlock.patch act_ife-fix-a-potential-use-after-free.patch act_ife-move-tcfa_lock-down-to-where-necessary.patch hv_netvsc-ignore-devices-that-are-not-pci.patch ipv4-tcp-send-zero-ipid-for-rst-and-ack-sent-in-syn-recv-and-time-wait-state.patch net-bcmgenet-use-mac-link-status-for-fixed-phy.patch net-sched-action_ife-take-reference-to-meta-module.patch net-sched-fix-memory-exposure-from-short-tca_u32_sel.patch qlge-fix-netdev-features-configuration.patch r8169-add-support-for-ncube-8168-network-card.patch sctp-hold-transport-before-accessing-its-asoc-in-sctp_transport_get_next.patch tcp-do-not-restart-timewait-timer-on-rst-reception.patch vhost-correctly-check-the-iova-range-when-waking-virtqueue.patch vti6-remove-skb-ignore_df-check-from-vti6_xmit.patch --- diff --git a/queue-4.9/act_ife-fix-a-potential-deadlock.patch b/queue-4.9/act_ife-fix-a-potential-deadlock.patch new file mode 100644 index 00000000000..012d6d60f13 --- /dev/null +++ b/queue-4.9/act_ife-fix-a-potential-deadlock.patch @@ -0,0 +1,108 @@ +From foo@baz Tue Sep 11 11:48:38 CEST 2018 +From: Cong Wang +Date: Sun, 19 Aug 2018 12:22:13 -0700 +Subject: act_ife: fix a potential deadlock + +From: Cong Wang + +[ Upstream commit 5ffe57da29b3802baeddaa40909682bbb4cb4d48 ] + +use_all_metadata() acquires read_lock(&ife_mod_lock), then calls +add_metainfo() which calls find_ife_oplist() which acquires the same +lock again. Deadlock! + +Introduce __add_metainfo() which accepts struct tcf_meta_ops *ops +as an additional parameter and let its callers to decide how +to find it. For use_all_metadata(), it already has ops, no +need to find it again, just call __add_metainfo() directly. 
+ +And, as ife_mod_lock is only needed for find_ife_oplist(), +this means we can make non-atomic allocation for populate_metalist() +now. + +Fixes: 817e9f2c5c26 ("act_ife: acquire ife_mod_lock before reading ifeoplist") +Cc: Jamal Hadi Salim +Signed-off-by: Cong Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/act_ife.c | 34 +++++++++++++++++++++------------- + 1 file changed, 21 insertions(+), 13 deletions(-) + +--- a/net/sched/act_ife.c ++++ b/net/sched/act_ife.c +@@ -296,22 +296,16 @@ static int load_metaops_and_vet(u32 meta + + /* called when adding new meta information + */ +-static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, +- int len, bool atomic, bool exists) ++static int __add_metainfo(const struct tcf_meta_ops *ops, ++ struct tcf_ife_info *ife, u32 metaid, void *metaval, ++ int len, bool atomic, bool exists) + { + struct tcf_meta_info *mi = NULL; +- struct tcf_meta_ops *ops = find_ife_oplist(metaid); + int ret = 0; + +- if (!ops) +- return -ENOENT; +- + mi = kzalloc(sizeof(*mi), atomic ? GFP_ATOMIC : GFP_KERNEL); +- if (!mi) { +- /*put back what find_ife_oplist took */ +- module_put(ops->owner); ++ if (!mi) + return -ENOMEM; +- } + + mi->metaid = metaid; + mi->ops = ops; +@@ -319,7 +313,6 @@ static int add_metainfo(struct tcf_ife_i + ret = ops->alloc(mi, metaval, atomic ? 
GFP_ATOMIC : GFP_KERNEL); + if (ret != 0) { + kfree(mi); +- module_put(ops->owner); + return ret; + } + } +@@ -333,6 +326,21 @@ static int add_metainfo(struct tcf_ife_i + return ret; + } + ++static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, ++ int len, bool exists) ++{ ++ const struct tcf_meta_ops *ops = find_ife_oplist(metaid); ++ int ret; ++ ++ if (!ops) ++ return -ENOENT; ++ ret = __add_metainfo(ops, ife, metaid, metaval, len, false, exists); ++ if (ret) ++ /*put back what find_ife_oplist took */ ++ module_put(ops->owner); ++ return ret; ++} ++ + static int use_all_metadata(struct tcf_ife_info *ife, bool exists) + { + struct tcf_meta_ops *o; +@@ -341,7 +349,7 @@ static int use_all_metadata(struct tcf_i + + read_lock(&ife_mod_lock); + list_for_each_entry(o, &ifeoplist, list) { +- rc = add_metainfo(ife, o->metaid, NULL, 0, true, exists); ++ rc = __add_metainfo(o, ife, o->metaid, NULL, 0, true, exists); + if (rc == 0) + installed += 1; + } +@@ -430,7 +438,7 @@ static int populate_metalist(struct tcf_ + if (rc != 0) + return rc; + +- rc = add_metainfo(ife, i, val, len, false, exists); ++ rc = add_metainfo(ife, i, val, len, exists); + if (rc) + return rc; + } diff --git a/queue-4.9/act_ife-fix-a-potential-use-after-free.patch b/queue-4.9/act_ife-fix-a-potential-use-after-free.patch new file mode 100644 index 00000000000..ef0a8c8402c --- /dev/null +++ b/queue-4.9/act_ife-fix-a-potential-use-after-free.patch @@ -0,0 +1,42 @@ +From foo@baz Tue Sep 11 11:48:38 CEST 2018 +From: Cong Wang +Date: Mon, 3 Sep 2018 11:08:15 -0700 +Subject: act_ife: fix a potential use-after-free + +From: Cong Wang + +[ Upstream commit 6d784f1625ea68783cc1fb17de8f6cd3e1660c3f ] + +Immediately after module_put(), user could delete this +module, so e->ops could be already freed before we call +e->ops->release(). + +Fix this by moving module_put() after ops->release(). 
+ +Fixes: ef6980b6becb ("introduce IFE action") +Cc: Jamal Hadi Salim +Signed-off-by: Cong Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/act_ife.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/sched/act_ife.c ++++ b/net/sched/act_ife.c +@@ -395,7 +395,6 @@ static void _tcf_ife_cleanup(struct tc_a + struct tcf_meta_info *e, *n; + + list_for_each_entry_safe(e, n, &ife->metalist, metalist) { +- module_put(e->ops->owner); + list_del(&e->metalist); + if (e->metaval) { + if (e->ops->release) +@@ -403,6 +402,7 @@ static void _tcf_ife_cleanup(struct tc_a + else + kfree(e->metaval); + } ++ module_put(e->ops->owner); + kfree(e); + } + } diff --git a/queue-4.9/act_ife-move-tcfa_lock-down-to-where-necessary.patch b/queue-4.9/act_ife-move-tcfa_lock-down-to-where-necessary.patch new file mode 100644 index 00000000000..2e7f116be4c --- /dev/null +++ b/queue-4.9/act_ife-move-tcfa_lock-down-to-where-necessary.patch @@ -0,0 +1,157 @@ +From foo@baz Tue Sep 11 11:15:51 CEST 2018 +From: Cong Wang +Date: Sun, 19 Aug 2018 12:22:12 -0700 +Subject: act_ife: move tcfa_lock down to where necessary + +From: Cong Wang + +[ Upstream commit 4e407ff5cd67ec76eeeea1deec227b7982dc7f66 ] + +The only time we need to take tcfa_lock is when adding +a new metainfo to an existing ife->metalist. We don't need +to take tcfa_lock so early and so broadly in tcf_ife_init(). + +This means we can always take ife_mod_lock first, avoid the +reverse locking ordering warning as reported by Vlad. + +Reported-by: Vlad Buslov +Tested-by: Vlad Buslov +Cc: Vlad Buslov +Cc: Jamal Hadi Salim +Signed-off-by: Cong Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/act_ife.c | 37 +++++++++++++------------------------ + 1 file changed, 13 insertions(+), 24 deletions(-) + +--- a/net/sched/act_ife.c ++++ b/net/sched/act_ife.c +@@ -267,10 +267,8 @@ static int ife_validate_metatype(struct + } + + /* called when adding new meta information +- * under ife->tcf_lock for existing action + */ +-static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid, +- void *val, int len, bool exists) ++static int load_metaops_and_vet(u32 metaid, void *val, int len) + { + struct tcf_meta_ops *ops = find_ife_oplist(metaid); + int ret = 0; +@@ -278,13 +276,9 @@ static int load_metaops_and_vet(struct t + if (!ops) { + ret = -ENOENT; + #ifdef CONFIG_MODULES +- if (exists) +- spin_unlock_bh(&ife->tcf_lock); + rtnl_unlock(); + request_module("ifemeta%u", metaid); + rtnl_lock(); +- if (exists) +- spin_lock_bh(&ife->tcf_lock); + ops = find_ife_oplist(metaid); + #endif + } +@@ -301,10 +295,9 @@ static int load_metaops_and_vet(struct t + } + + /* called when adding new meta information +- * under ife->tcf_lock for existing action + */ + static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, +- int len, bool atomic) ++ int len, bool atomic, bool exists) + { + struct tcf_meta_info *mi = NULL; + struct tcf_meta_ops *ops = find_ife_oplist(metaid); +@@ -331,12 +324,16 @@ static int add_metainfo(struct tcf_ife_i + } + } + ++ if (exists) ++ spin_lock_bh(&ife->tcf_lock); + list_add_tail(&mi->metalist, &ife->metalist); ++ if (exists) ++ spin_unlock_bh(&ife->tcf_lock); + + return ret; + } + +-static int use_all_metadata(struct tcf_ife_info *ife) ++static int use_all_metadata(struct tcf_ife_info *ife, bool exists) + { + struct tcf_meta_ops *o; + int rc = 0; +@@ -344,7 +341,7 @@ static int use_all_metadata(struct tcf_i + + read_lock(&ife_mod_lock); + list_for_each_entry(o, &ifeoplist, list) { +- rc = add_metainfo(ife, o->metaid, NULL, 0, true); ++ rc = add_metainfo(ife, o->metaid, 
NULL, 0, true, exists); + if (rc == 0) + installed += 1; + } +@@ -416,7 +413,6 @@ static void tcf_ife_cleanup(struct tc_ac + spin_unlock_bh(&ife->tcf_lock); + } + +-/* under ife->tcf_lock for existing action */ + static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb, + bool exists) + { +@@ -430,11 +426,11 @@ static int populate_metalist(struct tcf_ + val = nla_data(tb[i]); + len = nla_len(tb[i]); + +- rc = load_metaops_and_vet(ife, i, val, len, exists); ++ rc = load_metaops_and_vet(i, val, len); + if (rc != 0) + return rc; + +- rc = add_metainfo(ife, i, val, len, exists); ++ rc = add_metainfo(ife, i, val, len, false, exists); + if (rc) + return rc; + } +@@ -510,6 +506,8 @@ static int tcf_ife_init(struct net *net, + if (exists) + spin_lock_bh(&ife->tcf_lock); + ife->tcf_action = parm->action; ++ if (exists) ++ spin_unlock_bh(&ife->tcf_lock); + + if (parm->flags & IFE_ENCODE) { + if (daddr) +@@ -537,9 +535,6 @@ metadata_parse_err: + tcf_hash_release(*a, bind); + if (ret == ACT_P_CREATED) + _tcf_ife_cleanup(*a, bind); +- +- if (exists) +- spin_unlock_bh(&ife->tcf_lock); + return err; + } + +@@ -553,20 +548,14 @@ metadata_parse_err: + * as we can. 
You better have at least one else we are + * going to bail out + */ +- err = use_all_metadata(ife); ++ err = use_all_metadata(ife, exists); + if (err) { + if (ret == ACT_P_CREATED) + _tcf_ife_cleanup(*a, bind); +- +- if (exists) +- spin_unlock_bh(&ife->tcf_lock); + return err; + } + } + +- if (exists) +- spin_unlock_bh(&ife->tcf_lock); +- + if (ret == ACT_P_CREATED) + tcf_hash_insert(tn, *a); + diff --git a/queue-4.9/hv_netvsc-ignore-devices-that-are-not-pci.patch b/queue-4.9/hv_netvsc-ignore-devices-that-are-not-pci.patch new file mode 100644 index 00000000000..92402340541 --- /dev/null +++ b/queue-4.9/hv_netvsc-ignore-devices-that-are-not-pci.patch @@ -0,0 +1,47 @@ +From foo@baz Tue Sep 11 11:48:38 CEST 2018 +From: Stephen Hemminger +Date: Tue, 21 Aug 2018 10:40:38 -0700 +Subject: hv_netvsc: ignore devices that are not PCI + +From: Stephen Hemminger + +[ Upstream commit b93c1b5ac8643cc08bb74fa8ae21d6c63dfcb23d ] + +Registering another device with same MAC address (such as TAP, VPN or +DPDK KNI) will confuse the VF autobinding logic. Restrict the search +to only run if the device is known to be a PCI attached VF. + +Fixes: e8ff40d4bff1 ("hv_netvsc: improve VF device matching") +Signed-off-by: Stephen Hemminger +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/hyperv/netvsc_drv.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -29,6 +29,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -1228,11 +1229,15 @@ static int netvsc_register_vf(struct net + { + struct net_device *ndev; + struct net_device_context *net_device_ctx; ++ struct device *pdev = vf_netdev->dev.parent; + struct netvsc_device *netvsc_dev; + + if (vf_netdev->addr_len != ETH_ALEN) + return NOTIFY_DONE; + ++ if (!pdev || !dev_is_pci(pdev) || dev_is_pf(pdev)) ++ return NOTIFY_DONE; ++ + /* + * We will use the MAC address to locate the synthetic interface to + * associate with the VF interface. If we don't find a matching diff --git a/queue-4.9/ipv4-tcp-send-zero-ipid-for-rst-and-ack-sent-in-syn-recv-and-time-wait-state.patch b/queue-4.9/ipv4-tcp-send-zero-ipid-for-rst-and-ack-sent-in-syn-recv-and-time-wait-state.patch new file mode 100644 index 00000000000..737d2a64087 --- /dev/null +++ b/queue-4.9/ipv4-tcp-send-zero-ipid-for-rst-and-ack-sent-in-syn-recv-and-time-wait-state.patch @@ -0,0 +1,51 @@ +From foo@baz Tue Sep 11 11:48:38 CEST 2018 +From: Eric Dumazet +Date: Wed, 22 Aug 2018 13:30:45 -0700 +Subject: ipv4: tcp: send zero IPID for RST and ACK sent in SYN-RECV and TIME-WAIT state + +From: Eric Dumazet + +[ Upstream commit 431280eebed9f5079553daf003011097763e71fd ] + +tcp uses per-cpu (and per namespace) sockets (net->ipv4.tcp_sk) internally +to send some control packets. + +1) RST packets, through tcp_v4_send_reset() +2) ACK packets in SYN-RECV and TIME-WAIT state, through tcp_v4_send_ack() + +These packets assert IP_DF, and also use the hashed IP ident generator +to provide an IPv4 ID number. + +Geoff Alexander reported this could be used to build off-path attacks. + +These packets should not be fragmented, since their size is smaller than +IPV4_MIN_MTU. 
Only some tunneled paths could eventually have to fragment, +regardless of inner IPID. + +We really can use zero IPID, to address the flaw, and as a bonus, +avoid a couple of atomic operations in ip_idents_reserve() + +Signed-off-by: Eric Dumazet +Reported-by: Geoff Alexander +Tested-by: Geoff Alexander +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_ipv4.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -2440,6 +2440,12 @@ static int __net_init tcp_sk_init(struct + if (res) + goto fail; + sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); ++ ++ /* Please enforce IP_DF and IPID==0 for RST and ++ * ACK sent in SYN-RECV and TIME-WAIT state. ++ */ ++ inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO; ++ + *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk; + } + diff --git a/queue-4.9/net-bcmgenet-use-mac-link-status-for-fixed-phy.patch b/queue-4.9/net-bcmgenet-use-mac-link-status-for-fixed-phy.patch new file mode 100644 index 00000000000..b0eed2b228b --- /dev/null +++ b/queue-4.9/net-bcmgenet-use-mac-link-status-for-fixed-phy.patch @@ -0,0 +1,56 @@ +From foo@baz Tue Sep 11 11:48:38 CEST 2018 +From: Doug Berger +Date: Tue, 28 Aug 2018 12:33:15 -0700 +Subject: net: bcmgenet: use MAC link status for fixed phy + +From: Doug Berger + +[ Upstream commit c3c397c1f16c51601a3fac4fe0c63ad8aa85a904 ] + +When using the fixed PHY with GENET (e.g. MOCA) the PHY link +status can be determined from the internal link status captured +by the MAC. This allows the PHY state machine to use the correct +link state with the fixed PHY even if MAC link event interrupts +are missed when the net device is opened. + +Fixes: 8d88c6ebb34c ("net: bcmgenet: enable MoCA link state change detection") +Signed-off-by: Doug Berger +Reviewed-by: Florian Fainelli +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/genet/bcmgenet.h | 3 +++ + drivers/net/ethernet/broadcom/genet/bcmmii.c | 10 ++++++++-- + 2 files changed, 11 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h ++++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h +@@ -185,6 +185,9 @@ struct bcmgenet_mib_counters { + #define UMAC_MAC1 0x010 + #define UMAC_MAX_FRAME_LEN 0x014 + ++#define UMAC_MODE 0x44 ++#define MODE_LINK_STATUS (1 << 5) ++ + #define UMAC_EEE_CTRL 0x064 + #define EN_LPI_RX_PAUSE (1 << 0) + #define EN_LPI_TX_PFC (1 << 1) +--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c ++++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c +@@ -167,8 +167,14 @@ void bcmgenet_mii_setup(struct net_devic + static int bcmgenet_fixed_phy_link_update(struct net_device *dev, + struct fixed_phy_status *status) + { +- if (dev && dev->phydev && status) +- status->link = dev->phydev->link; ++ struct bcmgenet_priv *priv; ++ u32 reg; ++ ++ if (dev && dev->phydev && status) { ++ priv = netdev_priv(dev); ++ reg = bcmgenet_umac_readl(priv, UMAC_MODE); ++ status->link = !!(reg & MODE_LINK_STATUS); ++ } + + return 0; + } diff --git a/queue-4.9/net-sched-action_ife-take-reference-to-meta-module.patch b/queue-4.9/net-sched-action_ife-take-reference-to-meta-module.patch new file mode 100644 index 00000000000..1c42bc15894 --- /dev/null +++ b/queue-4.9/net-sched-action_ife-take-reference-to-meta-module.patch @@ -0,0 +1,144 @@ +From foo@baz Tue Sep 11 11:48:38 CEST 2018 +From: Vlad Buslov +Date: Tue, 4 Sep 2018 00:44:42 +0300 +Subject: net: sched: action_ife: take reference to meta module + +From: Vlad Buslov + +[ Upstream commit 84cb8eb26cb9ce3c79928094962a475a9d850a53 ] + +Recent refactoring of add_metainfo() caused use_all_metadata() to add +metainfo to ife action metalist without taking reference to module. This +causes warning in module_put called from ife action cleanup function. 
+ +Implement add_metainfo_and_get_ops() function that returns with reference +to module taken if metainfo was added successfully, and call it from +use_all_metadata(), instead of calling __add_metainfo() directly. + +Example warning: + +[ 646.344393] WARNING: CPU: 1 PID: 2278 at kernel/module.c:1139 module_put+0x1cb/0x230 +[ 646.352437] Modules linked in: act_meta_skbtcindex act_meta_mark act_meta_skbprio act_ife ife veth nfsv3 nfs fscache xt_CHECKSUM iptable_mangle ipt_MASQUERADE iptable_nat nf_nat_ipv4 nf_nat xt_conntrack nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 libcrc32c tun ebtable_filter ebtables ip6table_filter ip6_tables bridge stp llc mlx5_ib ib_uverbs ib_core intel_rapl sb_edac x86_pkg_temp_thermal mlx5_core coretemp kvm_intel kvm nfsd igb irqbypass crct10dif_pclmul devlink crc32_pclmul mei_me joydev ses crc32c_intel enclosure auth_rpcgss i2c_algo_bit ioatdma ptp mei pps_core ghash_clmulni_intel iTCO_wdt iTCO_vendor_support pcspkr dca ipmi_ssif lpc_ich target_core_mod i2c_i801 ipmi_si ipmi_devintf pcc_cpufreq wmi ipmi_msghandler nfs_acl lockd acpi_pad acpi_power_meter grace sunrpc mpt3sas raid_class scsi_transport_sas +[ 646.425631] CPU: 1 PID: 2278 Comm: tc Not tainted 4.19.0-rc1+ #799 +[ 646.432187] Hardware name: Supermicro SYS-2028TP-DECR/X10DRT-P, BIOS 2.0b 03/30/2017 +[ 646.440595] RIP: 0010:module_put+0x1cb/0x230 +[ 646.445238] Code: f3 66 94 02 e8 26 ff fa ff 85 c0 74 11 0f b6 1d 51 30 94 02 80 fb 01 77 60 83 e3 01 74 13 65 ff 0d 3a 83 db 73 e9 2b ff ff ff <0f> 0b e9 00 ff ff ff e8 59 01 fb ff 85 c0 75 e4 48 c7 c2 20 62 6b +[ 646.464997] RSP: 0018:ffff880354d37068 EFLAGS: 00010286 +[ 646.470599] RAX: 0000000000000000 RBX: ffffffffc0a52518 RCX: ffffffff8c2668db +[ 646.478118] RDX: 0000000000000003 RSI: dffffc0000000000 RDI: ffffffffc0a52518 +[ 646.485641] RBP: ffffffffc0a52180 R08: fffffbfff814a4a4 R09: fffffbfff814a4a3 +[ 646.493164] R10: ffffffffc0a5251b R11: fffffbfff814a4a4 R12: 1ffff1006a9a6e0d +[ 646.500687] R13: 00000000ffffffff R14: 
ffff880362bab890 R15: dead000000000100 +[ 646.508213] FS: 00007f4164c99800(0000) GS:ffff88036fe40000(0000) knlGS:0000000000000000 +[ 646.516961] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 646.523080] CR2: 00007f41638b8420 CR3: 0000000351df0004 CR4: 00000000001606e0 +[ 646.530595] Call Trace: +[ 646.533408] ? find_symbol_in_section+0x260/0x260 +[ 646.538509] tcf_ife_cleanup+0x11b/0x200 [act_ife] +[ 646.543695] tcf_action_cleanup+0x29/0xa0 +[ 646.548078] __tcf_action_put+0x5a/0xb0 +[ 646.552289] ? nla_put+0x65/0xe0 +[ 646.555889] __tcf_idr_release+0x48/0x60 +[ 646.560187] tcf_generic_walker+0x448/0x6b0 +[ 646.564764] ? tcf_action_dump_1+0x450/0x450 +[ 646.569411] ? __lock_is_held+0x84/0x110 +[ 646.573720] ? tcf_ife_walker+0x10c/0x20f [act_ife] +[ 646.578982] tca_action_gd+0x972/0xc40 +[ 646.583129] ? tca_get_fill.constprop.17+0x250/0x250 +[ 646.588471] ? mark_lock+0xcf/0x980 +[ 646.592324] ? check_chain_key+0x140/0x1f0 +[ 646.596832] ? debug_show_all_locks+0x240/0x240 +[ 646.601839] ? memset+0x1f/0x40 +[ 646.605350] ? nla_parse+0xca/0x1a0 +[ 646.609217] tc_ctl_action+0x215/0x230 +[ 646.613339] ? tcf_action_add+0x220/0x220 +[ 646.617748] rtnetlink_rcv_msg+0x56a/0x6d0 +[ 646.622227] ? rtnl_fdb_del+0x3f0/0x3f0 +[ 646.626466] netlink_rcv_skb+0x18d/0x200 +[ 646.630752] ? rtnl_fdb_del+0x3f0/0x3f0 +[ 646.634959] ? netlink_ack+0x500/0x500 +[ 646.639106] netlink_unicast+0x2d0/0x370 +[ 646.643409] ? netlink_attachskb+0x340/0x340 +[ 646.648050] ? _copy_from_iter_full+0xe9/0x3e0 +[ 646.652870] ? import_iovec+0x11e/0x1c0 +[ 646.657083] netlink_sendmsg+0x3b9/0x6a0 +[ 646.661388] ? netlink_unicast+0x370/0x370 +[ 646.665877] ? netlink_unicast+0x370/0x370 +[ 646.670351] sock_sendmsg+0x6b/0x80 +[ 646.674212] ___sys_sendmsg+0x4a1/0x520 +[ 646.678443] ? copy_msghdr_from_user+0x210/0x210 +[ 646.683463] ? lock_downgrade+0x320/0x320 +[ 646.687849] ? debug_show_all_locks+0x240/0x240 +[ 646.692760] ? do_raw_spin_unlock+0xa2/0x130 +[ 646.697418] ? 
_raw_spin_unlock+0x24/0x30 +[ 646.701798] ? __handle_mm_fault+0x1819/0x1c10 +[ 646.706619] ? __pmd_alloc+0x320/0x320 +[ 646.710738] ? debug_show_all_locks+0x240/0x240 +[ 646.715649] ? restore_nameidata+0x7b/0xa0 +[ 646.720117] ? check_chain_key+0x140/0x1f0 +[ 646.724590] ? check_chain_key+0x140/0x1f0 +[ 646.729070] ? __fget_light+0xbc/0xd0 +[ 646.733121] ? __sys_sendmsg+0xd7/0x150 +[ 646.737329] __sys_sendmsg+0xd7/0x150 +[ 646.741359] ? __ia32_sys_shutdown+0x30/0x30 +[ 646.746003] ? up_read+0x53/0x90 +[ 646.749601] ? __do_page_fault+0x484/0x780 +[ 646.754105] ? do_syscall_64+0x1e/0x2c0 +[ 646.758320] do_syscall_64+0x72/0x2c0 +[ 646.762353] entry_SYSCALL_64_after_hwframe+0x49/0xbe +[ 646.767776] RIP: 0033:0x7f4163872150 +[ 646.771713] Code: 8b 15 3c 7d 2b 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb cd 66 0f 1f 44 00 00 83 3d b9 d5 2b 00 00 75 10 b8 2e 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 be cd 00 00 48 89 04 24 +[ 646.791474] RSP: 002b:00007ffdef7d6b58 EFLAGS: 00000246 ORIG_RAX: 000000000000002e +[ 646.799721] RAX: ffffffffffffffda RBX: 0000000000000024 RCX: 00007f4163872150 +[ 646.807240] RDX: 0000000000000000 RSI: 00007ffdef7d6bd0 RDI: 0000000000000003 +[ 646.814760] RBP: 000000005b8b9482 R08: 0000000000000001 R09: 0000000000000000 +[ 646.822286] R10: 00000000000005e7 R11: 0000000000000246 R12: 00007ffdef7dad20 +[ 646.829807] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000679bc0 +[ 646.837360] irq event stamp: 6083 +[ 646.841043] hardirqs last enabled at (6081): [] __call_rcu+0x17d/0x500 +[ 646.849882] hardirqs last disabled at (6083): [] trace_hardirqs_off_thunk+0x1a/0x1c +[ 646.859775] softirqs last enabled at (5968): [] __do_softirq+0x4a1/0x6ee +[ 646.868784] softirqs last disabled at (6082): [] tcf_ife_cleanup+0x39/0x200 [act_ife] +[ 646.878845] ---[ end trace b1b8c12ffe51e657 ]--- + +Fixes: 5ffe57da29b3 ("act_ife: fix a potential deadlock") +Signed-off-by: Vlad Buslov +Acked-by: Cong Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/act_ife.c | 16 +++++++++++++++- + 1 file changed, 15 insertions(+), 1 deletion(-) + +--- a/net/sched/act_ife.c ++++ b/net/sched/act_ife.c +@@ -326,6 +326,20 @@ static int __add_metainfo(const struct t + return ret; + } + ++static int add_metainfo_and_get_ops(const struct tcf_meta_ops *ops, ++ struct tcf_ife_info *ife, u32 metaid, ++ bool exists) ++{ ++ int ret; ++ ++ if (!try_module_get(ops->owner)) ++ return -ENOENT; ++ ret = __add_metainfo(ops, ife, metaid, NULL, 0, true, exists); ++ if (ret) ++ module_put(ops->owner); ++ return ret; ++} ++ + static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, + int len, bool exists) + { +@@ -349,7 +363,7 @@ static int use_all_metadata(struct tcf_i + + read_lock(&ife_mod_lock); + list_for_each_entry(o, &ifeoplist, list) { +- rc = __add_metainfo(o, ife, o->metaid, NULL, 0, true, exists); ++ rc = add_metainfo_and_get_ops(o, ife, o->metaid, exists); + if (rc == 0) + installed += 1; + } diff --git a/queue-4.9/net-sched-fix-memory-exposure-from-short-tca_u32_sel.patch b/queue-4.9/net-sched-fix-memory-exposure-from-short-tca_u32_sel.patch new file mode 100644 index 00000000000..b5f898e2aa1 --- /dev/null +++ b/queue-4.9/net-sched-fix-memory-exposure-from-short-tca_u32_sel.patch @@ -0,0 +1,62 @@ +From foo@baz Tue Sep 11 11:15:51 CEST 2018 +From: Kees Cook +Date: Sat, 25 Aug 2018 22:58:01 -0700 +Subject: net: sched: Fix memory exposure from short TCA_U32_SEL + +From: Kees Cook + +[ Upstream commit 98c8f125fd8a6240ea343c1aa50a1be9047791b8 ] + +Via u32_change(), TCA_U32_SEL has an unspecified type in the netlink +policy, so max length isn't enforced, only minimum. This means nkeys +(from userspace) was being trusted without checking the actual size of +nla_len(), which could lead to a memory over-read, and ultimately an +exposure via a call to u32_dump(). Reachability is CAP_NET_ADMIN within +a namespace. 
+ +Reported-by: Al Viro +Cc: Jamal Hadi Salim +Cc: Cong Wang +Cc: Jiri Pirko +Cc: "David S. Miller" +Cc: netdev@vger.kernel.org +Signed-off-by: Kees Cook +Acked-by: Jamal Hadi Salim +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/cls_u32.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/net/sched/cls_u32.c ++++ b/net/sched/cls_u32.c +@@ -851,6 +851,7 @@ static int u32_change(struct net *net, s + struct nlattr *opt = tca[TCA_OPTIONS]; + struct nlattr *tb[TCA_U32_MAX + 1]; + u32 htid, flags = 0; ++ size_t sel_size; + int err; + #ifdef CONFIG_CLS_U32_PERF + size_t size; +@@ -967,8 +968,11 @@ static int u32_change(struct net *net, s + return -EINVAL; + + s = nla_data(tb[TCA_U32_SEL]); ++ sel_size = sizeof(*s) + sizeof(*s->keys) * s->nkeys; ++ if (nla_len(tb[TCA_U32_SEL]) < sel_size) ++ return -EINVAL; + +- n = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL); ++ n = kzalloc(offsetof(typeof(*n), sel) + sel_size, GFP_KERNEL); + if (n == NULL) + return -ENOBUFS; + +@@ -981,7 +985,7 @@ static int u32_change(struct net *net, s + } + #endif + +- memcpy(&n->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key)); ++ memcpy(&n->sel, s, sel_size); + RCU_INIT_POINTER(n->ht_up, ht); + n->handle = handle; + n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0; diff --git a/queue-4.9/qlge-fix-netdev-features-configuration.patch b/queue-4.9/qlge-fix-netdev-features-configuration.patch new file mode 100644 index 00000000000..9a69c6e2e9d --- /dev/null +++ b/queue-4.9/qlge-fix-netdev-features-configuration.patch @@ -0,0 +1,77 @@ +From foo@baz Tue Sep 11 11:48:38 CEST 2018 +From: Manish Chopra +Date: Thu, 23 Aug 2018 13:20:52 -0700 +Subject: qlge: Fix netdev features configuration. + +From: Manish Chopra + +[ Upstream commit 6750c87074c5b534d82fdaabb1deb45b8f1f57de ] + +qlge_fix_features() is not supposed to modify hardware or +driver state, rather it is supposed to only fix requested +features bits. 
Currently qlge_fix_features() also goes for +interface down and up unnecessarily if there is not even +any change in features set. + +This patch changes/fixes following - + +1) Move reload of interface or device re-config from + qlge_fix_features() to qlge_set_features(). +2) Reload of interface in qlge_set_features() only if + relevant feature bit (NETIF_F_HW_VLAN_CTAG_RX) is changed. +3) Get rid of qlge_fix_features() since driver is not really + required to fix any features bit. + +Signed-off-by: Manish +Reviewed-by: Benjamin Poirier +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/qlogic/qlge/qlge_main.c | 23 ++++++++--------------- + 1 file changed, 8 insertions(+), 15 deletions(-) + +--- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c ++++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c +@@ -2387,26 +2387,20 @@ static int qlge_update_hw_vlan_features( + return status; + } + +-static netdev_features_t qlge_fix_features(struct net_device *ndev, +- netdev_features_t features) +-{ +- int err; +- +- /* Update the behavior of vlan accel in the adapter */ +- err = qlge_update_hw_vlan_features(ndev, features); +- if (err) +- return err; +- +- return features; +-} +- + static int qlge_set_features(struct net_device *ndev, + netdev_features_t features) + { + netdev_features_t changed = ndev->features ^ features; ++ int err; ++ ++ if (changed & NETIF_F_HW_VLAN_CTAG_RX) { ++ /* Update the behavior of vlan accel in the adapter */ ++ err = qlge_update_hw_vlan_features(ndev, features); ++ if (err) ++ return err; + +- if (changed & NETIF_F_HW_VLAN_CTAG_RX) + qlge_vlan_mode(ndev, features); ++ } + + return 0; + } +@@ -4719,7 +4713,6 @@ static const struct net_device_ops qlge_ + .ndo_set_mac_address = qlge_set_mac_address, + .ndo_validate_addr = eth_validate_addr, + .ndo_tx_timeout = qlge_tx_timeout, +- .ndo_fix_features = qlge_fix_features, + .ndo_set_features = qlge_set_features, + .ndo_vlan_rx_add_vid = qlge_vlan_rx_add_vid, + 
.ndo_vlan_rx_kill_vid = qlge_vlan_rx_kill_vid, diff --git a/queue-4.9/r8169-add-support-for-ncube-8168-network-card.patch b/queue-4.9/r8169-add-support-for-ncube-8168-network-card.patch new file mode 100644 index 00000000000..b9ae0be030e --- /dev/null +++ b/queue-4.9/r8169-add-support-for-ncube-8168-network-card.patch @@ -0,0 +1,43 @@ +From foo@baz Tue Sep 11 11:48:38 CEST 2018 +From: Anthony Wong +Date: Fri, 31 Aug 2018 20:06:42 +0800 +Subject: r8169: add support for NCube 8168 network card + +From: Anthony Wong + +[ Upstream commit 9fd0e09a4e86499639653243edfcb417a05c5c46 ] + +This card identifies itself as: + Ethernet controller [0200]: NCube Device [10ff:8168] (rev 06) + Subsystem: TP-LINK Technologies Co., Ltd. Device [7470:3468] + +Adding a new entry to rtl8169_pci_tbl makes the card work. + +Link: http://launchpad.net/bugs/1788730 +Signed-off-by: Anthony Wong +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/realtek/r8169.c | 1 + + include/linux/pci_ids.h | 2 ++ + 2 files changed, 3 insertions(+) + +--- a/drivers/net/ethernet/realtek/r8169.c ++++ b/drivers/net/ethernet/realtek/r8169.c +@@ -329,6 +329,7 @@ static const struct pci_device_id rtl816 + { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8161), 0, 0, RTL_CFG_1 }, + { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8167), 0, 0, RTL_CFG_0 }, + { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8168), 0, 0, RTL_CFG_1 }, ++ { PCI_DEVICE(PCI_VENDOR_ID_NCUBE, 0x8168), 0, 0, RTL_CFG_1 }, + { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8169), 0, 0, RTL_CFG_0 }, + { PCI_VENDOR_ID_DLINK, 0x4300, + PCI_VENDOR_ID_DLINK, 0x4b10, 0, 0, RTL_CFG_1 }, +--- a/include/linux/pci_ids.h ++++ b/include/linux/pci_ids.h +@@ -3054,4 +3054,6 @@ + + #define PCI_VENDOR_ID_OCZ 0x1b85 + ++#define PCI_VENDOR_ID_NCUBE 0x10ff ++ + #endif /* _LINUX_PCI_IDS_H */ diff --git a/queue-4.9/sctp-hold-transport-before-accessing-its-asoc-in-sctp_transport_get_next.patch 
b/queue-4.9/sctp-hold-transport-before-accessing-its-asoc-in-sctp_transport_get_next.patch new file mode 100644 index 00000000000..f04ac897584 --- /dev/null +++ b/queue-4.9/sctp-hold-transport-before-accessing-its-asoc-in-sctp_transport_get_next.patch @@ -0,0 +1,99 @@ +From foo@baz Tue Sep 11 11:48:38 CEST 2018 +From: Xin Long +Date: Mon, 27 Aug 2018 18:38:31 +0800 +Subject: sctp: hold transport before accessing its asoc in sctp_transport_get_next + +From: Xin Long + +[ Upstream commit bab1be79a5169ac748d8292b20c86d874022d7ba ] + +As Marcelo noticed, in sctp_transport_get_next, it is iterating over +transports but then also accessing the association directly, without +checking any refcnts before that, which can cause a use-after-free +Read. + +So fix it by holding transport before accessing the association. With +that, sctp_transport_hold calls can be removed in the later places. + +Fixes: 626d16f50f39 ("sctp: export some apis or variables for sctp_diag and reuse some for proc") +Reported-by: syzbot+fe62a0c9aa6a85c6de16@syzkaller.appspotmail.com +Signed-off-by: Xin Long +Acked-by: Neil Horman +Acked-by: Marcelo Ricardo Leitner +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/proc.c | 4 ---- + net/sctp/socket.c | 22 +++++++++++++++------- + 2 files changed, 15 insertions(+), 11 deletions(-) + +--- a/net/sctp/proc.c ++++ b/net/sctp/proc.c +@@ -337,8 +337,6 @@ static int sctp_assocs_seq_show(struct s + } + + transport = (struct sctp_transport *)v; +- if (!sctp_transport_hold(transport)) +- return 0; + assoc = transport->asoc; + epb = &assoc->base; + sk = epb->sk; +@@ -428,8 +426,6 @@ static int sctp_remaddr_seq_show(struct + } + + transport = (struct sctp_transport *)v; +- if (!sctp_transport_hold(transport)) +- return 0; + assoc = transport->asoc; + + list_for_each_entry_rcu(tsp, &assoc->peer.transport_addr_list, +--- a/net/sctp/socket.c ++++ b/net/sctp/socket.c +@@ -4476,9 +4476,14 @@ struct sctp_transport *sctp_transport_ge + break; + } + ++ if (!sctp_transport_hold(t)) ++ continue; ++ + if (net_eq(sock_net(t->asoc->base.sk), net) && + t->asoc->peer.primary_path == t) + break; ++ ++ sctp_transport_put(t); + } + + return t; +@@ -4488,13 +4493,18 @@ struct sctp_transport *sctp_transport_ge + struct rhashtable_iter *iter, + int pos) + { +- void *obj = SEQ_START_TOKEN; ++ struct sctp_transport *t; ++ ++ if (!pos) ++ return SEQ_START_TOKEN; + +- while (pos && (obj = sctp_transport_get_next(net, iter)) && +- !IS_ERR(obj)) +- pos--; ++ while ((t = sctp_transport_get_next(net, iter)) && !IS_ERR(t)) { ++ if (!--pos) ++ break; ++ sctp_transport_put(t); ++ } + +- return obj; ++ return t; + } + + int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *), +@@ -4556,8 +4566,6 @@ int sctp_for_each_transport(int (*cb)(st + for (; !IS_ERR_OR_NULL(obj); obj = sctp_transport_get_next(net, &hti)) { + struct sctp_transport *transport = obj; + +- if (!sctp_transport_hold(transport)) +- continue; + err = cb(transport, p); + sctp_transport_put(transport); + if (err) diff --git a/queue-4.9/series b/queue-4.9/series index af19e7d6c2e..8d09e71fbb8 100644 --- a/queue-4.9/series +++ 
b/queue-4.9/series @@ -1 +1,15 @@ x86-speculation-l1tf-fix-up-pte-pfn-conversion-for-pae.patch +act_ife-fix-a-potential-use-after-free.patch +ipv4-tcp-send-zero-ipid-for-rst-and-ack-sent-in-syn-recv-and-time-wait-state.patch +net-bcmgenet-use-mac-link-status-for-fixed-phy.patch +net-sched-fix-memory-exposure-from-short-tca_u32_sel.patch +qlge-fix-netdev-features-configuration.patch +r8169-add-support-for-ncube-8168-network-card.patch +tcp-do-not-restart-timewait-timer-on-rst-reception.patch +vti6-remove-skb-ignore_df-check-from-vti6_xmit.patch +sctp-hold-transport-before-accessing-its-asoc-in-sctp_transport_get_next.patch +vhost-correctly-check-the-iova-range-when-waking-virtqueue.patch +hv_netvsc-ignore-devices-that-are-not-pci.patch +act_ife-move-tcfa_lock-down-to-where-necessary.patch +act_ife-fix-a-potential-deadlock.patch +net-sched-action_ife-take-reference-to-meta-module.patch diff --git a/queue-4.9/tcp-do-not-restart-timewait-timer-on-rst-reception.patch b/queue-4.9/tcp-do-not-restart-timewait-timer-on-rst-reception.patch new file mode 100644 index 00000000000..92d4cd27aad --- /dev/null +++ b/queue-4.9/tcp-do-not-restart-timewait-timer-on-rst-reception.patch @@ -0,0 +1,122 @@ +From foo@baz Tue Sep 11 11:48:38 CEST 2018 +From: Florian Westphal +Date: Thu, 30 Aug 2018 14:24:29 +0200 +Subject: tcp: do not restart timewait timer on rst reception + +From: Florian Westphal + +[ Upstream commit 63cc357f7bba6729869565a12df08441a5995d9a ] + +RFC 1337 says: + ''Ignore RST segments in TIME-WAIT state. + If the 2 minute MSL is enforced, this fix avoids all three hazards.'' + +So with net.ipv4.tcp_rfc1337=1, expected behaviour is to have TIME-WAIT sk +expire rather than removing it instantly when a reset is received. + +However, Linux will also re-start the TIME-WAIT timer. + +This causes connect to fail when trying to re-use ports or very long +delays (until syn retry interval exceeds MSL). 
+ +packetdrill test case: +// Demonstrate bogus rearming of TIME-WAIT timer in rfc1337 mode. +`sysctl net.ipv4.tcp_rfc1337=1` + +0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 +0.000 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 +0.000 bind(3, ..., ...) = 0 +0.000 listen(3, 1) = 0 + +0.100 < S 0:0(0) win 29200 +0.100 > S. 0:0(0) ack 1 +0.200 < . 1:1(0) ack 1 win 257 +0.200 accept(3, ..., ...) = 4 + +// Receive first segment +0.310 < P. 1:1001(1000) ack 1 win 46 + +// Send one ACK +0.310 > . 1:1(0) ack 1001 + +// read 1000 byte +0.310 read(4, ..., 1000) = 1000 + +// Application writes 100 bytes +0.350 write(4, ..., 100) = 100 +0.350 > P. 1:101(100) ack 1001 + +// ACK +0.500 < . 1001:1001(0) ack 101 win 257 + +// close the connection +0.600 close(4) = 0 +0.600 > F. 101:101(0) ack 1001 win 244 + +// Our side is in FIN_WAIT_1 & waits for ack to fin +0.7 < . 1001:1001(0) ack 102 win 244 + +// Our side is in FIN_WAIT_2 with no outstanding data. +0.8 < F. 1001:1001(0) ack 102 win 244 +0.8 > . 102:102(0) ack 1002 win 244 + +// Our side is now in TIME_WAIT state, send ack for fin. +0.9 < F. 1002:1002(0) ack 102 win 244 +0.9 > . 102:102(0) ack 1002 win 244 + +// Peer reopens with in-window SYN: +1.000 < S 1000:1000(0) win 9200 + +// Therefore, reply with ACK. +1.000 > . 102:102(0) ack 1002 win 244 + +// Peer sends RST for this ACK. Normally this RST results +// in tw socket removal, but rfc1337=1 setting prevents this. +1.100 < R 1002:1002(0) win 244 + +// second syn. Due to rfc1337=1 expect another pure ACK. +31.0 < S 1000:1000(0) win 9200 +31.0 > . 102:102(0) ack 1002 win 244 + +// .. and another RST from peer. +31.1 < R 1002:1002(0) win 244 +31.2 `echo no timer restart;ss -m -e -a -i -n -t -o state TIME-WAIT` + +// third syn after one minute. Time-Wait socket should have expired by now. +63.0 < S 1000:1000(0) win 9200 + +// so we expect a syn-ack & 3whs to proceed from here on. +63.0 > S. 
0:0(0) ack 1 + +Without this patch, 'ss' shows restarts of tw timer and last packet is +thus just another pure ack, more than one minute later. + +This restores the original code from commit 283fd6cf0be690a83 +("Merge in ANK networking jumbo patch") in netdev-vger-cvs.git . + +For some reason the else branch was removed/lost in 1f28b683339f7 +("Merge in TCP/UDP optimizations and [..]") and timer restart became +unconditional. + +Reported-by: Michal Tesar +Signed-off-by: Florian Westphal +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_minisocks.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/ipv4/tcp_minisocks.c ++++ b/net/ipv4/tcp_minisocks.c +@@ -194,8 +194,9 @@ kill: + inet_twsk_deschedule_put(tw); + return TCP_TW_SUCCESS; + } ++ } else { ++ inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN); + } +- inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN); + + if (tmp_opt.saw_tstamp) { + tcptw->tw_ts_recent = tmp_opt.rcv_tsval; diff --git a/queue-4.9/vhost-correctly-check-the-iova-range-when-waking-virtqueue.patch b/queue-4.9/vhost-correctly-check-the-iova-range-when-waking-virtqueue.patch new file mode 100644 index 00000000000..a8c5e986bf7 --- /dev/null +++ b/queue-4.9/vhost-correctly-check-the-iova-range-when-waking-virtqueue.patch @@ -0,0 +1,37 @@ +From foo@baz Tue Sep 11 11:48:38 CEST 2018 +From: Jason Wang +Date: Fri, 24 Aug 2018 16:53:13 +0800 +Subject: vhost: correctly check the iova range when waking virtqueue + +From: Jason Wang + +[ Upstream commit 2d66f997f0545c8f7fc5cf0b49af1decb35170e7 ] + +We don't wakeup the virtqueue if the first byte of pending iova range +is the last byte of the range we just got updated. This will lead a +virtqueue to wait for IOTLB updating forever. Fix this by correcting the +check and waking up the virtqueue in this case. 
+ +Fixes: 6b1e6cc7855b ("vhost: new device IOTLB API") +Reported-by: Peter Xu +Signed-off-by: Jason Wang +Reviewed-by: Peter Xu +Tested-by: Peter Xu +Acked-by: Michael S. Tsirkin +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/vhost/vhost.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/vhost/vhost.c ++++ b/drivers/vhost/vhost.c +@@ -905,7 +905,7 @@ static void vhost_iotlb_notify_vq(struct + list_for_each_entry_safe(node, n, &d->pending_list, node) { + struct vhost_iotlb_msg *vq_msg = &node->msg.iotlb; + if (msg->iova <= vq_msg->iova && +- msg->iova + msg->size - 1 > vq_msg->iova && ++ msg->iova + msg->size - 1 >= vq_msg->iova && + vq_msg->type == VHOST_IOTLB_MISS) { + vhost_poll_queue(&node->vq->poll); + list_del(&node->node); diff --git a/queue-4.9/vti6-remove-skb-ignore_df-check-from-vti6_xmit.patch b/queue-4.9/vti6-remove-skb-ignore_df-check-from-vti6_xmit.patch new file mode 100644 index 00000000000..7feb2227418 --- /dev/null +++ b/queue-4.9/vti6-remove-skb-ignore_df-check-from-vti6_xmit.patch @@ -0,0 +1,42 @@ +From foo@baz Tue Sep 11 11:48:38 CEST 2018 +From: Alexey Kodanev +Date: Thu, 23 Aug 2018 19:49:54 +0300 +Subject: vti6: remove !skb->ignore_df check from vti6_xmit() + +From: Alexey Kodanev + +[ Upstream commit 9f2895461439fda2801a7906fb4c5fb3dbb37a0a ] + +Before the commit d6990976af7c ("vti6: fix PMTU caching and reporting +on xmit") '!skb->ignore_df' check was always true because the function +skb_scrub_packet() was called before it, resetting ignore_df to zero. + +In the commit, skb_scrub_packet() was moved below, and now this check +can be false for the packet, e.g. when sending it in the two fragments, +this prevents successful PMTU updates in such case. The next attempts +to send the packet lead to the same tx error. Moreover, vti6 initial +MTU value relies on PMTU adjustments. 
+ +This issue can be reproduced with the following LTP test script: + udp_ipsec_vti.sh -6 -p ah -m tunnel -s 2000 + +Fixes: ccd740cbc6e0 ("vti6: Add pmtu handling to vti6_xmit.") +Signed-off-by: Alexey Kodanev +Acked-by: Steffen Klassert +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_vti.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv6/ip6_vti.c ++++ b/net/ipv6/ip6_vti.c +@@ -481,7 +481,7 @@ vti6_xmit(struct sk_buff *skb, struct ne + } + + mtu = dst_mtu(dst); +- if (!skb->ignore_df && skb->len > mtu) { ++ if (skb->len > mtu) { + skb_dst(skb)->ops->update_pmtu(dst, NULL, skb, mtu); + + if (skb->protocol == htons(ETH_P_IPV6)) {