From: Greg Kroah-Hartman Date: Thu, 2 Jan 2020 11:15:05 +0000 (+0100) Subject: 4.9-stable patches X-Git-Tag: v4.4.208~12 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=8107edbe6a75e281e0299391d72e8d934792a4da;p=thirdparty%2Fkernel%2Fstable-queue.git 4.9-stable patches added patches: gtp-avoid-zero-size-hashtable.patch gtp-fix-wrong-condition-in-gtp_genl_dump_pdp.patch net-ena-fix-napi-handler-misbehavior-when-the-napi-budget-is-zero.patch ptp-create-pins-together-with-the-rest-of-attributes.patch ptp-do-not-explicitly-set-drvdata-in-ptp_clock_register.patch ptp-fix-pass-zero-to-err_ptr-in-ptp_clock_register.patch ptp-fix-the-race-between-the-release-of-ptp_clock-and-cdev.patch ptp-use-is_visible-method-to-hide-unused-attributes.patch ptp-use-kcalloc-when-allocating-arrays.patch tcp-dccp-fix-possible-race-__inet_lookup_established.patch tcp-do-not-send-empty-skb-from-tcp_write_xmit.patch vhost-vsock-accept-only-packets-with-the-right-dst_cid.patch --- diff --git a/queue-4.9/gtp-avoid-zero-size-hashtable.patch b/queue-4.9/gtp-avoid-zero-size-hashtable.patch new file mode 100644 index 00000000000..2129c501c09 --- /dev/null +++ b/queue-4.9/gtp-avoid-zero-size-hashtable.patch @@ -0,0 +1,39 @@ +From foo@baz Thu 02 Jan 2020 11:49:15 AM CET +From: Taehee Yoo +Date: Wed, 11 Dec 2019 08:23:48 +0000 +Subject: gtp: avoid zero size hashtable + +From: Taehee Yoo + +[ Upstream commit 6a902c0f31993ab02e1b6ea7085002b9c9083b6a ] + +GTP default hashtable size is 1024 and userspace could set specific +hashtable size with IFLA_GTP_PDP_HASHSIZE. If hashtable size is set to 0 +from userspace, hashtable will not work and panic will occur. + +Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)") +Signed-off-by: Taehee Yoo +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/gtp.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/drivers/net/gtp.c ++++ b/drivers/net/gtp.c +@@ -677,10 +677,13 @@ static int gtp_newlink(struct net *src_n + if (err < 0) + goto out_err; + +- if (!data[IFLA_GTP_PDP_HASHSIZE]) ++ if (!data[IFLA_GTP_PDP_HASHSIZE]) { + hashsize = 1024; +- else ++ } else { + hashsize = nla_get_u32(data[IFLA_GTP_PDP_HASHSIZE]); ++ if (!hashsize) ++ hashsize = 1024; ++ } + + err = gtp_hashtable_new(gtp, hashsize); + if (err < 0) diff --git a/queue-4.9/gtp-fix-wrong-condition-in-gtp_genl_dump_pdp.patch b/queue-4.9/gtp-fix-wrong-condition-in-gtp_genl_dump_pdp.patch new file mode 100644 index 00000000000..50c0eeab0c7 --- /dev/null +++ b/queue-4.9/gtp-fix-wrong-condition-in-gtp_genl_dump_pdp.patch @@ -0,0 +1,102 @@ +From foo@baz Thu 02 Jan 2020 11:49:15 AM CET +From: Taehee Yoo +Date: Wed, 11 Dec 2019 08:23:17 +0000 +Subject: gtp: fix wrong condition in gtp_genl_dump_pdp() + +From: Taehee Yoo + +[ Upstream commit 94a6d9fb88df43f92d943c32b84ce398d50bf49f ] + +gtp_genl_dump_pdp() is ->dumpit() callback of GTP module and it is used +to dump pdp contexts. it would be re-executed because of dump packet size. + +If dump packet size is too big, it saves current dump pointer +(gtp interface pointer, bucket, TID value) then it restarts dump from +last pointer. +Current GTP code allows adding zero TID pdp context but dump code +ignores zero TID value. So, last dump pointer will not be found. + +In addition, this patch adds missing rcu_read_lock() in +gtp_genl_dump_pdp(). + +Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)") +Signed-off-by: Taehee Yoo +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/gtp.c | 36 +++++++++++++++++++----------------- + 1 file changed, 19 insertions(+), 17 deletions(-) + +--- a/drivers/net/gtp.c ++++ b/drivers/net/gtp.c +@@ -42,7 +42,6 @@ struct pdp_ctx { + struct hlist_node hlist_addr; + + union { +- u64 tid; + struct { + u64 tid; + u16 flow; +@@ -1221,43 +1220,46 @@ static int gtp_genl_dump_pdp(struct sk_b + struct netlink_callback *cb) + { + struct gtp_dev *last_gtp = (struct gtp_dev *)cb->args[2], *gtp; ++ int i, j, bucket = cb->args[0], skip = cb->args[1]; + struct net *net = sock_net(skb->sk); +- struct gtp_net *gn = net_generic(net, gtp_net_id); +- unsigned long tid = cb->args[1]; +- int i, k = cb->args[0], ret; + struct pdp_ctx *pctx; ++ struct gtp_net *gn; ++ ++ gn = net_generic(net, gtp_net_id); + + if (cb->args[4]) + return 0; + ++ rcu_read_lock(); + list_for_each_entry_rcu(gtp, &gn->gtp_dev_list, list) { + if (last_gtp && last_gtp != gtp) + continue; + else + last_gtp = NULL; + +- for (i = k; i < gtp->hash_size; i++) { +- hlist_for_each_entry_rcu(pctx, >p->tid_hash[i], hlist_tid) { +- if (tid && tid != pctx->u.tid) +- continue; +- else +- tid = 0; +- +- ret = gtp_genl_fill_info(skb, +- NETLINK_CB(cb->skb).portid, +- cb->nlh->nlmsg_seq, +- cb->nlh->nlmsg_type, pctx); +- if (ret < 0) { ++ for (i = bucket; i < gtp->hash_size; i++) { ++ j = 0; ++ hlist_for_each_entry_rcu(pctx, >p->tid_hash[i], ++ hlist_tid) { ++ if (j >= skip && ++ gtp_genl_fill_info(skb, ++ NETLINK_CB(cb->skb).portid, ++ cb->nlh->nlmsg_seq, ++ cb->nlh->nlmsg_type, pctx)) { + cb->args[0] = i; +- cb->args[1] = pctx->u.tid; ++ cb->args[1] = j; + cb->args[2] = (unsigned long)gtp; + goto out; + } ++ j++; + } ++ skip = 0; + } ++ bucket = 0; + } + cb->args[4] = 1; + out: ++ rcu_read_unlock(); + return skb->len; + } + diff --git a/queue-4.9/net-ena-fix-napi-handler-misbehavior-when-the-napi-budget-is-zero.patch b/queue-4.9/net-ena-fix-napi-handler-misbehavior-when-the-napi-budget-is-zero.patch new file mode 100644 index 00000000000..fd62f5fbd41 --- /dev/null +++ b/queue-4.9/net-ena-fix-napi-handler-misbehavior-when-the-napi-budget-is-zero.patch @@ -0,0 +1,54 @@ +From foo@baz Thu 02 Jan 2020 11:49:15 AM CET +From: Netanel Belgazal +Date: Tue, 10 Dec 2019 11:27:44 +0000 +Subject: net: ena: fix napi handler misbehavior when the napi budget is zero + +From: Netanel Belgazal + +[ Upstream commit 24dee0c7478d1a1e00abdf5625b7f921467325dc ] + +In netpoll the napi handler could be called with budget equal to zero. +Current ENA napi handler doesn't take that into consideration. + +The napi handler handles Rx packets in a do-while loop. +Currently, the budget check happens only after decrementing the +budget, therefore the napi handler, in rare cases, could run over +MAX_INT packets. + +In addition to that, this moves all budget related variables to int +calculation and stop mixing u32 to avoid ambiguity + +Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") +Signed-off-by: Netanel Belgazal +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/amazon/ena/ena_netdev.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c ++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c +@@ -1105,8 +1105,8 @@ static int ena_io_poll(struct napi_struc + struct ena_ring *tx_ring, *rx_ring; + struct ena_eth_io_intr_reg intr_reg; + +- u32 tx_work_done; +- u32 rx_work_done; ++ int tx_work_done; ++ int rx_work_done = 0; + int tx_budget; + int napi_comp_call = 0; + int ret; +@@ -1122,7 +1122,11 @@ static int ena_io_poll(struct napi_struc + } + + tx_work_done = ena_clean_tx_irq(tx_ring, tx_budget); +- rx_work_done = ena_clean_rx_irq(rx_ring, napi, budget); ++ /* On netpoll the budget is zero and the handler should only clean the ++ * tx completions. ++ */ ++ if (likely(budget)) ++ rx_work_done = ena_clean_rx_irq(rx_ring, napi, budget); + + if ((budget > rx_work_done) && (tx_budget > tx_work_done)) { + napi_complete_done(napi, rx_work_done); diff --git a/queue-4.9/ptp-create-pins-together-with-the-rest-of-attributes.patch b/queue-4.9/ptp-create-pins-together-with-the-rest-of-attributes.patch new file mode 100644 index 00000000000..66282f23c24 --- /dev/null +++ b/queue-4.9/ptp-create-pins-together-with-the-rest-of-attributes.patch @@ -0,0 +1,161 @@ +From 85a66e55019583da1e0f18706b7a8281c9f6de5b Mon Sep 17 00:00:00 2001 +From: Dmitry Torokhov +Date: Tue, 14 Feb 2017 10:23:34 -0800 +Subject: ptp: create "pins" together with the rest of attributes + +From: Dmitry Torokhov + +commit 85a66e55019583da1e0f18706b7a8281c9f6de5b upstream. + +Let's switch to using device_create_with_groups(), which will allow us to +create "pins" attribute group together with the rest of ptp device +attributes, and before userspace gets notified about ptp device creation. + +Signed-off-by: Dmitry Torokhov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/ptp/ptp_clock.c | 20 +++++++++++--------- + drivers/ptp/ptp_private.h | 7 ++++--- + drivers/ptp/ptp_sysfs.c | 39 +++++++++------------------------------ + 3 files changed, 24 insertions(+), 42 deletions(-) + +--- a/drivers/ptp/ptp_clock.c ++++ b/drivers/ptp/ptp_clock.c +@@ -214,16 +214,17 @@ struct ptp_clock *ptp_clock_register(str + mutex_init(&ptp->pincfg_mux); + init_waitqueue_head(&ptp->tsev_wq); + ++ err = ptp_populate_pin_groups(ptp); ++ if (err) ++ goto no_pin_groups; ++ + /* Create a new device in our class. */ +- ptp->dev = device_create(ptp_class, parent, ptp->devid, ptp, +- "ptp%d", ptp->index); ++ ptp->dev = device_create_with_groups(ptp_class, parent, ptp->devid, ++ ptp, ptp->pin_attr_groups, ++ "ptp%d", ptp->index); + if (IS_ERR(ptp->dev)) + goto no_device; + +- err = ptp_populate_sysfs(ptp); +- if (err) +- goto no_sysfs; +- + /* Register a new PPS source. */ + if (info->pps) { + struct pps_source_info pps; +@@ -251,10 +252,10 @@ no_clock: + if (ptp->pps_source) + pps_unregister_source(ptp->pps_source); + no_pps: +- ptp_cleanup_sysfs(ptp); +-no_sysfs: + device_destroy(ptp_class, ptp->devid); + no_device: ++ ptp_cleanup_pin_groups(ptp); ++no_pin_groups: + mutex_destroy(&ptp->tsevq_mux); + mutex_destroy(&ptp->pincfg_mux); + ida_simple_remove(&ptp_clocks_map, index); +@@ -273,8 +274,9 @@ int ptp_clock_unregister(struct ptp_cloc + /* Release the clock's resources. */ + if (ptp->pps_source) + pps_unregister_source(ptp->pps_source); +- ptp_cleanup_sysfs(ptp); ++ + device_destroy(ptp_class, ptp->devid); ++ ptp_cleanup_pin_groups(ptp); + + posix_clock_unregister(&ptp->clock); + return 0; +--- a/drivers/ptp/ptp_private.h ++++ b/drivers/ptp/ptp_private.h +@@ -54,6 +54,8 @@ struct ptp_clock { + struct device_attribute *pin_dev_attr; + struct attribute **pin_attr; + struct attribute_group pin_attr_group; ++ /* 1st entry is a pointer to the real group, 2nd is NULL terminator */ ++ const struct attribute_group *pin_attr_groups[2]; + }; + + /* +@@ -94,8 +96,7 @@ uint ptp_poll(struct posix_clock *pc, + + extern const struct attribute_group *ptp_groups[]; + +-int ptp_cleanup_sysfs(struct ptp_clock *ptp); +- +-int ptp_populate_sysfs(struct ptp_clock *ptp); ++int ptp_populate_pin_groups(struct ptp_clock *ptp); ++void ptp_cleanup_pin_groups(struct ptp_clock *ptp); + + #endif +--- a/drivers/ptp/ptp_sysfs.c ++++ b/drivers/ptp/ptp_sysfs.c +@@ -268,25 +268,14 @@ static ssize_t ptp_pin_store(struct devi + return count; + } + +-int ptp_cleanup_sysfs(struct ptp_clock *ptp) ++int ptp_populate_pin_groups(struct ptp_clock *ptp) + { +- struct device *dev = ptp->dev; +- struct ptp_clock_info *info = ptp->info; +- +- if (info->n_pins) { +- sysfs_remove_group(&dev->kobj, &ptp->pin_attr_group); +- kfree(ptp->pin_attr); +- kfree(ptp->pin_dev_attr); +- } +- return 0; +-} +- +-static int ptp_populate_pins(struct ptp_clock *ptp) +-{ +- struct device *dev = ptp->dev; + struct ptp_clock_info *info = ptp->info; + int err = -ENOMEM, i, n_pins = info->n_pins; + ++ if (!n_pins) ++ return 0; ++ + ptp->pin_dev_attr = kcalloc(n_pins, sizeof(*ptp->pin_dev_attr), + GFP_KERNEL); + if (!ptp->pin_dev_attr) +@@ -309,28 +298,18 @@ static int ptp_populate_pins(struct ptp_ + ptp->pin_attr_group.name = "pins"; + ptp->pin_attr_group.attrs = ptp->pin_attr; + +- err = sysfs_create_group(&dev->kobj, &ptp->pin_attr_group); +- if (err) +- goto no_group; ++ ptp->pin_attr_groups[0] = &ptp->pin_attr_group; ++ + return 0; + +-no_group: +- kfree(ptp->pin_attr); + no_pin_attr: + kfree(ptp->pin_dev_attr); + no_dev_attr: + return err; + } + +-int ptp_populate_sysfs(struct ptp_clock *ptp) ++void ptp_cleanup_pin_groups(struct ptp_clock *ptp) + { +- struct ptp_clock_info *info = ptp->info; +- int err; +- +- if (info->n_pins) { +- err = ptp_populate_pins(ptp); +- if (err) +- return err; +- } +- return 0; ++ kfree(ptp->pin_attr); ++ kfree(ptp->pin_dev_attr); + } diff --git a/queue-4.9/ptp-do-not-explicitly-set-drvdata-in-ptp_clock_register.patch b/queue-4.9/ptp-do-not-explicitly-set-drvdata-in-ptp_clock_register.patch new file mode 100644 index 00000000000..bf842fede21 --- /dev/null +++ b/queue-4.9/ptp-do-not-explicitly-set-drvdata-in-ptp_clock_register.patch @@ -0,0 +1,32 @@ +From 882f312dc0751c973db26478f07f082c584d16aa Mon Sep 17 00:00:00 2001 +From: Dmitry Torokhov +Date: Tue, 14 Feb 2017 10:23:31 -0800 +Subject: ptp: do not explicitly set drvdata in ptp_clock_register() + +From: Dmitry Torokhov + +commit 882f312dc0751c973db26478f07f082c584d16aa upstream. + +We do not need explicitly call dev_set_drvdata(), as it is done for us by +device_create(). + +Acked-by: Richard Cochran +Signed-off-by: Dmitry Torokhov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/ptp/ptp_clock.c | 2 -- + 1 file changed, 2 deletions(-) + +--- a/drivers/ptp/ptp_clock.c ++++ b/drivers/ptp/ptp_clock.c +@@ -220,8 +220,6 @@ struct ptp_clock *ptp_clock_register(str + if (IS_ERR(ptp->dev)) + goto no_device; + +- dev_set_drvdata(ptp->dev, ptp); +- + err = ptp_populate_sysfs(ptp); + if (err) + goto no_sysfs; diff --git a/queue-4.9/ptp-fix-pass-zero-to-err_ptr-in-ptp_clock_register.patch b/queue-4.9/ptp-fix-pass-zero-to-err_ptr-in-ptp_clock_register.patch new file mode 100644 index 00000000000..5951c5118ff --- /dev/null +++ b/queue-4.9/ptp-fix-pass-zero-to-err_ptr-in-ptp_clock_register.patch @@ -0,0 +1,49 @@ +From aea0a897af9e44c258e8ab9296fad417f1bc063a Mon Sep 17 00:00:00 2001 +From: YueHaibing +Date: Fri, 23 Nov 2018 09:54:55 +0800 +Subject: ptp: Fix pass zero to ERR_PTR() in ptp_clock_register + +From: YueHaibing + +commit aea0a897af9e44c258e8ab9296fad417f1bc063a upstream. + +Fix smatch warning: + +drivers/ptp/ptp_clock.c:298 ptp_clock_register() warn: + passing zero to 'ERR_PTR' + +'err' should be set while device_create_with_groups and +pps_register_source fails + +Fixes: 85a66e550195 ("ptp: create "pins" together with the rest of attributes") +Signed-off-by: YueHaibing +Acked-by: Richard Cochran +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/ptp/ptp_clock.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/drivers/ptp/ptp_clock.c ++++ b/drivers/ptp/ptp_clock.c +@@ -222,8 +222,10 @@ struct ptp_clock *ptp_clock_register(str + ptp->dev = device_create_with_groups(ptp_class, parent, ptp->devid, + ptp, ptp->pin_attr_groups, + "ptp%d", ptp->index); +- if (IS_ERR(ptp->dev)) ++ if (IS_ERR(ptp->dev)) { ++ err = PTR_ERR(ptp->dev); + goto no_device; ++ } + + /* Register a new PPS source. */ + if (info->pps) { +@@ -234,6 +236,7 @@ struct ptp_clock *ptp_clock_register(str + pps.owner = info->owner; + ptp->pps_source = pps_register_source(&pps, PTP_PPS_DEFAULTS); + if (!ptp->pps_source) { ++ err = -EINVAL; + pr_err("failed to register pps source\n"); + goto no_pps; + } diff --git a/queue-4.9/ptp-fix-the-race-between-the-release-of-ptp_clock-and-cdev.patch b/queue-4.9/ptp-fix-the-race-between-the-release-of-ptp_clock-and-cdev.patch new file mode 100644 index 00000000000..d1ffe4eef53 --- /dev/null +++ b/queue-4.9/ptp-fix-the-race-between-the-release-of-ptp_clock-and-cdev.patch @@ -0,0 +1,317 @@ +From foo@baz Thu 02 Jan 2020 11:49:15 AM CET +From: Vladis Dronov +Date: Fri, 27 Dec 2019 03:26:27 +0100 +Subject: ptp: fix the race between the release of ptp_clock and cdev + +From: Vladis Dronov + +[ Upstream commit a33121e5487b424339636b25c35d3a180eaa5f5e ] + +In a case when a ptp chardev (like /dev/ptp0) is open but an underlying +device is removed, closing this file leads to a race. This reproduces +easily in a kvm virtual machine: + +ts# cat openptp0.c +int main() { ... fp = fopen("/dev/ptp0", "r"); ... sleep(10); } +ts# uname -r +5.5.0-rc3-46cf053e +ts# cat /proc/cmdline +... slub_debug=FZP +ts# modprobe ptp_kvm +ts# ./openptp0 & +[1] 670 +opened /dev/ptp0, sleeping 10s... +ts# rmmod ptp_kvm +ts# ls /dev/ptp* +ls: cannot access '/dev/ptp*': No such file or directory +ts# ...woken up +[ 48.010809] general protection fault: 0000 [#1] SMP +[ 48.012502] CPU: 6 PID: 658 Comm: openptp0 Not tainted 5.5.0-rc3-46cf053e #25 +[ 48.014624] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), ... +[ 48.016270] RIP: 0010:module_put.part.0+0x7/0x80 +[ 48.017939] RSP: 0018:ffffb3850073be00 EFLAGS: 00010202 +[ 48.018339] RAX: 000000006b6b6b6b RBX: 6b6b6b6b6b6b6b6b RCX: ffff89a476c00ad0 +[ 48.018936] RDX: fffff65a08d3ea08 RSI: 0000000000000247 RDI: 6b6b6b6b6b6b6b6b +[ 48.019470] ... ^^^ a slub poison +[ 48.023854] Call Trace: +[ 48.024050] __fput+0x21f/0x240 +[ 48.024288] task_work_run+0x79/0x90 +[ 48.024555] do_exit+0x2af/0xab0 +[ 48.024799] ? vfs_write+0x16a/0x190 +[ 48.025082] do_group_exit+0x35/0x90 +[ 48.025387] __x64_sys_exit_group+0xf/0x10 +[ 48.025737] do_syscall_64+0x3d/0x130 +[ 48.026056] entry_SYSCALL_64_after_hwframe+0x44/0xa9 +[ 48.026479] RIP: 0033:0x7f53b12082f6 +[ 48.026792] ... +[ 48.030945] Modules linked in: ptp i6300esb watchdog [last unloaded: ptp_kvm] +[ 48.045001] Fixing recursive fault but reboot is needed! + +This happens in: + +static void __fput(struct file *file) +{ ... + if (file->f_op->release) + file->f_op->release(inode, file); <<< cdev is kfree'd here + if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL && + !(mode & FMODE_PATH))) { + cdev_put(inode->i_cdev); <<< cdev fields are accessed here + +Namely: + +__fput() + posix_clock_release() + kref_put(&clk->kref, delete_clock) <<< the last reference + delete_clock() + delete_ptp_clock() + kfree(ptp) <<< cdev is embedded in ptp + cdev_put + module_put(p->owner) <<< *p is kfree'd, bang! + +Here cdev is embedded in posix_clock which is embedded in ptp_clock. +The race happens because ptp_clock's lifetime is controlled by two +refcounts: kref and cdev.kobj in posix_clock. This is wrong. + +Make ptp_clock's sysfs device a parent of cdev with cdev_device_add() +created especially for such cases. This way the parent device with its +ptp_clock is not released until all references to the cdev are released. +This adds a requirement that an initialized but not exposed struct +device should be provided to posix_clock_register() by a caller instead +of a simple dev_t. + +This approach was adopted from the commit 72139dfa2464 ("watchdog: Fix +the race between the release of watchdog_core_data and cdev"). See +details of the implementation in the commit 233ed09d7fda ("chardev: add +helper function to register char devs with a struct device"). + +Link: https://lore.kernel.org/linux-fsdevel/20191125125342.6189-1-vdronov@redhat.com/T/#u +Analyzed-by: Stephen Johnston +Analyzed-by: Vern Lovejoy +Signed-off-by: Vladis Dronov +Acked-by: Richard Cochran +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/ptp/ptp_clock.c | 31 ++++++++++++++----------------- + drivers/ptp/ptp_private.h | 2 +- + include/linux/posix-clock.h | 19 +++++++++++-------- + kernel/time/posix-clock.c | 31 +++++++++++++------------------ + 4 files changed, 39 insertions(+), 44 deletions(-) + +--- a/drivers/ptp/ptp_clock.c ++++ b/drivers/ptp/ptp_clock.c +@@ -171,9 +171,9 @@ static struct posix_clock_operations ptp + .read = ptp_read, + }; + +-static void delete_ptp_clock(struct posix_clock *pc) ++static void ptp_clock_release(struct device *dev) + { +- struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); ++ struct ptp_clock *ptp = container_of(dev, struct ptp_clock, dev); + + mutex_destroy(&ptp->tsevq_mux); + mutex_destroy(&ptp->pincfg_mux); +@@ -205,7 +205,6 @@ struct ptp_clock *ptp_clock_register(str + } + + ptp->clock.ops = ptp_clock_ops; +- ptp->clock.release = delete_ptp_clock; + ptp->info = info; + ptp->devid = MKDEV(major, index); + ptp->index = index; +@@ -218,15 +217,6 @@ struct ptp_clock *ptp_clock_register(str + if (err) + goto no_pin_groups; + +- /* Create a new device in our class. */ +- ptp->dev = device_create_with_groups(ptp_class, parent, ptp->devid, +- ptp, ptp->pin_attr_groups, +- "ptp%d", ptp->index); +- if (IS_ERR(ptp->dev)) { +- err = PTR_ERR(ptp->dev); +- goto no_device; +- } +- + /* Register a new PPS source. */ + if (info->pps) { + struct pps_source_info pps; +@@ -242,8 +232,18 @@ struct ptp_clock *ptp_clock_register(str + } + } + +- /* Create a posix clock. */ +- err = posix_clock_register(&ptp->clock, ptp->devid); ++ /* Initialize a new device of our class in our clock structure. */ ++ device_initialize(&ptp->dev); ++ ptp->dev.devt = ptp->devid; ++ ptp->dev.class = ptp_class; ++ ptp->dev.parent = parent; ++ ptp->dev.groups = ptp->pin_attr_groups; ++ ptp->dev.release = ptp_clock_release; ++ dev_set_drvdata(&ptp->dev, ptp); ++ dev_set_name(&ptp->dev, "ptp%d", ptp->index); ++ ++ /* Create a posix clock and link it to the device. */ ++ err = posix_clock_register(&ptp->clock, &ptp->dev); + if (err) { + pr_err("failed to create posix clock\n"); + goto no_clock; +@@ -255,8 +255,6 @@ no_clock: + if (ptp->pps_source) + pps_unregister_source(ptp->pps_source); + no_pps: +- device_destroy(ptp_class, ptp->devid); +-no_device: + ptp_cleanup_pin_groups(ptp); + no_pin_groups: + mutex_destroy(&ptp->tsevq_mux); +@@ -278,7 +276,6 @@ int ptp_clock_unregister(struct ptp_cloc + if (ptp->pps_source) + pps_unregister_source(ptp->pps_source); + +- device_destroy(ptp_class, ptp->devid); + ptp_cleanup_pin_groups(ptp); + + posix_clock_unregister(&ptp->clock); +--- a/drivers/ptp/ptp_private.h ++++ b/drivers/ptp/ptp_private.h +@@ -40,7 +40,7 @@ struct timestamp_event_queue { + + struct ptp_clock { + struct posix_clock clock; +- struct device *dev; ++ struct device dev; + struct ptp_clock_info *info; + dev_t devid; + int index; /* index into clocks.map */ +--- a/include/linux/posix-clock.h ++++ b/include/linux/posix-clock.h +@@ -104,29 +104,32 @@ struct posix_clock_operations { + * + * @ops: Functional interface to the clock + * @cdev: Character device instance for this clock +- * @kref: Reference count. ++ * @dev: Pointer to the clock's device. + * @rwsem: Protects the 'zombie' field from concurrent access. + * @zombie: If 'zombie' is true, then the hardware has disappeared. +- * @release: A function to free the structure when the reference count reaches +- * zero. May be NULL if structure is statically allocated. + * + * Drivers should embed their struct posix_clock within a private + * structure, obtaining a reference to it during callbacks using + * container_of(). ++ * ++ * Drivers should supply an initialized but not exposed struct device ++ * to posix_clock_register(). It is used to manage lifetime of the ++ * driver's private structure. It's 'release' field should be set to ++ * a release function for this private structure. + */ + struct posix_clock { + struct posix_clock_operations ops; + struct cdev cdev; +- struct kref kref; ++ struct device *dev; + struct rw_semaphore rwsem; + bool zombie; +- void (*release)(struct posix_clock *clk); + }; + + /** + * posix_clock_register() - register a new clock +- * @clk: Pointer to the clock. Caller must provide 'ops' and 'release' +- * @devid: Allocated device id ++ * @clk: Pointer to the clock. Caller must provide 'ops' field ++ * @dev: Pointer to the initialized device. Caller must provide ++ * 'release' field + * + * A clock driver calls this function to register itself with the + * clock device subsystem. If 'clk' points to dynamically allocated +@@ -135,7 +138,7 @@ struct posix_clock { + * + * Returns zero on success, non-zero otherwise. + */ +-int posix_clock_register(struct posix_clock *clk, dev_t devid); ++int posix_clock_register(struct posix_clock *clk, struct device *dev); + + /** + * posix_clock_unregister() - unregister a clock +--- a/kernel/time/posix-clock.c ++++ b/kernel/time/posix-clock.c +@@ -25,8 +25,6 @@ + #include + #include + +-static void delete_clock(struct kref *kref); +- + /* + * Returns NULL if the posix_clock instance attached to 'fp' is old and stale. + */ +@@ -168,7 +166,7 @@ static int posix_clock_open(struct inode + err = 0; + + if (!err) { +- kref_get(&clk->kref); ++ get_device(clk->dev); + fp->private_data = clk; + } + out: +@@ -184,7 +182,7 @@ static int posix_clock_release(struct in + if (clk->ops.release) + err = clk->ops.release(clk); + +- kref_put(&clk->kref, delete_clock); ++ put_device(clk->dev); + + fp->private_data = NULL; + +@@ -206,38 +204,35 @@ static const struct file_operations posi + #endif + }; + +-int posix_clock_register(struct posix_clock *clk, dev_t devid) ++int posix_clock_register(struct posix_clock *clk, struct device *dev) + { + int err; + +- kref_init(&clk->kref); + init_rwsem(&clk->rwsem); + + cdev_init(&clk->cdev, &posix_clock_file_operations); ++ err = cdev_device_add(&clk->cdev, dev); ++ if (err) { ++ pr_err("%s unable to add device %d:%d\n", ++ dev_name(dev), MAJOR(dev->devt), MINOR(dev->devt)); ++ return err; ++ } + clk->cdev.owner = clk->ops.owner; +- err = cdev_add(&clk->cdev, devid, 1); ++ clk->dev = dev; + +- return err; ++ return 0; + } + EXPORT_SYMBOL_GPL(posix_clock_register); + +-static void delete_clock(struct kref *kref) +-{ +- struct posix_clock *clk = container_of(kref, struct posix_clock, kref); +- +- if (clk->release) +- clk->release(clk); +-} +- + void posix_clock_unregister(struct posix_clock *clk) + { +- cdev_del(&clk->cdev); ++ cdev_device_del(&clk->cdev, clk->dev); + + down_write(&clk->rwsem); + clk->zombie = true; + up_write(&clk->rwsem); + +- kref_put(&clk->kref, delete_clock); ++ put_device(clk->dev); + } + EXPORT_SYMBOL_GPL(posix_clock_unregister); + diff --git a/queue-4.9/ptp-use-is_visible-method-to-hide-unused-attributes.patch b/queue-4.9/ptp-use-is_visible-method-to-hide-unused-attributes.patch new file mode 100644 index 00000000000..ff219c01080 --- /dev/null +++ b/queue-4.9/ptp-use-is_visible-method-to-hide-unused-attributes.patch @@ -0,0 +1,210 @@ +From af59e717d5ff9c8dbf9bcc581c0dfb3b2a9c9030 Mon Sep 17 00:00:00 2001 +From: Dmitry Torokhov +Date: Tue, 14 Feb 2017 10:23:33 -0800 +Subject: ptp: use is_visible method to hide unused attributes + +From: Dmitry Torokhov + +commit af59e717d5ff9c8dbf9bcc581c0dfb3b2a9c9030 upstream. + +Instead of creating selected attributes after the device is created (and +after userspace potentially seen uevent), lets use attribute group +is_visible() method to control which attributes are shown. This will allow +us to create all attributes (except "pins" group, which will be taken care +of later) before userspace gets notified about new ptp class device. + +Signed-off-by: Dmitry Torokhov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/ptp/ptp_sysfs.c | 125 +++++++++++++++++++++--------------------------- + 1 file changed, 55 insertions(+), 70 deletions(-) + +--- a/drivers/ptp/ptp_sysfs.c ++++ b/drivers/ptp/ptp_sysfs.c +@@ -46,27 +46,6 @@ PTP_SHOW_INT(n_periodic_outputs, n_per_o + PTP_SHOW_INT(n_programmable_pins, n_pins); + PTP_SHOW_INT(pps_available, pps); + +-static struct attribute *ptp_attrs[] = { +- &dev_attr_clock_name.attr, +- &dev_attr_max_adjustment.attr, +- &dev_attr_n_alarms.attr, +- &dev_attr_n_external_timestamps.attr, +- &dev_attr_n_periodic_outputs.attr, +- &dev_attr_n_programmable_pins.attr, +- &dev_attr_pps_available.attr, +- NULL, +-}; +- +-static const struct attribute_group ptp_group = { +- .attrs = ptp_attrs, +-}; +- +-const struct attribute_group *ptp_groups[] = { +- &ptp_group, +- NULL, +-}; +- +- + static ssize_t extts_enable_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +@@ -91,6 +70,7 @@ static ssize_t extts_enable_store(struct + out: + return err; + } ++static DEVICE_ATTR(extts_enable, 0220, NULL, extts_enable_store); + + static ssize_t extts_fifo_show(struct device *dev, + struct device_attribute *attr, char *page) +@@ -124,6 +104,7 @@ out: + mutex_unlock(&ptp->tsevq_mux); + return cnt; + } ++static DEVICE_ATTR(fifo, 0444, extts_fifo_show, NULL); + + static ssize_t period_store(struct device *dev, + struct device_attribute *attr, +@@ -151,6 +132,7 @@ static ssize_t period_store(struct devic + out: + return err; + } ++static DEVICE_ATTR(period, 0220, NULL, period_store); + + static ssize_t pps_enable_store(struct device *dev, + struct device_attribute *attr, +@@ -177,6 +159,57 @@ static ssize_t pps_enable_store(struct d + out: + return err; + } ++static DEVICE_ATTR(pps_enable, 0220, NULL, pps_enable_store); ++ ++static struct attribute *ptp_attrs[] = { ++ &dev_attr_clock_name.attr, ++ ++ &dev_attr_max_adjustment.attr, ++ &dev_attr_n_alarms.attr, ++ &dev_attr_n_external_timestamps.attr, ++ &dev_attr_n_periodic_outputs.attr, ++ &dev_attr_n_programmable_pins.attr, ++ &dev_attr_pps_available.attr, ++ ++ &dev_attr_extts_enable.attr, ++ &dev_attr_fifo.attr, ++ &dev_attr_period.attr, ++ &dev_attr_pps_enable.attr, ++ NULL ++}; ++ ++static umode_t ptp_is_attribute_visible(struct kobject *kobj, ++ struct attribute *attr, int n) ++{ ++ struct device *dev = kobj_to_dev(kobj); ++ struct ptp_clock *ptp = dev_get_drvdata(dev); ++ struct ptp_clock_info *info = ptp->info; ++ umode_t mode = attr->mode; ++ ++ if (attr == &dev_attr_extts_enable.attr || ++ attr == &dev_attr_fifo.attr) { ++ if (!info->n_ext_ts) ++ mode = 0; ++ } else if (attr == &dev_attr_period.attr) { ++ if (!info->n_per_out) ++ mode = 0; ++ } else if (attr == &dev_attr_pps_enable.attr) { ++ if (!info->pps) ++ mode = 0; ++ } ++ ++ return mode; ++} ++ ++static const struct attribute_group ptp_group = { ++ .is_visible = ptp_is_attribute_visible, ++ .attrs = ptp_attrs, ++}; ++ ++const struct attribute_group *ptp_groups[] = { ++ &ptp_group, ++ NULL ++}; + + static int ptp_pin_name2index(struct ptp_clock *ptp, const char *name) + { +@@ -235,26 +268,11 @@ static ssize_t ptp_pin_store(struct devi + return count; + } + +-static DEVICE_ATTR(extts_enable, 0220, NULL, extts_enable_store); +-static DEVICE_ATTR(fifo, 0444, extts_fifo_show, NULL); +-static DEVICE_ATTR(period, 0220, NULL, period_store); +-static DEVICE_ATTR(pps_enable, 0220, NULL, pps_enable_store); +- + int ptp_cleanup_sysfs(struct ptp_clock *ptp) + { + struct device *dev = ptp->dev; + struct ptp_clock_info *info = ptp->info; + +- if (info->n_ext_ts) { +- device_remove_file(dev, &dev_attr_extts_enable); +- device_remove_file(dev, &dev_attr_fifo); +- } +- if (info->n_per_out) +- device_remove_file(dev, &dev_attr_period); +- +- if (info->pps) +- device_remove_file(dev, &dev_attr_pps_enable); +- + if (info->n_pins) { + sysfs_remove_group(&dev->kobj, &ptp->pin_attr_group); + kfree(ptp->pin_attr); +@@ -306,46 +324,13 @@ no_dev_attr: + + int ptp_populate_sysfs(struct ptp_clock *ptp) + { +- struct device *dev = ptp->dev; + struct ptp_clock_info *info = ptp->info; + int err; + +- if (info->n_ext_ts) { +- err = device_create_file(dev, &dev_attr_extts_enable); +- if (err) +- goto out1; +- err = device_create_file(dev, &dev_attr_fifo); +- if (err) +- goto out2; +- } +- if (info->n_per_out) { +- err = device_create_file(dev, &dev_attr_period); +- if (err) +- goto out3; +- } +- if (info->pps) { +- err = device_create_file(dev, &dev_attr_pps_enable); +- if (err) +- goto out4; +- } + if (info->n_pins) { + err = ptp_populate_pins(ptp); + if (err) +- goto out5; ++ return err; + } + return 0; +-out5: +- if (info->pps) +- device_remove_file(dev, &dev_attr_pps_enable); +-out4: +- if (info->n_per_out) +- device_remove_file(dev, &dev_attr_period); +-out3: +- if (info->n_ext_ts) +- device_remove_file(dev, &dev_attr_fifo); +-out2: +- if (info->n_ext_ts) +- device_remove_file(dev, &dev_attr_extts_enable); +-out1: +- return err; + } diff --git a/queue-4.9/ptp-use-kcalloc-when-allocating-arrays.patch b/queue-4.9/ptp-use-kcalloc-when-allocating-arrays.patch new file mode 100644 index 00000000000..1c5de4d1d76 --- /dev/null +++ b/queue-4.9/ptp-use-kcalloc-when-allocating-arrays.patch @@ -0,0 +1,38 @@ +From 6f7aa56bae6ff38727d5c8bf6ee7d4202b4e3865 Mon Sep 17 00:00:00 2001 +From: Dmitry Torokhov +Date: Tue, 14 Feb 2017 10:23:32 -0800 +Subject: ptp: use kcalloc when allocating arrays + +From: Dmitry Torokhov + +commit 6f7aa56bae6ff38727d5c8bf6ee7d4202b4e3865 upstream. + +kcalloc is more semantically correct when allocating arrays of objects, and +overflow-safe. + +Signed-off-by: Dmitry Torokhov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/ptp/ptp_sysfs.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/drivers/ptp/ptp_sysfs.c ++++ b/drivers/ptp/ptp_sysfs.c +@@ -269,13 +269,12 @@ static int ptp_populate_pins(struct ptp_ + struct ptp_clock_info *info = ptp->info; + int err = -ENOMEM, i, n_pins = info->n_pins; + +- ptp->pin_dev_attr = kzalloc(n_pins * sizeof(*ptp->pin_dev_attr), ++ ptp->pin_dev_attr = kcalloc(n_pins, sizeof(*ptp->pin_dev_attr), + GFP_KERNEL); + if (!ptp->pin_dev_attr) + goto no_dev_attr; + +- ptp->pin_attr = kzalloc((1 + n_pins) * sizeof(struct attribute *), +- GFP_KERNEL); ++ ptp->pin_attr = kcalloc(1 + n_pins, sizeof(*ptp->pin_attr), GFP_KERNEL); + if (!ptp->pin_attr) + goto no_pin_attr; + diff --git a/queue-4.9/series b/queue-4.9/series index 53724b00f29..2f615077634 100644 --- a/queue-4.9/series +++ b/queue-4.9/series @@ -163,3 +163,15 @@ hrtimer-annotate-lockless-access-to-timer-state.patch tty-serial-atmel-fix-out-of-range-clock-divider-hand.patch pinctrl-baytrail-really-serialize-all-register-acces.patch mmc-sdhci-update-the-tuning-failed-messages-to-pr_de.patch +net-ena-fix-napi-handler-misbehavior-when-the-napi-budget-is-zero.patch +ptp-do-not-explicitly-set-drvdata-in-ptp_clock_register.patch +ptp-use-kcalloc-when-allocating-arrays.patch +ptp-use-is_visible-method-to-hide-unused-attributes.patch +ptp-create-pins-together-with-the-rest-of-attributes.patch +ptp-fix-pass-zero-to-err_ptr-in-ptp_clock_register.patch +ptp-fix-the-race-between-the-release-of-ptp_clock-and-cdev.patch +vhost-vsock-accept-only-packets-with-the-right-dst_cid.patch +tcp-dccp-fix-possible-race-__inet_lookup_established.patch +tcp-do-not-send-empty-skb-from-tcp_write_xmit.patch +gtp-fix-wrong-condition-in-gtp_genl_dump_pdp.patch +gtp-avoid-zero-size-hashtable.patch diff --git a/queue-4.9/tcp-dccp-fix-possible-race-__inet_lookup_established.patch b/queue-4.9/tcp-dccp-fix-possible-race-__inet_lookup_established.patch new file mode 100644 index 00000000000..e7e250d525f --- /dev/null +++ b/queue-4.9/tcp-dccp-fix-possible-race-__inet_lookup_established.patch @@ -0,0 +1,221 @@ +From foo@baz Thu 02 Jan 2020 11:13:41 AM CET +From: Eric Dumazet +Date: Fri, 13 Dec 2019 18:20:41 -0800 +Subject: tcp/dccp: fix possible race __inet_lookup_established() + +From: Eric Dumazet + +[ Upstream commit 8dbd76e79a16b45b2ccb01d2f2e08dbf64e71e40 ] + +Michal Kubecek and Firo Yang did a very nice analysis of crashes +happening in __inet_lookup_established(). + +Since a TCP socket can go from TCP_ESTABLISH to TCP_LISTEN +(via a close()/socket()/listen() cycle) without a RCU grace period, +I should not have changed listeners linkage in their hash table. + +They must use the nulls protocol (Documentation/RCU/rculist_nulls.txt), +so that a lookup can detect a socket in a hash list was moved in +another one. + +Since we added code in commit d296ba60d8e2 ("soreuseport: Resolve +merge conflict for v4/v6 ordering fix"), we have to add +hlist_nulls_add_tail_rcu() helper. + +Fixes: 3b24d854cb35 ("tcp/dccp: do not touch listener sk_refcnt under synflood") +Signed-off-by: Eric Dumazet +Reported-by: Michal Kubecek +Reported-by: Firo Yang +Reviewed-by: Michal Kubecek +Link: https://lore.kernel.org/netdev/20191120083919.GH27852@unicorn.suse.cz/ +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/rculist_nulls.h | 37 +++++++++++++++++++++++++++++++++++++ + include/net/inet_hashtables.h | 12 +++++++++--- + include/net/sock.h | 5 +++++ + net/ipv4/inet_diag.c | 3 ++- + net/ipv4/inet_hashtables.c | 15 +++++++-------- + net/ipv4/tcp_ipv4.c | 7 ++++--- + 6 files changed, 64 insertions(+), 15 deletions(-) + +--- a/include/linux/rculist_nulls.h ++++ b/include/linux/rculist_nulls.h +@@ -100,6 +100,43 @@ static inline void hlist_nulls_add_head_ + } + + /** ++ * hlist_nulls_add_tail_rcu ++ * @n: the element to add to the hash list. ++ * @h: the list to add to. ++ * ++ * Description: ++ * Adds the specified element to the specified hlist_nulls, ++ * while permitting racing traversals. ++ * ++ * The caller must take whatever precautions are necessary ++ * (such as holding appropriate locks) to avoid racing ++ * with another list-mutation primitive, such as hlist_nulls_add_head_rcu() ++ * or hlist_nulls_del_rcu(), running on this same list. ++ * However, it is perfectly legal to run concurrently with ++ * the _rcu list-traversal primitives, such as ++ * hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency ++ * problems on Alpha CPUs. Regardless of the type of CPU, the ++ * list-traversal primitive must be guarded by rcu_read_lock(). ++ */ ++static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n, ++ struct hlist_nulls_head *h) ++{ ++ struct hlist_nulls_node *i, *last = NULL; ++ ++ /* Note: write side code, so rcu accessors are not needed. */ ++ for (i = h->first; !is_a_nulls(i); i = i->next) ++ last = i; ++ ++ if (last) { ++ n->next = last->next; ++ n->pprev = &last->next; ++ rcu_assign_pointer(hlist_next_rcu(last), n); ++ } else { ++ hlist_nulls_add_head_rcu(n, h); ++ } ++} ++ ++/** + * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_nulls_node to use as a loop cursor. +--- a/include/net/inet_hashtables.h ++++ b/include/net/inet_hashtables.h +@@ -98,12 +98,18 @@ struct inet_bind_hashbucket { + struct hlist_head chain; + }; + +-/* +- * Sockets can be hashed in established or listening table ++/* Sockets can be hashed in established or listening table. ++ * We must use different 'nulls' end-of-chain value for all hash buckets : ++ * A socket might transition from ESTABLISH to LISTEN state without ++ * RCU grace period. A lookup in ehash table needs to handle this case. + */ ++#define LISTENING_NULLS_BASE (1U << 29) + struct inet_listen_hashbucket { + spinlock_t lock; +- struct hlist_head head; ++ union { ++ struct hlist_head head; ++ struct hlist_nulls_head nulls_head; ++ }; + }; + + /* This is for listening sockets, thus all sockets which possess wildcards. */ +--- a/include/net/sock.h ++++ b/include/net/sock.h +@@ -661,6 +661,11 @@ static inline void __sk_nulls_add_node_r + hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list); + } + ++static inline void __sk_nulls_add_node_tail_rcu(struct sock *sk, struct hlist_nulls_head *list) ++{ ++ hlist_nulls_add_tail_rcu(&sk->sk_nulls_node, list); ++} ++ + static inline void sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) + { + sock_hold(sk); +--- a/net/ipv4/inet_diag.c ++++ b/net/ipv4/inet_diag.c +@@ -868,12 +868,13 @@ void inet_diag_dump_icsk(struct inet_has + + for (i = s_i; i < INET_LHTABLE_SIZE; i++) { + struct inet_listen_hashbucket *ilb; ++ struct hlist_nulls_node *node; + struct sock *sk; + + num = 0; + ilb = &hashinfo->listening_hash[i]; + spin_lock_bh(&ilb->lock); +- sk_for_each(sk, &ilb->head) { ++ sk_nulls_for_each(sk, node, &ilb->nulls_head) { + struct inet_sock *inet = inet_sk(sk); + + if (!net_eq(sock_net(sk), net)) +--- a/net/ipv4/inet_hashtables.c ++++ b/net/ipv4/inet_hashtables.c +@@ -441,10 +441,11 @@ static int inet_reuseport_add_sock(struc + bool match_wildcard)) + { + struct inet_bind_bucket *tb = inet_csk(sk)->icsk_bind_hash; ++ const struct hlist_nulls_node *node; + struct sock *sk2; + kuid_t uid = sock_i_uid(sk); + +- sk_for_each_rcu(sk2, &ilb->head) { ++ sk_nulls_for_each_rcu(sk2, node, &ilb->nulls_head) { + if (sk2 != sk && + sk2->sk_family == sk->sk_family && + ipv6_only_sock(sk2) == ipv6_only_sock(sk) && +@@ -482,9 +483,9 @@ int __inet_hash(struct sock *sk, struct + } + if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport && + sk->sk_family == AF_INET6) +- hlist_add_tail_rcu(&sk->sk_node, &ilb->head); ++ __sk_nulls_add_node_tail_rcu(sk, &ilb->nulls_head); + else +- hlist_add_head_rcu(&sk->sk_node, &ilb->head); ++ __sk_nulls_add_node_rcu(sk, &ilb->nulls_head); + sock_set_flag(sk, SOCK_RCU_FREE); + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); + unlock: +@@ -527,10 +528,7 @@ void inet_unhash(struct sock *sk) + spin_lock_bh(lock); + if (rcu_access_pointer(sk->sk_reuseport_cb)) + reuseport_detach_sock(sk); +- if (listener) +- done = __sk_del_node_init(sk); +- else +- done = __sk_nulls_del_node_init_rcu(sk); ++ done = __sk_nulls_del_node_init_rcu(sk); + if (done) + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + spin_unlock_bh(lock); +@@ -666,7 +664,8 @@ void inet_hashinfo_init(struct inet_hash + + for (i = 0; i < INET_LHTABLE_SIZE; i++) { + spin_lock_init(&h->listening_hash[i].lock); +- INIT_HLIST_HEAD(&h->listening_hash[i].head); ++ INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].nulls_head, ++ i + LISTENING_NULLS_BASE); + } + } + EXPORT_SYMBOL_GPL(inet_hashinfo_init); +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -1917,13 +1917,14 @@ static void *listening_get_next(struct s + struct tcp_iter_state *st = seq->private; + struct net *net = seq_file_net(seq); + struct inet_listen_hashbucket *ilb; ++ struct hlist_nulls_node *node; + struct sock *sk = cur; + + if (!sk) { + get_head: + ilb = &tcp_hashinfo.listening_hash[st->bucket]; + spin_lock_bh(&ilb->lock); +- sk = sk_head(&ilb->head); ++ sk = sk_nulls_head(&ilb->nulls_head); + st->offset = 0; + goto get_sk; + } +@@ -1931,9 +1932,9 @@ get_head: + ++st->num; + ++st->offset; + +- sk = sk_next(sk); ++ sk = sk_nulls_next(sk); + get_sk: +- sk_for_each_from(sk) { ++ sk_nulls_for_each_from(sk, node) { + if (!net_eq(sock_net(sk), net)) + continue; + if (sk->sk_family == st->family) diff --git a/queue-4.9/tcp-do-not-send-empty-skb-from-tcp_write_xmit.patch b/queue-4.9/tcp-do-not-send-empty-skb-from-tcp_write_xmit.patch new file mode 100644 index 00000000000..42320f8cf75 --- /dev/null +++ b/queue-4.9/tcp-do-not-send-empty-skb-from-tcp_write_xmit.patch @@ -0,0 +1,53 @@ +From foo@baz Thu 02 Jan 2020 11:49:15 AM CET +From: Eric Dumazet +Date: Thu, 12 Dec 2019 12:55:29 -0800 +Subject: tcp: do not send empty skb from tcp_write_xmit() + +From: Eric Dumazet + +[ Upstream commit 1f85e6267caca44b30c54711652b0726fadbb131 ] + +Backport of commit fdfc5c8594c2 ("tcp: remove empty skb from +write queue in error cases") in linux-4.14 stable triggered +various bugs. One of them has been fixed in commit ba2ddb43f270 +("tcp: Don't dequeue SYN/FIN-segments from write-queue"), but +we still have crashes in some occasions. + +Root-cause is that when tcp_sendmsg() has allocated a fresh +skb and could not append a fragment before being blocked +in sk_stream_wait_memory(), tcp_write_xmit() might be called +and decide to send this fresh and empty skb. + +Sending an empty packet is not only silly, it might have caused +many issues we had in the past with tp->packets_out being +out of sync. + +Fixes: c65f7f00c587 ("[TCP]: Simplify SKB data portion allocation with NETIF_F_SG.") +Signed-off-by: Eric Dumazet +Cc: Christoph Paasch +Acked-by: Neal Cardwell +Cc: Jason Baron +Acked-by: Soheil Hassas Yeganeh +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_output.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -2233,6 +2233,14 @@ static bool tcp_write_xmit(struct sock * + if (tcp_small_queue_check(sk, skb, 0)) + break; + ++ /* Argh, we hit an empty skb(), presumably a thread ++ * is sleeping in sendmsg()/sk_stream_wait_memory(). ++ * We do not want to send a pure-ack packet and have ++ * a strange looking rtx queue with empty packet(s). ++ */ ++ if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) ++ break; ++ + if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp))) + break; + diff --git a/queue-4.9/vhost-vsock-accept-only-packets-with-the-right-dst_cid.patch b/queue-4.9/vhost-vsock-accept-only-packets-with-the-right-dst_cid.patch new file mode 100644 index 00000000000..7e4edc923d1 --- /dev/null +++ b/queue-4.9/vhost-vsock-accept-only-packets-with-the-right-dst_cid.patch @@ -0,0 +1,35 @@ +From foo@baz Thu 02 Jan 2020 11:49:15 AM CET +From: Stefano Garzarella +Date: Fri, 6 Dec 2019 15:39:12 +0100 +Subject: vhost/vsock: accept only packets with the right dst_cid + +From: Stefano Garzarella + +[ Upstream commit 8a3cc29c316c17de590e3ff8b59f3d6cbfd37b0a ] + +When we receive a new packet from the guest, we check if the +src_cid is correct, but we forgot to check the dst_cid. + +The host should accept only packets where dst_cid is +equal to the host CID. + +Signed-off-by: Stefano Garzarella +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/vhost/vsock.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/drivers/vhost/vsock.c ++++ b/drivers/vhost/vsock.c +@@ -399,7 +399,9 @@ static void vhost_vsock_handle_tx_kick(s + len = pkt->len; + + /* Only accept correctly addressed packets */ +- if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid) ++ if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid && ++ le64_to_cpu(pkt->hdr.dst_cid) == ++ vhost_transport_get_local_cid()) + virtio_transport_recv_pkt(pkt); + else + virtio_transport_free_pkt(pkt);