From: Greg Kroah-Hartman Date: Thu, 2 Jan 2020 10:40:03 +0000 (+0100) Subject: 4.14-stable patches X-Git-Tag: v4.4.208~14 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=d150d23daf496f5fdde863c21049c8319a8ee054;p=thirdparty%2Fkernel%2Fstable-queue.git 4.14-stable patches added patches: gtp-avoid-zero-size-hashtable.patch gtp-do-not-allow-adding-duplicate-tid-and-ms_addr-pdp-context.patch gtp-do-not-confirm-neighbor-when-do-pmtu-update.patch gtp-fix-an-use-after-free-in-ipv4_pdp_find.patch gtp-fix-wrong-condition-in-gtp_genl_dump_pdp.patch ip6_gre-do-not-confirm-neighbor-when-do-pmtu-update.patch net-add-bool-confirm_neigh-parameter-for-dst_ops.update_pmtu.patch net-dst-add-new-function-skb_dst_update_pmtu_no_confirm.patch net-ena-fix-napi-handler-misbehavior-when-the-napi-budget-is-zero.patch net-mlxfw-fix-out-of-memory-error-in-mfa2-flash-burning.patch ptp-fix-the-race-between-the-release-of-ptp_clock-and-cdev.patch sit-do-not-confirm-neighbor-when-do-pmtu-update.patch tcp-dccp-fix-possible-race-__inet_lookup_established.patch tcp-do-not-send-empty-skb-from-tcp_write_xmit.patch tunnel-do-not-confirm-neighbor-when-do-pmtu-update.patch udp-fix-integer-overflow-while-computing-available-space-in-sk_rcvbuf.patch vhost-vsock-accept-only-packets-with-the-right-dst_cid.patch vti-do-not-confirm-neighbor-when-do-pmtu-update.patch --- diff --git a/queue-4.14/gtp-avoid-zero-size-hashtable.patch b/queue-4.14/gtp-avoid-zero-size-hashtable.patch new file mode 100644 index 00000000000..087e1dc0f1d --- /dev/null +++ b/queue-4.14/gtp-avoid-zero-size-hashtable.patch @@ -0,0 +1,39 @@ +From foo@baz Thu 02 Jan 2020 11:13:41 AM CET +From: Taehee Yoo +Date: Wed, 11 Dec 2019 08:23:48 +0000 +Subject: gtp: avoid zero size hashtable + +From: Taehee Yoo + +[ Upstream commit 6a902c0f31993ab02e1b6ea7085002b9c9083b6a ] + +GTP default hashtable size is 1024 and userspace could set specific +hashtable size with IFLA_GTP_PDP_HASHSIZE. 
If hashtable size is set to 0 +from userspace, hashtable will not work and panic will occur. + +Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)") +Signed-off-by: Taehee Yoo +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/gtp.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/drivers/net/gtp.c ++++ b/drivers/net/gtp.c +@@ -671,10 +671,13 @@ static int gtp_newlink(struct net *src_n + if (err < 0) + return err; + +- if (!data[IFLA_GTP_PDP_HASHSIZE]) ++ if (!data[IFLA_GTP_PDP_HASHSIZE]) { + hashsize = 1024; +- else ++ } else { + hashsize = nla_get_u32(data[IFLA_GTP_PDP_HASHSIZE]); ++ if (!hashsize) ++ hashsize = 1024; ++ } + + err = gtp_hashtable_new(gtp, hashsize); + if (err < 0) diff --git a/queue-4.14/gtp-do-not-allow-adding-duplicate-tid-and-ms_addr-pdp-context.patch b/queue-4.14/gtp-do-not-allow-adding-duplicate-tid-and-ms_addr-pdp-context.patch new file mode 100644 index 00000000000..079916e8fe6 --- /dev/null +++ b/queue-4.14/gtp-do-not-allow-adding-duplicate-tid-and-ms_addr-pdp-context.patch @@ -0,0 +1,88 @@ +From foo@baz Thu 02 Jan 2020 11:13:41 AM CET +From: Taehee Yoo +Date: Wed, 11 Dec 2019 08:23:00 +0000 +Subject: gtp: do not allow adding duplicate tid and ms_addr pdp context + +From: Taehee Yoo + +[ Upstream commit 6b01b1d9b2d38dc84ac398bfe9f00baff06a31e5 ] + +GTP RX packet path lookups pdp context with TID. If duplicate TID pdp +contexts are existing in the list, it couldn't select correct pdp context. +So, TID value should be unique. +GTP TX packet path lookups pdp context with ms_addr. If duplicate ms_addr pdp +contexts are existing in the list, it couldn't select correct pdp context. +So, ms_addr value should be unique. 
+ +Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)") +Signed-off-by: Taehee Yoo +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/gtp.c | 32 ++++++++++++++++++++++---------- + 1 file changed, 22 insertions(+), 10 deletions(-) + +--- a/drivers/net/gtp.c ++++ b/drivers/net/gtp.c +@@ -929,24 +929,31 @@ static void ipv4_pdp_fill(struct pdp_ctx + } + } + +-static int ipv4_pdp_add(struct gtp_dev *gtp, struct sock *sk, +- struct genl_info *info) ++static int gtp_pdp_add(struct gtp_dev *gtp, struct sock *sk, ++ struct genl_info *info) + { ++ struct pdp_ctx *pctx, *pctx_tid = NULL; + struct net_device *dev = gtp->dev; + u32 hash_ms, hash_tid = 0; +- struct pdp_ctx *pctx; ++ unsigned int version; + bool found = false; + __be32 ms_addr; + + ms_addr = nla_get_be32(info->attrs[GTPA_MS_ADDRESS]); + hash_ms = ipv4_hashfn(ms_addr) % gtp->hash_size; ++ version = nla_get_u32(info->attrs[GTPA_VERSION]); + +- hlist_for_each_entry_rcu(pctx, &gtp->addr_hash[hash_ms], hlist_addr) { +- if (pctx->ms_addr_ip4.s_addr == ms_addr) { +- found = true; +- break; +- } +- } ++ pctx = ipv4_pdp_find(gtp, ms_addr); ++ if (pctx) ++ found = true; ++ if (version == GTP_V0) ++ pctx_tid = gtp0_pdp_find(gtp, ++ nla_get_u64(info->attrs[GTPA_TID])); ++ else if (version == GTP_V1) ++ pctx_tid = gtp1_pdp_find(gtp, ++ nla_get_u32(info->attrs[GTPA_I_TEI])); ++ if (pctx_tid) ++ found = true; + + if (found) { + if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) +@@ -954,6 +961,11 @@ static int ipv4_pdp_add(struct gtp_dev * + if (info->nlhdr->nlmsg_flags & NLM_F_REPLACE) + return -EOPNOTSUPP; + ++ if (pctx && pctx_tid) ++ return -EEXIST; ++ if (!pctx) ++ pctx = pctx_tid; ++ + ipv4_pdp_fill(pctx, info); + + if (pctx->gtp_version == GTP_V0) +@@ -1077,7 +1089,7 @@ static int gtp_genl_new_pdp(struct sk_bu + goto out_unlock; + } + +- err = ipv4_pdp_add(gtp, sk, info); ++ err = gtp_pdp_add(gtp, sk, info); + + out_unlock: + rcu_read_unlock(); diff --git 
a/queue-4.14/gtp-do-not-confirm-neighbor-when-do-pmtu-update.patch b/queue-4.14/gtp-do-not-confirm-neighbor-when-do-pmtu-update.patch new file mode 100644 index 00000000000..0acb8491277 --- /dev/null +++ b/queue-4.14/gtp-do-not-confirm-neighbor-when-do-pmtu-update.patch @@ -0,0 +1,48 @@ +From foo@baz Thu 02 Jan 2020 11:13:41 AM CET +From: Hangbin Liu +Date: Sun, 22 Dec 2019 10:51:11 +0800 +Subject: gtp: do not confirm neighbor when do pmtu update + +From: Hangbin Liu + +[ Upstream commit 6e9105c73f8d2163d12d5dfd762fd75483ed30f5 ] + +When do IPv6 tunnel PMTU update and calls __ip6_rt_update_pmtu() in the end, +we should not call dst_confirm_neigh() as there is no two-way communication. + +Although GTP only support ipv4 right now, and __ip_rt_update_pmtu() does not +call dst_confirm_neigh(), we still set it to false to keep consistency with +IPv6 code. + +v5: No change. +v4: No change. +v3: Do not remove dst_confirm_neigh, but add a new bool parameter in + dst_ops.update_pmtu to control whether we should do neighbor confirm. + Also split the big patch to small ones for each area. +v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu. + +Reviewed-by: Guillaume Nault +Acked-by: David Ahern +Signed-off-by: Hangbin Liu +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/gtp.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c +index 9e3963f46458..d957ffdfec0f 100644 +--- a/drivers/net/gtp.c ++++ b/drivers/net/gtp.c +@@ -545,7 +545,7 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev, + mtu = dst_mtu(&rt->dst); + } + +- rt->dst.ops->update_pmtu(&rt->dst, NULL, skb, mtu, true); ++ rt->dst.ops->update_pmtu(&rt->dst, NULL, skb, mtu, false); + + if (!skb_is_gso(skb) && (iph->frag_off & htons(IP_DF)) && + mtu < ntohs(iph->tot_len)) { +-- +2.20.1 + diff --git a/queue-4.14/gtp-fix-an-use-after-free-in-ipv4_pdp_find.patch b/queue-4.14/gtp-fix-an-use-after-free-in-ipv4_pdp_find.patch new file mode 100644 index 00000000000..17ded4ea360 --- /dev/null +++ b/queue-4.14/gtp-fix-an-use-after-free-in-ipv4_pdp_find.patch @@ -0,0 +1,161 @@ +From foo@baz Thu 02 Jan 2020 11:13:41 AM CET +From: Taehee Yoo +Date: Wed, 11 Dec 2019 08:23:34 +0000 +Subject: gtp: fix an use-after-free in ipv4_pdp_find() + +From: Taehee Yoo + +[ Upstream commit 94dc550a5062030569d4aa76e10e50c8fc001930 ] + +ipv4_pdp_find() is called in TX packet path of GTP. +ipv4_pdp_find() internally uses gtp->tid_hash to lookup pdp context. +In the current code, gtp->tid_hash and gtp->addr_hash are freed by +->dellink(), which is gtp_dellink(). +But gtp_dellink() would be called while packets are processing. +So, gtp_dellink() should not free gtp->tid_hash and gtp->addr_hash. +Instead, dev->priv_destructor() would be used because this callback +is called after all packet processing safely. 
+ +Test commands: + ip link add veth1 type veth peer name veth2 + ip a a 172.0.0.1/24 dev veth1 + ip link set veth1 up + ip a a 172.99.0.1/32 dev lo + + gtp-link add gtp1 & + + gtp-tunnel add gtp1 v1 200 100 172.99.0.2 172.0.0.2 + ip r a 172.99.0.2/32 dev gtp1 + ip link set gtp1 mtu 1500 + + ip netns add ns2 + ip link set veth2 netns ns2 + ip netns exec ns2 ip a a 172.0.0.2/24 dev veth2 + ip netns exec ns2 ip link set veth2 up + ip netns exec ns2 ip a a 172.99.0.2/32 dev lo + ip netns exec ns2 ip link set lo up + + ip netns exec ns2 gtp-link add gtp2 & + ip netns exec ns2 gtp-tunnel add gtp2 v1 100 200 172.99.0.1 172.0.0.1 + ip netns exec ns2 ip r a 172.99.0.1/32 dev gtp2 + ip netns exec ns2 ip link set gtp2 mtu 1500 + + hping3 172.99.0.2 -2 --flood & + ip link del gtp1 + +Splat looks like: +[ 72.568081][ T1195] BUG: KASAN: use-after-free in ipv4_pdp_find.isra.12+0x130/0x170 [gtp] +[ 72.568916][ T1195] Read of size 8 at addr ffff8880b9a35d28 by task hping3/1195 +[ 72.569631][ T1195] +[ 72.569861][ T1195] CPU: 2 PID: 1195 Comm: hping3 Not tainted 5.5.0-rc1 #199 +[ 72.570547][ T1195] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 +[ 72.571438][ T1195] Call Trace: +[ 72.571764][ T1195] dump_stack+0x96/0xdb +[ 72.572171][ T1195] ? ipv4_pdp_find.isra.12+0x130/0x170 [gtp] +[ 72.572761][ T1195] print_address_description.constprop.5+0x1be/0x360 +[ 72.573400][ T1195] ? ipv4_pdp_find.isra.12+0x130/0x170 [gtp] +[ 72.573971][ T1195] ? ipv4_pdp_find.isra.12+0x130/0x170 [gtp] +[ 72.574544][ T1195] __kasan_report+0x12a/0x16f +[ 72.575014][ T1195] ? ipv4_pdp_find.isra.12+0x130/0x170 [gtp] +[ 72.575593][ T1195] kasan_report+0xe/0x20 +[ 72.576004][ T1195] ipv4_pdp_find.isra.12+0x130/0x170 [gtp] +[ 72.576577][ T1195] gtp_build_skb_ip4+0x199/0x1420 [gtp] +[ ... 
] +[ 72.647671][ T1195] BUG: unable to handle page fault for address: ffff8880b9a35d28 +[ 72.648512][ T1195] #PF: supervisor read access in kernel mode +[ 72.649158][ T1195] #PF: error_code(0x0000) - not-present page +[ 72.649849][ T1195] PGD a6c01067 P4D a6c01067 PUD 11fb07067 PMD 11f939067 PTE 800fffff465ca060 +[ 72.652958][ T1195] Oops: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI +[ 72.653834][ T1195] CPU: 2 PID: 1195 Comm: hping3 Tainted: G B 5.5.0-rc1 #199 +[ 72.668062][ T1195] RIP: 0010:ipv4_pdp_find.isra.12+0x86/0x170 [gtp] +[ ... ] +[ 72.679168][ T1195] Call Trace: +[ 72.679603][ T1195] gtp_build_skb_ip4+0x199/0x1420 [gtp] +[ 72.681915][ T1195] ? ipv4_pdp_find.isra.12+0x170/0x170 [gtp] +[ 72.682513][ T1195] ? lock_acquire+0x164/0x3b0 +[ 72.682966][ T1195] ? gtp_dev_xmit+0x35e/0x890 [gtp] +[ 72.683481][ T1195] gtp_dev_xmit+0x3c2/0x890 [gtp] +[ ... ] + +Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)") +Signed-off-by: Taehee Yoo +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/gtp.c | 34 +++++++++++++++++----------------- + 1 file changed, 17 insertions(+), 17 deletions(-) + +--- a/drivers/net/gtp.c ++++ b/drivers/net/gtp.c +@@ -644,9 +644,16 @@ static void gtp_link_setup(struct net_de + } + + static int gtp_hashtable_new(struct gtp_dev *gtp, int hsize); +-static void gtp_hashtable_free(struct gtp_dev *gtp); + static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[]); + ++static void gtp_destructor(struct net_device *dev) ++{ ++ struct gtp_dev *gtp = netdev_priv(dev); ++ ++ kfree(gtp->addr_hash); ++ kfree(gtp->tid_hash); ++} ++ + static int gtp_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) +@@ -681,13 +688,15 @@ static int gtp_newlink(struct net *src_n + + gn = net_generic(dev_net(dev), gtp_net_id); + list_add_rcu(&gtp->list, &gn->gtp_dev_list); ++ dev->priv_destructor = 
gtp_destructor; + + netdev_dbg(dev, "registered new GTP interface\n"); + + return 0; + + out_hashtable: +- gtp_hashtable_free(gtp); ++ kfree(gtp->addr_hash); ++ kfree(gtp->tid_hash); + out_encap: + gtp_encap_disable(gtp); + return err; +@@ -696,9 +705,14 @@ out_encap: + static void gtp_dellink(struct net_device *dev, struct list_head *head) + { + struct gtp_dev *gtp = netdev_priv(dev); ++ struct pdp_ctx *pctx; ++ int i; ++ ++ for (i = 0; i < gtp->hash_size; i++) ++ hlist_for_each_entry_rcu(pctx, &gtp->tid_hash[i], hlist_tid) ++ pdp_context_delete(pctx); + + gtp_encap_disable(gtp); +- gtp_hashtable_free(gtp); + list_del_rcu(&gtp->list); + unregister_netdevice_queue(dev, head); + } +@@ -774,20 +788,6 @@ err1: + return -ENOMEM; + } + +-static void gtp_hashtable_free(struct gtp_dev *gtp) +-{ +- struct pdp_ctx *pctx; +- int i; +- +- for (i = 0; i < gtp->hash_size; i++) +- hlist_for_each_entry_rcu(pctx, &gtp->tid_hash[i], hlist_tid) +- pdp_context_delete(pctx); +- +- synchronize_rcu(); +- kfree(gtp->addr_hash); +- kfree(gtp->tid_hash); +-} +- + static struct sock *gtp_encap_enable_socket(int fd, int type, + struct gtp_dev *gtp) + { diff --git a/queue-4.14/gtp-fix-wrong-condition-in-gtp_genl_dump_pdp.patch b/queue-4.14/gtp-fix-wrong-condition-in-gtp_genl_dump_pdp.patch new file mode 100644 index 00000000000..18febbbf60a --- /dev/null +++ b/queue-4.14/gtp-fix-wrong-condition-in-gtp_genl_dump_pdp.patch @@ -0,0 +1,102 @@ +From foo@baz Thu 02 Jan 2020 11:13:41 AM CET +From: Taehee Yoo +Date: Wed, 11 Dec 2019 08:23:17 +0000 +Subject: gtp: fix wrong condition in gtp_genl_dump_pdp() + +From: Taehee Yoo + +[ Upstream commit 94a6d9fb88df43f92d943c32b84ce398d50bf49f ] + +gtp_genl_dump_pdp() is ->dumpit() callback of GTP module and it is used +to dump pdp contexts. it would be re-executed because of dump packet size. + +If dump packet size is too big, it saves current dump pointer +(gtp interface pointer, bucket, TID value) then it restarts dump from +last pointer. 
+Current GTP code allows adding zero TID pdp context but dump code +ignores zero TID value. So, last dump pointer will not be found. + +In addition, this patch adds missing rcu_read_lock() in +gtp_genl_dump_pdp(). + +Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)") +Signed-off-by: Taehee Yoo +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/gtp.c | 36 +++++++++++++++++++----------------- + 1 file changed, 19 insertions(+), 17 deletions(-) + +--- a/drivers/net/gtp.c ++++ b/drivers/net/gtp.c +@@ -42,7 +42,6 @@ struct pdp_ctx { + struct hlist_node hlist_addr; + + union { +- u64 tid; + struct { + u64 tid; + u16 flow; +@@ -1247,43 +1246,46 @@ static int gtp_genl_dump_pdp(struct sk_b + struct netlink_callback *cb) + { + struct gtp_dev *last_gtp = (struct gtp_dev *)cb->args[2], *gtp; ++ int i, j, bucket = cb->args[0], skip = cb->args[1]; + struct net *net = sock_net(skb->sk); +- struct gtp_net *gn = net_generic(net, gtp_net_id); +- unsigned long tid = cb->args[1]; +- int i, k = cb->args[0], ret; + struct pdp_ctx *pctx; ++ struct gtp_net *gn; ++ ++ gn = net_generic(net, gtp_net_id); + + if (cb->args[4]) + return 0; + ++ rcu_read_lock(); + list_for_each_entry_rcu(gtp, &gn->gtp_dev_list, list) { + if (last_gtp && last_gtp != gtp) + continue; + else + last_gtp = NULL; + +- for (i = k; i < gtp->hash_size; i++) { +- hlist_for_each_entry_rcu(pctx, &gtp->tid_hash[i], hlist_tid) { +- if (tid && tid != pctx->u.tid) +- continue; +- else +- tid = 0; +- +- ret = gtp_genl_fill_info(skb, +- NETLINK_CB(cb->skb).portid, +- cb->nlh->nlmsg_seq, +- cb->nlh->nlmsg_type, pctx); +- if (ret < 0) { ++ for (i = bucket; i < gtp->hash_size; i++) { ++ j = 0; ++ hlist_for_each_entry_rcu(pctx, &gtp->tid_hash[i], ++ hlist_tid) { ++ if (j >= skip && ++ gtp_genl_fill_info(skb, ++ NETLINK_CB(cb->skb).portid, ++ cb->nlh->nlmsg_seq, ++ cb->nlh->nlmsg_type, pctx)) { + cb->args[0] = i; +- cb->args[1] = pctx->u.tid; ++ cb->args[1] = 
j; + cb->args[2] = (unsigned long)gtp; + goto out; + } ++ j++; + } ++ skip = 0; + } ++ bucket = 0; + } + cb->args[4] = 1; + out: ++ rcu_read_unlock(); + return skb->len; + } + diff --git a/queue-4.14/ip6_gre-do-not-confirm-neighbor-when-do-pmtu-update.patch b/queue-4.14/ip6_gre-do-not-confirm-neighbor-when-do-pmtu-update.patch new file mode 100644 index 00000000000..ac7c65038d6 --- /dev/null +++ b/queue-4.14/ip6_gre-do-not-confirm-neighbor-when-do-pmtu-update.patch @@ -0,0 +1,47 @@ +From foo@baz Thu 02 Jan 2020 11:13:41 AM CET +From: Hangbin Liu +Date: Sun, 22 Dec 2019 10:51:10 +0800 +Subject: ip6_gre: do not confirm neighbor when do pmtu update + +From: Hangbin Liu + +[ Upstream commit 675d76ad0ad5bf41c9a129772ef0aba8f57ea9a7 ] + +When we do ipv6 gre pmtu update, we will also do neigh confirm currently. +This will cause the neigh cache be refreshed and set to REACHABLE before +xmit. + +But if the remote mac address changed, e.g. device is deleted and recreated, +we will not able to notice this and still use the old mac address as the neigh +cache is REACHABLE. + +Fix this by disable neigh confirm when do pmtu update + +v5: No change. +v4: No change. +v3: Do not remove dst_confirm_neigh, but add a new bool parameter in + dst_ops.update_pmtu to control whether we should do neighbor confirm. + Also split the big patch to small ones for each area. +v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu. + +Reported-by: Jianlin Shi +Reviewed-by: Guillaume Nault +Acked-by: David Ahern +Signed-off-by: Hangbin Liu +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_gre.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv6/ip6_gre.c ++++ b/net/ipv6/ip6_gre.c +@@ -527,7 +527,7 @@ static netdev_tx_t __gre6_xmit(struct sk + + /* TooBig packet may have updated dst->dev's mtu */ + if (dst && dst_mtu(dst) > dst->dev->mtu) +- dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu, true); ++ dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu, false); + + return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu, + NEXTHDR_GRE); diff --git a/queue-4.14/net-add-bool-confirm_neigh-parameter-for-dst_ops.update_pmtu.patch b/queue-4.14/net-add-bool-confirm_neigh-parameter-for-dst_ops.update_pmtu.patch new file mode 100644 index 00000000000..cf56fc1c9b0 --- /dev/null +++ b/queue-4.14/net-add-bool-confirm_neigh-parameter-for-dst_ops.update_pmtu.patch @@ -0,0 +1,325 @@ +From foo@baz Thu 02 Jan 2020 11:13:41 AM CET +From: Hangbin Liu +Date: Sun, 22 Dec 2019 10:51:09 +0800 +Subject: net: add bool confirm_neigh parameter for dst_ops.update_pmtu + +From: Hangbin Liu + +[ Upstream commit bd085ef678b2cc8c38c105673dfe8ff8f5ec0c57 ] + +The MTU update code is supposed to be invoked in response to real +networking events that update the PMTU. In IPv6 PMTU update function +__ip6_rt_update_pmtu() we called dst_confirm_neigh() to update neighbor +confirmed time. + +But for tunnel code, it will call pmtu before xmit, like: + - tnl_update_pmtu() + - skb_dst_update_pmtu() + - ip6_rt_update_pmtu() + - __ip6_rt_update_pmtu() + - dst_confirm_neigh() + +If the tunnel remote dst mac address changed and we still do the neigh +confirm, we will not be able to update neigh cache and ping6 remote +will failed. + +So for this ip_tunnel_xmit() case, _EVEN_ if the MTU is changed, we +should not be invoking dst_confirm_neigh() as we have no evidence +of successful two-way communication at this point. 
+ +On the other hand it is also important to keep the neigh reachability fresh +for TCP flows, so we cannot remove this dst_confirm_neigh() call. + +To fix the issue, we have to add a new bool parameter for dst_ops.update_pmtu +to choose whether we should do neigh update or not. I will add the parameter +in this patch and set all the callers to true to comply with the previous +way, and fix the tunnel code one by one on later patches. + +v5: No change. +v4: No change. +v3: Do not remove dst_confirm_neigh, but add a new bool parameter in + dst_ops.update_pmtu to control whether we should do neighbor confirm. + Also split the big patch to small ones for each area. +v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu. + +Suggested-by: David Miller +Reviewed-by: Guillaume Nault +Acked-by: David Ahern +Signed-off-by: Hangbin Liu +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/gtp.c | 2 +- + include/net/dst.h | 2 +- + include/net/dst_ops.h | 3 ++- + net/bridge/br_nf_core.c | 3 ++- + net/decnet/dn_route.c | 6 ++++-- + net/ipv4/inet_connection_sock.c | 2 +- + net/ipv4/route.c | 9 ++++++--- + net/ipv4/xfrm4_policy.c | 5 +++-- + net/ipv6/inet6_connection_sock.c | 2 +- + net/ipv6/ip6_gre.c | 2 +- + net/ipv6/route.c | 22 +++++++++++++++------- + net/ipv6/xfrm6_policy.c | 5 +++-- + net/netfilter/ipvs/ip_vs_xmit.c | 2 +- + net/sctp/transport.c | 2 +- + 14 files changed, 42 insertions(+), 25 deletions(-) + +--- a/drivers/net/gtp.c ++++ b/drivers/net/gtp.c +@@ -545,7 +545,7 @@ static int gtp_build_skb_ip4(struct sk_b + mtu = dst_mtu(&rt->dst); + } + +- rt->dst.ops->update_pmtu(&rt->dst, NULL, skb, mtu); ++ rt->dst.ops->update_pmtu(&rt->dst, NULL, skb, mtu, true); + + if (!skb_is_gso(skb) && (iph->frag_off & htons(IP_DF)) && + mtu < ntohs(iph->tot_len)) { +--- a/include/net/dst.h ++++ b/include/net/dst.h +@@ -528,7 +528,7 @@ static inline void skb_dst_update_pmtu(s + struct dst_entry *dst = skb_dst(skb); + + if (dst && dst->ops->update_pmtu) 
+- dst->ops->update_pmtu(dst, NULL, skb, mtu); ++ dst->ops->update_pmtu(dst, NULL, skb, mtu, true); + } + + #endif /* _NET_DST_H */ +--- a/include/net/dst_ops.h ++++ b/include/net/dst_ops.h +@@ -27,7 +27,8 @@ struct dst_ops { + struct dst_entry * (*negative_advice)(struct dst_entry *); + void (*link_failure)(struct sk_buff *); + void (*update_pmtu)(struct dst_entry *dst, struct sock *sk, +- struct sk_buff *skb, u32 mtu); ++ struct sk_buff *skb, u32 mtu, ++ bool confirm_neigh); + void (*redirect)(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb); + int (*local_out)(struct net *net, struct sock *sk, struct sk_buff *skb); +--- a/net/bridge/br_nf_core.c ++++ b/net/bridge/br_nf_core.c +@@ -26,7 +26,8 @@ + #endif + + static void fake_update_pmtu(struct dst_entry *dst, struct sock *sk, +- struct sk_buff *skb, u32 mtu) ++ struct sk_buff *skb, u32 mtu, ++ bool confirm_neigh) + { + } + +--- a/net/decnet/dn_route.c ++++ b/net/decnet/dn_route.c +@@ -118,7 +118,8 @@ static void dn_dst_ifdown(struct dst_ent + static struct dst_entry *dn_dst_negative_advice(struct dst_entry *); + static void dn_dst_link_failure(struct sk_buff *); + static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk, +- struct sk_buff *skb , u32 mtu); ++ struct sk_buff *skb , u32 mtu, ++ bool confirm_neigh); + static void dn_dst_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb); + static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst, +@@ -259,7 +260,8 @@ static int dn_dst_gc(struct dst_ops *ops + * advertise to the other end). 
+ */ + static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk, +- struct sk_buff *skb, u32 mtu) ++ struct sk_buff *skb, u32 mtu, ++ bool confirm_neigh) + { + struct dn_route *rt = (struct dn_route *) dst; + struct neighbour *n = rt->n; +--- a/net/ipv4/inet_connection_sock.c ++++ b/net/ipv4/inet_connection_sock.c +@@ -1088,7 +1088,7 @@ struct dst_entry *inet_csk_update_pmtu(s + if (!dst) + goto out; + } +- dst->ops->update_pmtu(dst, sk, NULL, mtu); ++ dst->ops->update_pmtu(dst, sk, NULL, mtu, true); + + dst = __sk_dst_check(sk, 0); + if (!dst) +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -145,7 +145,8 @@ static unsigned int ipv4_mtu(const stru + static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); + static void ipv4_link_failure(struct sk_buff *skb); + static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, +- struct sk_buff *skb, u32 mtu); ++ struct sk_buff *skb, u32 mtu, ++ bool confirm_neigh); + static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb); + static void ipv4_dst_destroy(struct dst_entry *dst); +@@ -1042,7 +1043,8 @@ static void __ip_rt_update_pmtu(struct r + } + + static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, +- struct sk_buff *skb, u32 mtu) ++ struct sk_buff *skb, u32 mtu, ++ bool confirm_neigh) + { + struct rtable *rt = (struct rtable *) dst; + struct flowi4 fl4; +@@ -2529,7 +2531,8 @@ static unsigned int ipv4_blackhole_mtu(c + } + + static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, +- struct sk_buff *skb, u32 mtu) ++ struct sk_buff *skb, u32 mtu, ++ bool confirm_neigh) + { + } + +--- a/net/ipv4/xfrm4_policy.c ++++ b/net/ipv4/xfrm4_policy.c +@@ -222,12 +222,13 @@ _decode_session4(struct sk_buff *skb, st + } + + static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk, +- struct sk_buff *skb, u32 mtu) ++ struct sk_buff *skb, u32 mtu, ++ bool confirm_neigh) + { + struct xfrm_dst *xdst = (struct 
xfrm_dst *)dst; + struct dst_entry *path = xdst->route; + +- path->ops->update_pmtu(path, sk, skb, mtu); ++ path->ops->update_pmtu(path, sk, skb, mtu, confirm_neigh); + } + + static void xfrm4_redirect(struct dst_entry *dst, struct sock *sk, +--- a/net/ipv6/inet6_connection_sock.c ++++ b/net/ipv6/inet6_connection_sock.c +@@ -150,7 +150,7 @@ struct dst_entry *inet6_csk_update_pmtu( + + if (IS_ERR(dst)) + return NULL; +- dst->ops->update_pmtu(dst, sk, NULL, mtu); ++ dst->ops->update_pmtu(dst, sk, NULL, mtu, true); + + dst = inet6_csk_route_socket(sk, &fl6); + return IS_ERR(dst) ? NULL : dst; +--- a/net/ipv6/ip6_gre.c ++++ b/net/ipv6/ip6_gre.c +@@ -527,7 +527,7 @@ static netdev_tx_t __gre6_xmit(struct sk + + /* TooBig packet may have updated dst->dev's mtu */ + if (dst && dst_mtu(dst) > dst->dev->mtu) +- dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu); ++ dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu, true); + + return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu, + NEXTHDR_GRE); +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -93,7 +93,8 @@ static int ip6_pkt_prohibit(struct sk_b + static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb); + static void ip6_link_failure(struct sk_buff *skb); + static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, +- struct sk_buff *skb, u32 mtu); ++ struct sk_buff *skb, u32 mtu, ++ bool confirm_neigh); + static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb); + static void rt6_dst_from_metrics_check(struct rt6_info *rt); +@@ -264,7 +265,8 @@ static unsigned int ip6_blackhole_mtu(co + } + + static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, +- struct sk_buff *skb, u32 mtu) ++ struct sk_buff *skb, u32 mtu, ++ bool confirm_neigh) + { + } + +@@ -1471,7 +1473,8 @@ static bool rt6_cache_allowed_for_pmtu(c + } + + static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, +- const 
struct ipv6hdr *iph, u32 mtu) ++ const struct ipv6hdr *iph, u32 mtu, ++ bool confirm_neigh) + { + const struct in6_addr *daddr, *saddr; + struct rt6_info *rt6 = (struct rt6_info *)dst; +@@ -1489,7 +1492,10 @@ static void __ip6_rt_update_pmtu(struct + daddr = NULL; + saddr = NULL; + } +- dst_confirm_neigh(dst, daddr); ++ ++ if (confirm_neigh) ++ dst_confirm_neigh(dst, daddr); ++ + mtu = max_t(u32, mtu, IPV6_MIN_MTU); + if (mtu >= dst_mtu(dst)) + return; +@@ -1518,9 +1524,11 @@ static void __ip6_rt_update_pmtu(struct + } + + static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, +- struct sk_buff *skb, u32 mtu) ++ struct sk_buff *skb, u32 mtu, ++ bool confirm_neigh) + { +- __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu); ++ __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu, ++ confirm_neigh); + } + + void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, +@@ -1540,7 +1548,7 @@ void ip6_update_pmtu(struct sk_buff *skb + + dst = ip6_route_output(net, NULL, &fl6); + if (!dst->error) +- __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu)); ++ __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu), true); + dst_release(dst); + } + EXPORT_SYMBOL_GPL(ip6_update_pmtu); +--- a/net/ipv6/xfrm6_policy.c ++++ b/net/ipv6/xfrm6_policy.c +@@ -219,12 +219,13 @@ _decode_session6(struct sk_buff *skb, st + } + + static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk, +- struct sk_buff *skb, u32 mtu) ++ struct sk_buff *skb, u32 mtu, ++ bool confirm_neigh) + { + struct xfrm_dst *xdst = (struct xfrm_dst *)dst; + struct dst_entry *path = xdst->route; + +- path->ops->update_pmtu(path, sk, skb, mtu); ++ path->ops->update_pmtu(path, sk, skb, mtu, confirm_neigh); + } + + static void xfrm6_redirect(struct dst_entry *dst, struct sock *sk, +--- a/net/netfilter/ipvs/ip_vs_xmit.c ++++ b/net/netfilter/ipvs/ip_vs_xmit.c +@@ -209,7 +209,7 @@ static inline void maybe_update_pmtu(int + struct rtable *ort = skb_rtable(skb); + + if (!skb->dev && 
sk && sk_fullsock(sk)) +- ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu); ++ ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu, true); + } + + static inline bool ensure_mtu_is_adequate(struct netns_ipvs *ipvs, int skb_af, +--- a/net/sctp/transport.c ++++ b/net/sctp/transport.c +@@ -272,7 +272,7 @@ bool sctp_transport_update_pmtu(struct s + + pf->af->from_sk(&addr, sk); + pf->to_sk_daddr(&t->ipaddr, sk); +- dst->ops->update_pmtu(dst, sk, NULL, pmtu); ++ dst->ops->update_pmtu(dst, sk, NULL, pmtu, true); + pf->to_sk_daddr(&addr, sk); + + dst = sctp_transport_dst_check(t); diff --git a/queue-4.14/net-dst-add-new-function-skb_dst_update_pmtu_no_confirm.patch b/queue-4.14/net-dst-add-new-function-skb_dst_update_pmtu_no_confirm.patch new file mode 100644 index 00000000000..c2503ba0c00 --- /dev/null +++ b/queue-4.14/net-dst-add-new-function-skb_dst_update_pmtu_no_confirm.patch @@ -0,0 +1,44 @@ +From foo@baz Thu 02 Jan 2020 11:13:41 AM CET +From: Hangbin Liu +Date: Sun, 22 Dec 2019 10:51:12 +0800 +Subject: net/dst: add new function skb_dst_update_pmtu_no_confirm + +From: Hangbin Liu + +[ Upstream commit 07dc35c6e3cc3c001915d05f5bf21f80a39a0970 ] + +Add a new function skb_dst_update_pmtu_no_confirm() for callers who need +update pmtu but should not do neighbor confirm. + +v5: No change. +v4: No change. +v3: Do not remove dst_confirm_neigh, but add a new bool parameter in + dst_ops.update_pmtu to control whether we should do neighbor confirm. + Also split the big patch to small ones for each area. +v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu. + +Reviewed-by: Guillaume Nault +Acked-by: David Ahern +Signed-off-by: Hangbin Liu +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/dst.h | 9 +++++++++ + 1 file changed, 9 insertions(+) + +--- a/include/net/dst.h ++++ b/include/net/dst.h +@@ -531,4 +531,13 @@ static inline void skb_dst_update_pmtu(s + dst->ops->update_pmtu(dst, NULL, skb, mtu, true); + } + ++/* update dst pmtu but not do neighbor confirm */ ++static inline void skb_dst_update_pmtu_no_confirm(struct sk_buff *skb, u32 mtu) ++{ ++ struct dst_entry *dst = skb_dst(skb); ++ ++ if (dst && dst->ops->update_pmtu) ++ dst->ops->update_pmtu(dst, NULL, skb, mtu, false); ++} ++ + #endif /* _NET_DST_H */ diff --git a/queue-4.14/net-ena-fix-napi-handler-misbehavior-when-the-napi-budget-is-zero.patch b/queue-4.14/net-ena-fix-napi-handler-misbehavior-when-the-napi-budget-is-zero.patch new file mode 100644 index 00000000000..ffe363f2b21 --- /dev/null +++ b/queue-4.14/net-ena-fix-napi-handler-misbehavior-when-the-napi-budget-is-zero.patch @@ -0,0 +1,54 @@ +From foo@baz Thu 02 Jan 2020 11:13:41 AM CET +From: Netanel Belgazal +Date: Tue, 10 Dec 2019 11:27:44 +0000 +Subject: net: ena: fix napi handler misbehavior when the napi budget is zero + +From: Netanel Belgazal + +[ Upstream commit 24dee0c7478d1a1e00abdf5625b7f921467325dc ] + +In netpoll the napi handler could be called with budget equal to zero. +Current ENA napi handler doesn't take that into consideration. + +The napi handler handles Rx packets in a do-while loop. +Currently, the budget check happens only after decrementing the +budget, therefore the napi handler, in rare cases, could run over +MAX_INT packets. + +In addition to that, this moves all budget related variables to int +calculation and stop mixing u32 to avoid ambiguity + +Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") +Signed-off-by: Netanel Belgazal +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/amazon/ena/ena_netdev.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c ++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c +@@ -1196,8 +1196,8 @@ static int ena_io_poll(struct napi_struc + struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi); + struct ena_ring *tx_ring, *rx_ring; + +- u32 tx_work_done; +- u32 rx_work_done; ++ int tx_work_done; ++ int rx_work_done = 0; + int tx_budget; + int napi_comp_call = 0; + int ret; +@@ -1214,7 +1214,11 @@ static int ena_io_poll(struct napi_struc + } + + tx_work_done = ena_clean_tx_irq(tx_ring, tx_budget); +- rx_work_done = ena_clean_rx_irq(rx_ring, napi, budget); ++ /* On netpoll the budget is zero and the handler should only clean the ++ * tx completions. ++ */ ++ if (likely(budget)) ++ rx_work_done = ena_clean_rx_irq(rx_ring, napi, budget); + + /* If the device is about to reset or down, avoid unmask + * the interrupt and return 0 so NAPI won't reschedule diff --git a/queue-4.14/net-mlxfw-fix-out-of-memory-error-in-mfa2-flash-burning.patch b/queue-4.14/net-mlxfw-fix-out-of-memory-error-in-mfa2-flash-burning.patch new file mode 100644 index 00000000000..5dc3a268cc5 --- /dev/null +++ b/queue-4.14/net-mlxfw-fix-out-of-memory-error-in-mfa2-flash-burning.patch @@ -0,0 +1,62 @@ +From foo@baz Thu 02 Jan 2020 11:13:41 AM CET +From: Vladyslav Tarasiuk +Date: Thu, 26 Dec 2019 10:41:56 +0200 +Subject: net/mlxfw: Fix out-of-memory error in mfa2 flash burning + +From: Vladyslav Tarasiuk + +[ Upstream commit a5bcd72e054aabb93ddc51ed8cde36a5bfc50271 ] + +The burning process requires to perform internal allocations of large +chunks of memory. This memory doesn't need to be contiguous and can be +safely allocated by vzalloc() instead of kzalloc(). This patch changes +such allocation to avoid possible out-of-memory failure. 
+ +Fixes: 410ed13cae39 ("Add the mlxfw module for Mellanox firmware flash process") +Signed-off-by: Vladyslav Tarasiuk +Reviewed-by: Aya Levin +Signed-off-by: Leon Romanovsky +Tested-by: Ido Schimmel +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c ++++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c +@@ -37,6 +37,7 @@ + #include + #include + #include ++#include + #include + #include "mlxfw_mfa2.h" + #include "mlxfw_mfa2_file.h" +@@ -579,7 +580,7 @@ mlxfw_mfa2_file_component_get(const stru + comp_size = be32_to_cpu(comp->size); + comp_buf_size = comp_size + mlxfw_mfa2_comp_magic_len; + +- comp_data = kmalloc(sizeof(*comp_data) + comp_buf_size, GFP_KERNEL); ++ comp_data = vzalloc(sizeof(*comp_data) + comp_buf_size); + if (!comp_data) + return ERR_PTR(-ENOMEM); + comp_data->comp.data_size = comp_size; +@@ -601,7 +602,7 @@ mlxfw_mfa2_file_component_get(const stru + comp_data->comp.data = comp_data->buff + mlxfw_mfa2_comp_magic_len; + return &comp_data->comp; + err_out: +- kfree(comp_data); ++ vfree(comp_data); + return ERR_PTR(err); + } + +@@ -610,7 +611,7 @@ void mlxfw_mfa2_file_component_put(struc + const struct mlxfw_mfa2_comp_data *comp_data; + + comp_data = container_of(comp, struct mlxfw_mfa2_comp_data, comp); +- kfree(comp_data); ++ vfree(comp_data); + } + + void mlxfw_mfa2_file_fini(struct mlxfw_mfa2_file *mfa2_file) diff --git a/queue-4.14/ptp-fix-the-race-between-the-release-of-ptp_clock-and-cdev.patch b/queue-4.14/ptp-fix-the-race-between-the-release-of-ptp_clock-and-cdev.patch new file mode 100644 index 00000000000..4e667290a6a --- /dev/null +++ b/queue-4.14/ptp-fix-the-race-between-the-release-of-ptp_clock-and-cdev.patch @@ -0,0 +1,317 @@ +From foo@baz Thu 02 Jan 2020 11:13:41 AM CET +From: Vladis Dronov +Date: Fri, 27 Dec 2019 03:26:27 +0100 +Subject: ptp: fix 
the race between the release of ptp_clock and cdev + +From: Vladis Dronov + +[ Upstream commit a33121e5487b424339636b25c35d3a180eaa5f5e ] + +In a case when a ptp chardev (like /dev/ptp0) is open but an underlying +device is removed, closing this file leads to a race. This reproduces +easily in a kvm virtual machine: + +ts# cat openptp0.c +int main() { ... fp = fopen("/dev/ptp0", "r"); ... sleep(10); } +ts# uname -r +5.5.0-rc3-46cf053e +ts# cat /proc/cmdline +... slub_debug=FZP +ts# modprobe ptp_kvm +ts# ./openptp0 & +[1] 670 +opened /dev/ptp0, sleeping 10s... +ts# rmmod ptp_kvm +ts# ls /dev/ptp* +ls: cannot access '/dev/ptp*': No such file or directory +ts# ...woken up +[ 48.010809] general protection fault: 0000 [#1] SMP +[ 48.012502] CPU: 6 PID: 658 Comm: openptp0 Not tainted 5.5.0-rc3-46cf053e #25 +[ 48.014624] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), ... +[ 48.016270] RIP: 0010:module_put.part.0+0x7/0x80 +[ 48.017939] RSP: 0018:ffffb3850073be00 EFLAGS: 00010202 +[ 48.018339] RAX: 000000006b6b6b6b RBX: 6b6b6b6b6b6b6b6b RCX: ffff89a476c00ad0 +[ 48.018936] RDX: fffff65a08d3ea08 RSI: 0000000000000247 RDI: 6b6b6b6b6b6b6b6b +[ 48.019470] ... ^^^ a slub poison +[ 48.023854] Call Trace: +[ 48.024050] __fput+0x21f/0x240 +[ 48.024288] task_work_run+0x79/0x90 +[ 48.024555] do_exit+0x2af/0xab0 +[ 48.024799] ? vfs_write+0x16a/0x190 +[ 48.025082] do_group_exit+0x35/0x90 +[ 48.025387] __x64_sys_exit_group+0xf/0x10 +[ 48.025737] do_syscall_64+0x3d/0x130 +[ 48.026056] entry_SYSCALL_64_after_hwframe+0x44/0xa9 +[ 48.026479] RIP: 0033:0x7f53b12082f6 +[ 48.026792] ... +[ 48.030945] Modules linked in: ptp i6300esb watchdog [last unloaded: ptp_kvm] +[ 48.045001] Fixing recursive fault but reboot is needed! + +This happens in: + +static void __fput(struct file *file) +{ ... 
+ if (file->f_op->release) + file->f_op->release(inode, file); <<< cdev is kfree'd here + if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL && + !(mode & FMODE_PATH))) { + cdev_put(inode->i_cdev); <<< cdev fields are accessed here + +Namely: + +__fput() + posix_clock_release() + kref_put(&clk->kref, delete_clock) <<< the last reference + delete_clock() + delete_ptp_clock() + kfree(ptp) <<< cdev is embedded in ptp + cdev_put + module_put(p->owner) <<< *p is kfree'd, bang! + +Here cdev is embedded in posix_clock which is embedded in ptp_clock. +The race happens because ptp_clock's lifetime is controlled by two +refcounts: kref and cdev.kobj in posix_clock. This is wrong. + +Make ptp_clock's sysfs device a parent of cdev with cdev_device_add() +created especially for such cases. This way the parent device with its +ptp_clock is not released until all references to the cdev are released. +This adds a requirement that an initialized but not exposed struct +device should be provided to posix_clock_register() by a caller instead +of a simple dev_t. + +This approach was adopted from the commit 72139dfa2464 ("watchdog: Fix +the race between the release of watchdog_core_data and cdev"). See +details of the implementation in the commit 233ed09d7fda ("chardev: add +helper function to register char devs with a struct device"). + +Link: https://lore.kernel.org/linux-fsdevel/20191125125342.6189-1-vdronov@redhat.com/T/#u +Analyzed-by: Stephen Johnston +Analyzed-by: Vern Lovejoy +Signed-off-by: Vladis Dronov +Acked-by: Richard Cochran +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/ptp/ptp_clock.c | 31 ++++++++++++++----------------- + drivers/ptp/ptp_private.h | 2 +- + include/linux/posix-clock.h | 19 +++++++++++-------- + kernel/time/posix-clock.c | 31 +++++++++++++------------------ + 4 files changed, 39 insertions(+), 44 deletions(-) + +--- a/drivers/ptp/ptp_clock.c ++++ b/drivers/ptp/ptp_clock.c +@@ -175,9 +175,9 @@ static struct posix_clock_operations ptp + .read = ptp_read, + }; + +-static void delete_ptp_clock(struct posix_clock *pc) ++static void ptp_clock_release(struct device *dev) + { +- struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); ++ struct ptp_clock *ptp = container_of(dev, struct ptp_clock, dev); + + mutex_destroy(&ptp->tsevq_mux); + mutex_destroy(&ptp->pincfg_mux); +@@ -222,7 +222,6 @@ struct ptp_clock *ptp_clock_register(str + } + + ptp->clock.ops = ptp_clock_ops; +- ptp->clock.release = delete_ptp_clock; + ptp->info = info; + ptp->devid = MKDEV(major, index); + ptp->index = index; +@@ -249,15 +248,6 @@ struct ptp_clock *ptp_clock_register(str + if (err) + goto no_pin_groups; + +- /* Create a new device in our class. */ +- ptp->dev = device_create_with_groups(ptp_class, parent, ptp->devid, +- ptp, ptp->pin_attr_groups, +- "ptp%d", ptp->index); +- if (IS_ERR(ptp->dev)) { +- err = PTR_ERR(ptp->dev); +- goto no_device; +- } +- + /* Register a new PPS source. */ + if (info->pps) { + struct pps_source_info pps; +@@ -273,8 +263,18 @@ struct ptp_clock *ptp_clock_register(str + } + } + +- /* Create a posix clock. */ +- err = posix_clock_register(&ptp->clock, ptp->devid); ++ /* Initialize a new device of our class in our clock structure. 
*/ ++ device_initialize(&ptp->dev); ++ ptp->dev.devt = ptp->devid; ++ ptp->dev.class = ptp_class; ++ ptp->dev.parent = parent; ++ ptp->dev.groups = ptp->pin_attr_groups; ++ ptp->dev.release = ptp_clock_release; ++ dev_set_drvdata(&ptp->dev, ptp); ++ dev_set_name(&ptp->dev, "ptp%d", ptp->index); ++ ++ /* Create a posix clock and link it to the device. */ ++ err = posix_clock_register(&ptp->clock, &ptp->dev); + if (err) { + pr_err("failed to create posix clock\n"); + goto no_clock; +@@ -286,8 +286,6 @@ no_clock: + if (ptp->pps_source) + pps_unregister_source(ptp->pps_source); + no_pps: +- device_destroy(ptp_class, ptp->devid); +-no_device: + ptp_cleanup_pin_groups(ptp); + no_pin_groups: + if (ptp->kworker) +@@ -317,7 +315,6 @@ int ptp_clock_unregister(struct ptp_cloc + if (ptp->pps_source) + pps_unregister_source(ptp->pps_source); + +- device_destroy(ptp_class, ptp->devid); + ptp_cleanup_pin_groups(ptp); + + posix_clock_unregister(&ptp->clock); +--- a/drivers/ptp/ptp_private.h ++++ b/drivers/ptp/ptp_private.h +@@ -41,7 +41,7 @@ struct timestamp_event_queue { + + struct ptp_clock { + struct posix_clock clock; +- struct device *dev; ++ struct device dev; + struct ptp_clock_info *info; + dev_t devid; + int index; /* index into clocks.map */ +--- a/include/linux/posix-clock.h ++++ b/include/linux/posix-clock.h +@@ -82,29 +82,32 @@ struct posix_clock_operations { + * + * @ops: Functional interface to the clock + * @cdev: Character device instance for this clock +- * @kref: Reference count. ++ * @dev: Pointer to the clock's device. + * @rwsem: Protects the 'zombie' field from concurrent access. + * @zombie: If 'zombie' is true, then the hardware has disappeared. +- * @release: A function to free the structure when the reference count reaches +- * zero. May be NULL if structure is statically allocated. + * + * Drivers should embed their struct posix_clock within a private + * structure, obtaining a reference to it during callbacks using + * container_of(). 
++ * ++ * Drivers should supply an initialized but not exposed struct device ++ * to posix_clock_register(). It is used to manage lifetime of the ++ * driver's private structure. It's 'release' field should be set to ++ * a release function for this private structure. + */ + struct posix_clock { + struct posix_clock_operations ops; + struct cdev cdev; +- struct kref kref; ++ struct device *dev; + struct rw_semaphore rwsem; + bool zombie; +- void (*release)(struct posix_clock *clk); + }; + + /** + * posix_clock_register() - register a new clock +- * @clk: Pointer to the clock. Caller must provide 'ops' and 'release' +- * @devid: Allocated device id ++ * @clk: Pointer to the clock. Caller must provide 'ops' field ++ * @dev: Pointer to the initialized device. Caller must provide ++ * 'release' field + * + * A clock driver calls this function to register itself with the + * clock device subsystem. If 'clk' points to dynamically allocated +@@ -113,7 +116,7 @@ struct posix_clock { + * + * Returns zero on success, non-zero otherwise. + */ +-int posix_clock_register(struct posix_clock *clk, dev_t devid); ++int posix_clock_register(struct posix_clock *clk, struct device *dev); + + /** + * posix_clock_unregister() - unregister a clock +--- a/kernel/time/posix-clock.c ++++ b/kernel/time/posix-clock.c +@@ -27,8 +27,6 @@ + + #include "posix-timers.h" + +-static void delete_clock(struct kref *kref); +- + /* + * Returns NULL if the posix_clock instance attached to 'fp' is old and stale. 
+ */ +@@ -138,7 +136,7 @@ static int posix_clock_open(struct inode + err = 0; + + if (!err) { +- kref_get(&clk->kref); ++ get_device(clk->dev); + fp->private_data = clk; + } + out: +@@ -154,7 +152,7 @@ static int posix_clock_release(struct in + if (clk->ops.release) + err = clk->ops.release(clk); + +- kref_put(&clk->kref, delete_clock); ++ put_device(clk->dev); + + fp->private_data = NULL; + +@@ -174,38 +172,35 @@ static const struct file_operations posi + #endif + }; + +-int posix_clock_register(struct posix_clock *clk, dev_t devid) ++int posix_clock_register(struct posix_clock *clk, struct device *dev) + { + int err; + +- kref_init(&clk->kref); + init_rwsem(&clk->rwsem); + + cdev_init(&clk->cdev, &posix_clock_file_operations); ++ err = cdev_device_add(&clk->cdev, dev); ++ if (err) { ++ pr_err("%s unable to add device %d:%d\n", ++ dev_name(dev), MAJOR(dev->devt), MINOR(dev->devt)); ++ return err; ++ } + clk->cdev.owner = clk->ops.owner; +- err = cdev_add(&clk->cdev, devid, 1); ++ clk->dev = dev; + +- return err; ++ return 0; + } + EXPORT_SYMBOL_GPL(posix_clock_register); + +-static void delete_clock(struct kref *kref) +-{ +- struct posix_clock *clk = container_of(kref, struct posix_clock, kref); +- +- if (clk->release) +- clk->release(clk); +-} +- + void posix_clock_unregister(struct posix_clock *clk) + { +- cdev_del(&clk->cdev); ++ cdev_device_del(&clk->cdev, clk->dev); + + down_write(&clk->rwsem); + clk->zombie = true; + up_write(&clk->rwsem); + +- kref_put(&clk->kref, delete_clock); ++ put_device(clk->dev); + } + EXPORT_SYMBOL_GPL(posix_clock_unregister); + diff --git a/queue-4.14/series b/queue-4.14/series index d73850f66c0..63c0efc1993 100644 --- a/queue-4.14/series +++ b/queue-4.14/series @@ -72,3 +72,21 @@ spi-fsl-don-t-map-irq-during-probe.patch tty-serial-atmel-fix-out-of-range-clock-divider-hand.patch serial-sprd-use-readable-macros-instead-of-magic-num.patch pinctrl-baytrail-really-serialize-all-register-acces.patch 
+net-ena-fix-napi-handler-misbehavior-when-the-napi-budget-is-zero.patch +net-mlxfw-fix-out-of-memory-error-in-mfa2-flash-burning.patch +ptp-fix-the-race-between-the-release-of-ptp_clock-and-cdev.patch +udp-fix-integer-overflow-while-computing-available-space-in-sk_rcvbuf.patch +vhost-vsock-accept-only-packets-with-the-right-dst_cid.patch +net-add-bool-confirm_neigh-parameter-for-dst_ops.update_pmtu.patch +ip6_gre-do-not-confirm-neighbor-when-do-pmtu-update.patch +gtp-do-not-confirm-neighbor-when-do-pmtu-update.patch +net-dst-add-new-function-skb_dst_update_pmtu_no_confirm.patch +tunnel-do-not-confirm-neighbor-when-do-pmtu-update.patch +vti-do-not-confirm-neighbor-when-do-pmtu-update.patch +sit-do-not-confirm-neighbor-when-do-pmtu-update.patch +gtp-do-not-allow-adding-duplicate-tid-and-ms_addr-pdp-context.patch +tcp-dccp-fix-possible-race-__inet_lookup_established.patch +tcp-do-not-send-empty-skb-from-tcp_write_xmit.patch +gtp-fix-wrong-condition-in-gtp_genl_dump_pdp.patch +gtp-fix-an-use-after-free-in-ipv4_pdp_find.patch +gtp-avoid-zero-size-hashtable.patch diff --git a/queue-4.14/sit-do-not-confirm-neighbor-when-do-pmtu-update.patch b/queue-4.14/sit-do-not-confirm-neighbor-when-do-pmtu-update.patch new file mode 100644 index 00000000000..a770a5f1b6a --- /dev/null +++ b/queue-4.14/sit-do-not-confirm-neighbor-when-do-pmtu-update.patch @@ -0,0 +1,39 @@ +From foo@baz Thu 02 Jan 2020 11:13:41 AM CET +From: Hangbin Liu +Date: Sun, 22 Dec 2019 10:51:15 +0800 +Subject: sit: do not confirm neighbor when do pmtu update + +From: Hangbin Liu + +[ Upstream commit 4d42df46d6372ece4cb4279870b46c2ea7304a47 ] + +When do IPv6 tunnel PMTU update and calls __ip6_rt_update_pmtu() in the end, +we should not call dst_confirm_neigh() as there is no two-way communication. + +v5: No change. +v4: No change. +v3: Do not remove dst_confirm_neigh, but add a new bool parameter in + dst_ops.update_pmtu to control whether we should do neighbor confirm. 
+ Also split the big patch to small ones for each area. +v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu. + +Reviewed-by: Guillaume Nault +Acked-by: David Ahern +Signed-off-by: Hangbin Liu +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/sit.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv6/sit.c ++++ b/net/ipv6/sit.c +@@ -932,7 +932,7 @@ static netdev_tx_t ipip6_tunnel_xmit(str + } + + if (tunnel->parms.iph.daddr) +- skb_dst_update_pmtu(skb, mtu); ++ skb_dst_update_pmtu_no_confirm(skb, mtu); + + if (skb->len > mtu && !skb_is_gso(skb)) { + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); diff --git a/queue-4.14/tcp-dccp-fix-possible-race-__inet_lookup_established.patch b/queue-4.14/tcp-dccp-fix-possible-race-__inet_lookup_established.patch new file mode 100644 index 00000000000..ede1fef271c --- /dev/null +++ b/queue-4.14/tcp-dccp-fix-possible-race-__inet_lookup_established.patch @@ -0,0 +1,220 @@ +From foo@baz Thu 02 Jan 2020 11:13:41 AM CET +From: Eric Dumazet +Date: Fri, 13 Dec 2019 18:20:41 -0800 +Subject: tcp/dccp: fix possible race __inet_lookup_established() + +From: Eric Dumazet + +[ Upstream commit 8dbd76e79a16b45b2ccb01d2f2e08dbf64e71e40 ] + +Michal Kubecek and Firo Yang did a very nice analysis of crashes +happening in __inet_lookup_established(). + +Since a TCP socket can go from TCP_ESTABLISH to TCP_LISTEN +(via a close()/socket()/listen() cycle) without a RCU grace period, +I should not have changed listeners linkage in their hash table. + +They must use the nulls protocol (Documentation/RCU/rculist_nulls.txt), +so that a lookup can detect a socket in a hash list was moved in +another one. + +Since we added code in commit d296ba60d8e2 ("soreuseport: Resolve +merge conflict for v4/v6 ordering fix"), we have to add +hlist_nulls_add_tail_rcu() helper. 
+ +Fixes: 3b24d854cb35 ("tcp/dccp: do not touch listener sk_refcnt under synflood") +Signed-off-by: Eric Dumazet +Reported-by: Michal Kubecek +Reported-by: Firo Yang +Reviewed-by: Michal Kubecek +Link: https://lore.kernel.org/netdev/20191120083919.GH27852@unicorn.suse.cz/ +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/rculist_nulls.h | 37 +++++++++++++++++++++++++++++++++++++ + include/net/inet_hashtables.h | 12 +++++++++--- + include/net/sock.h | 5 +++++ + net/ipv4/inet_diag.c | 3 ++- + net/ipv4/inet_hashtables.c | 15 +++++++-------- + net/ipv4/tcp_ipv4.c | 7 ++++--- + 6 files changed, 64 insertions(+), 15 deletions(-) + +--- a/include/linux/rculist_nulls.h ++++ b/include/linux/rculist_nulls.h +@@ -101,6 +101,43 @@ static inline void hlist_nulls_add_head_ + } + + /** ++ * hlist_nulls_add_tail_rcu ++ * @n: the element to add to the hash list. ++ * @h: the list to add to. ++ * ++ * Description: ++ * Adds the specified element to the specified hlist_nulls, ++ * while permitting racing traversals. ++ * ++ * The caller must take whatever precautions are necessary ++ * (such as holding appropriate locks) to avoid racing ++ * with another list-mutation primitive, such as hlist_nulls_add_head_rcu() ++ * or hlist_nulls_del_rcu(), running on this same list. ++ * However, it is perfectly legal to run concurrently with ++ * the _rcu list-traversal primitives, such as ++ * hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency ++ * problems on Alpha CPUs. Regardless of the type of CPU, the ++ * list-traversal primitive must be guarded by rcu_read_lock(). ++ */ ++static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n, ++ struct hlist_nulls_head *h) ++{ ++ struct hlist_nulls_node *i, *last = NULL; ++ ++ /* Note: write side code, so rcu accessors are not needed. 
*/ ++ for (i = h->first; !is_a_nulls(i); i = i->next) ++ last = i; ++ ++ if (last) { ++ n->next = last->next; ++ n->pprev = &last->next; ++ rcu_assign_pointer(hlist_next_rcu(last), n); ++ } else { ++ hlist_nulls_add_head_rcu(n, h); ++ } ++} ++ ++/** + * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_nulls_node to use as a loop cursor. +--- a/include/net/inet_hashtables.h ++++ b/include/net/inet_hashtables.h +@@ -106,12 +106,18 @@ struct inet_bind_hashbucket { + struct hlist_head chain; + }; + +-/* +- * Sockets can be hashed in established or listening table ++/* Sockets can be hashed in established or listening table. ++ * We must use different 'nulls' end-of-chain value for all hash buckets : ++ * A socket might transition from ESTABLISH to LISTEN state without ++ * RCU grace period. A lookup in ehash table needs to handle this case. + */ ++#define LISTENING_NULLS_BASE (1U << 29) + struct inet_listen_hashbucket { + spinlock_t lock; +- struct hlist_head head; ++ union { ++ struct hlist_head head; ++ struct hlist_nulls_head nulls_head; ++ }; + }; + + /* This is for listening sockets, thus all sockets which possess wildcards. 
*/ +--- a/include/net/sock.h ++++ b/include/net/sock.h +@@ -693,6 +693,11 @@ static inline void __sk_nulls_add_node_r + hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list); + } + ++static inline void __sk_nulls_add_node_tail_rcu(struct sock *sk, struct hlist_nulls_head *list) ++{ ++ hlist_nulls_add_tail_rcu(&sk->sk_nulls_node, list); ++} ++ + static inline void sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) + { + sock_hold(sk); +--- a/net/ipv4/inet_diag.c ++++ b/net/ipv4/inet_diag.c +@@ -911,11 +911,12 @@ void inet_diag_dump_icsk(struct inet_has + + for (i = s_i; i < INET_LHTABLE_SIZE; i++) { + struct inet_listen_hashbucket *ilb; ++ struct hlist_nulls_node *node; + + num = 0; + ilb = &hashinfo->listening_hash[i]; + spin_lock(&ilb->lock); +- sk_for_each(sk, &ilb->head) { ++ sk_nulls_for_each(sk, node, &ilb->nulls_head) { + struct inet_sock *inet = inet_sk(sk); + + if (!net_eq(sock_net(sk), net)) +--- a/net/ipv4/inet_hashtables.c ++++ b/net/ipv4/inet_hashtables.c +@@ -442,10 +442,11 @@ static int inet_reuseport_add_sock(struc + struct inet_listen_hashbucket *ilb) + { + struct inet_bind_bucket *tb = inet_csk(sk)->icsk_bind_hash; ++ const struct hlist_nulls_node *node; + struct sock *sk2; + kuid_t uid = sock_i_uid(sk); + +- sk_for_each_rcu(sk2, &ilb->head) { ++ sk_nulls_for_each_rcu(sk2, node, &ilb->nulls_head) { + if (sk2 != sk && + sk2->sk_family == sk->sk_family && + ipv6_only_sock(sk2) == ipv6_only_sock(sk) && +@@ -480,9 +481,9 @@ int __inet_hash(struct sock *sk, struct + } + if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport && + sk->sk_family == AF_INET6) +- hlist_add_tail_rcu(&sk->sk_node, &ilb->head); ++ __sk_nulls_add_node_tail_rcu(sk, &ilb->nulls_head); + else +- hlist_add_head_rcu(&sk->sk_node, &ilb->head); ++ __sk_nulls_add_node_rcu(sk, &ilb->nulls_head); + sock_set_flag(sk, SOCK_RCU_FREE); + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); + unlock: +@@ -525,10 +526,7 @@ void inet_unhash(struct sock *sk) + spin_lock_bh(lock); + if 
(rcu_access_pointer(sk->sk_reuseport_cb)) + reuseport_detach_sock(sk); +- if (listener) +- done = __sk_del_node_init(sk); +- else +- done = __sk_nulls_del_node_init_rcu(sk); ++ done = __sk_nulls_del_node_init_rcu(sk); + if (done) + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + spin_unlock_bh(lock); +@@ -664,7 +662,8 @@ void inet_hashinfo_init(struct inet_hash + + for (i = 0; i < INET_LHTABLE_SIZE; i++) { + spin_lock_init(&h->listening_hash[i].lock); +- INIT_HLIST_HEAD(&h->listening_hash[i].head); ++ INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].nulls_head, ++ i + LISTENING_NULLS_BASE); + } + } + EXPORT_SYMBOL_GPL(inet_hashinfo_init); +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -1936,13 +1936,14 @@ static void *listening_get_next(struct s + struct tcp_iter_state *st = seq->private; + struct net *net = seq_file_net(seq); + struct inet_listen_hashbucket *ilb; ++ struct hlist_nulls_node *node; + struct sock *sk = cur; + + if (!sk) { + get_head: + ilb = &tcp_hashinfo.listening_hash[st->bucket]; + spin_lock(&ilb->lock); +- sk = sk_head(&ilb->head); ++ sk = sk_nulls_head(&ilb->nulls_head); + st->offset = 0; + goto get_sk; + } +@@ -1950,9 +1951,9 @@ get_head: + ++st->num; + ++st->offset; + +- sk = sk_next(sk); ++ sk = sk_nulls_next(sk); + get_sk: +- sk_for_each_from(sk) { ++ sk_nulls_for_each_from(sk, node) { + if (!net_eq(sock_net(sk), net)) + continue; + if (sk->sk_family == st->family) diff --git a/queue-4.14/tcp-do-not-send-empty-skb-from-tcp_write_xmit.patch b/queue-4.14/tcp-do-not-send-empty-skb-from-tcp_write_xmit.patch new file mode 100644 index 00000000000..20accebdd4d --- /dev/null +++ b/queue-4.14/tcp-do-not-send-empty-skb-from-tcp_write_xmit.patch @@ -0,0 +1,53 @@ +From foo@baz Thu 02 Jan 2020 11:13:41 AM CET +From: Eric Dumazet +Date: Thu, 12 Dec 2019 12:55:29 -0800 +Subject: tcp: do not send empty skb from tcp_write_xmit() + +From: Eric Dumazet + +[ Upstream commit 1f85e6267caca44b30c54711652b0726fadbb131 ] + +Backport of commit 
fdfc5c8594c2 ("tcp: remove empty skb from +write queue in error cases") in linux-4.14 stable triggered +various bugs. One of them has been fixed in commit ba2ddb43f270 +("tcp: Don't dequeue SYN/FIN-segments from write-queue"), but +we still have crashes in some occasions. + +Root-cause is that when tcp_sendmsg() has allocated a fresh +skb and could not append a fragment before being blocked +in sk_stream_wait_memory(), tcp_write_xmit() might be called +and decide to send this fresh and empty skb. + +Sending an empty packet is not only silly, it might have caused +many issues we had in the past with tp->packets_out being +out of sync. + +Fixes: c65f7f00c587 ("[TCP]: Simplify SKB data portion allocation with NETIF_F_SG.") +Signed-off-by: Eric Dumazet +Cc: Christoph Paasch +Acked-by: Neal Cardwell +Cc: Jason Baron +Acked-by: Soheil Hassas Yeganeh +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_output.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -2380,6 +2380,14 @@ static bool tcp_write_xmit(struct sock * + if (tcp_small_queue_check(sk, skb, 0)) + break; + ++ /* Argh, we hit an empty skb(), presumably a thread ++ * is sleeping in sendmsg()/sk_stream_wait_memory(). ++ * We do not want to send a pure-ack packet and have ++ * a strange looking rtx queue with empty packet(s). 
++ */ ++ if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) ++ break; ++ + if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp))) + break; + diff --git a/queue-4.14/tunnel-do-not-confirm-neighbor-when-do-pmtu-update.patch b/queue-4.14/tunnel-do-not-confirm-neighbor-when-do-pmtu-update.patch new file mode 100644 index 00000000000..e49d9fda8d7 --- /dev/null +++ b/queue-4.14/tunnel-do-not-confirm-neighbor-when-do-pmtu-update.patch @@ -0,0 +1,62 @@ +From foo@baz Thu 02 Jan 2020 11:13:41 AM CET +From: Hangbin Liu +Date: Sun, 22 Dec 2019 10:51:13 +0800 +Subject: tunnel: do not confirm neighbor when do pmtu update + +From: Hangbin Liu + +[ Upstream commit 7a1592bcb15d71400a98632727791d1e68ea0ee8 ] + +When doing a tunnel PMTU update that ends up calling __ip6_rt_update_pmtu(), +we should not call dst_confirm_neigh() as there is no two-way communication. + +v5: No Change. +v4: Update commit description +v3: Do not remove dst_confirm_neigh, but add a new bool parameter in + dst_ops.update_pmtu to control whether we should do neighbor confirm. + Also split the big patch to small ones for each area. +v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu. + +Fixes: 0dec879f636f ("net: use dst_confirm_neigh for UDP, RAW, ICMP, L2TP") +Reviewed-by: Guillaume Nault +Tested-by: Guillaume Nault +Acked-by: David Ahern +Signed-off-by: Hangbin Liu +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_tunnel.c | 2 +- + net/ipv6/ip6_tunnel.c | 4 ++-- + 2 files changed, 3 insertions(+), 3 deletions(-) + +--- a/net/ipv4/ip_tunnel.c ++++ b/net/ipv4/ip_tunnel.c +@@ -521,7 +521,7 @@ static int tnl_update_pmtu(struct net_de + else + mtu = skb_dst(skb) ? 
dst_mtu(skb_dst(skb)) : dev->mtu; + +- skb_dst_update_pmtu(skb, mtu); ++ skb_dst_update_pmtu_no_confirm(skb, mtu); + + if (skb->protocol == htons(ETH_P_IP)) { + if (!skb_is_gso(skb) && +--- a/net/ipv6/ip6_tunnel.c ++++ b/net/ipv6/ip6_tunnel.c +@@ -652,7 +652,7 @@ ip4ip6_err(struct sk_buff *skb, struct i + if (rel_info > dst_mtu(skb_dst(skb2))) + goto out; + +- skb_dst_update_pmtu(skb2, rel_info); ++ skb_dst_update_pmtu_no_confirm(skb2, rel_info); + } + if (rel_type == ICMP_REDIRECT) + skb_dst(skb2)->ops->redirect(skb_dst(skb2), NULL, skb2); +@@ -1138,7 +1138,7 @@ route_lookup: + mtu = max(mtu, skb->protocol == htons(ETH_P_IPV6) ? + IPV6_MIN_MTU : IPV4_MIN_MTU); + +- skb_dst_update_pmtu(skb, mtu); ++ skb_dst_update_pmtu_no_confirm(skb, mtu); + if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) { + *pmtu = mtu; + err = -EMSGSIZE; diff --git a/queue-4.14/udp-fix-integer-overflow-while-computing-available-space-in-sk_rcvbuf.patch b/queue-4.14/udp-fix-integer-overflow-while-computing-available-space-in-sk_rcvbuf.patch new file mode 100644 index 00000000000..082e1dcd405 --- /dev/null +++ b/queue-4.14/udp-fix-integer-overflow-while-computing-available-space-in-sk_rcvbuf.patch @@ -0,0 +1,38 @@ +From foo@baz Thu 02 Jan 2020 11:13:41 AM CET +From: Antonio Messina +Date: Thu, 19 Dec 2019 15:08:03 +0100 +Subject: udp: fix integer overflow while computing available space in sk_rcvbuf + +From: Antonio Messina + +[ Upstream commit feed8a4fc9d46c3126fb9fcae0e9248270c6321a ] + +When the size of the receive buffer for a socket is close to 2^31, +computing whether we have enough space in the buffer to copy a packet from +the queue to the buffer might hit an integer overflow. + +When a user sets net.core.rmem_default to a value close to 2^31, UDP +packets are dropped because of this overflow. This can be visible, for +instance, as a failure to resolve hostnames. 
+ +This can be fixed by casting sk_rcvbuf (which is an int) to unsigned +int, similarly to how it is done in TCP. + +Signed-off-by: Antonio Messina +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/udp.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/udp.c ++++ b/net/ipv4/udp.c +@@ -1338,7 +1338,7 @@ int __udp_enqueue_schedule_skb(struct so + * queue contains some other skb + */ + rmem = atomic_add_return(size, &sk->sk_rmem_alloc); +- if (rmem > (size + sk->sk_rcvbuf)) ++ if (rmem > (size + (unsigned int)sk->sk_rcvbuf)) + goto uncharge_drop; + + spin_lock(&list->lock); diff --git a/queue-4.14/vhost-vsock-accept-only-packets-with-the-right-dst_cid.patch b/queue-4.14/vhost-vsock-accept-only-packets-with-the-right-dst_cid.patch new file mode 100644 index 00000000000..977668bd7eb --- /dev/null +++ b/queue-4.14/vhost-vsock-accept-only-packets-with-the-right-dst_cid.patch @@ -0,0 +1,35 @@ +From foo@baz Thu 02 Jan 2020 11:13:41 AM CET +From: Stefano Garzarella +Date: Fri, 6 Dec 2019 15:39:12 +0100 +Subject: vhost/vsock: accept only packets with the right dst_cid + +From: Stefano Garzarella + +[ Upstream commit 8a3cc29c316c17de590e3ff8b59f3d6cbfd37b0a ] + +When we receive a new packet from the guest, we check if the +src_cid is correct, but we forgot to check the dst_cid. + +The host should accept only packets where dst_cid is +equal to the host CID. + +Signed-off-by: Stefano Garzarella +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/vhost/vsock.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/drivers/vhost/vsock.c ++++ b/drivers/vhost/vsock.c +@@ -436,7 +436,9 @@ static void vhost_vsock_handle_tx_kick(s + virtio_transport_deliver_tap_pkt(pkt); + + /* Only accept correctly addressed packets */ +- if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid) ++ if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid && ++ le64_to_cpu(pkt->hdr.dst_cid) == ++ vhost_transport_get_local_cid()) + virtio_transport_recv_pkt(pkt); + else + virtio_transport_free_pkt(pkt); diff --git a/queue-4.14/vti-do-not-confirm-neighbor-when-do-pmtu-update.patch b/queue-4.14/vti-do-not-confirm-neighbor-when-do-pmtu-update.patch new file mode 100644 index 00000000000..02099b9bdee --- /dev/null +++ b/queue-4.14/vti-do-not-confirm-neighbor-when-do-pmtu-update.patch @@ -0,0 +1,55 @@ +From foo@baz Thu 02 Jan 2020 11:13:41 AM CET +From: Hangbin Liu +Date: Sun, 22 Dec 2019 10:51:14 +0800 +Subject: vti: do not confirm neighbor when do pmtu update + +From: Hangbin Liu + +[ Upstream commit 8247a79efa2f28b44329f363272550c1738377de ] + +When doing an IPv6 tunnel PMTU update that ends up calling __ip6_rt_update_pmtu(), +we should not call dst_confirm_neigh() as there is no two-way communication. + +Although vti and vti6 are immune to this problem because they are IFF_NOARP +interfaces, as Guillaume pointed out, it still makes no sense to confirm the +neighbour here. + +v5: Update commit description. +v4: No change. +v3: Do not remove dst_confirm_neigh, but add a new bool parameter in + dst_ops.update_pmtu to control whether we should do neighbor confirm. + Also split the big patch to small ones for each area. +v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu. + +Reviewed-by: Guillaume Nault +Acked-by: David Ahern +Signed-off-by: Hangbin Liu +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_vti.c | 2 +- + net/ipv6/ip6_vti.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +--- a/net/ipv4/ip_vti.c ++++ b/net/ipv4/ip_vti.c +@@ -244,7 +244,7 @@ static netdev_tx_t vti_xmit(struct sk_bu + + mtu = dst_mtu(dst); + if (skb->len > mtu) { +- skb_dst_update_pmtu(skb, mtu); ++ skb_dst_update_pmtu_no_confirm(skb, mtu); + if (skb->protocol == htons(ETH_P_IP)) { + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, + htonl(mtu)); +--- a/net/ipv6/ip6_vti.c ++++ b/net/ipv6/ip6_vti.c +@@ -483,7 +483,7 @@ vti6_xmit(struct sk_buff *skb, struct ne + + mtu = dst_mtu(dst); + if (skb->len > mtu) { +- skb_dst_update_pmtu(skb, mtu); ++ skb_dst_update_pmtu_no_confirm(skb, mtu); + + if (skb->protocol == htons(ETH_P_IPV6)) { + if (mtu < IPV6_MIN_MTU)