--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:36:29 PM CET
+From: Taehee Yoo <ap420073@gmail.com>
+Date: Wed, 11 Dec 2019 08:23:48 +0000
+Subject: gtp: avoid zero size hashtable
+
+From: Taehee Yoo <ap420073@gmail.com>
+
+[ Upstream commit 6a902c0f31993ab02e1b6ea7085002b9c9083b6a ]
+
+The default GTP hashtable size is 1024, and userspace can set a specific
+hashtable size with IFLA_GTP_PDP_HASHSIZE. If the hashtable size is set to 0
+from userspace, the hashtable will not work and a panic will occur.
+
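+For illustration, a minimal sketch of the patched gtp_newlink() logic, which
+falls back to the default size in both cases:
+
+    if (!data[IFLA_GTP_PDP_HASHSIZE]) {
+        hashsize = 1024;
+    } else {
+        hashsize = nla_get_u32(data[IFLA_GTP_PDP_HASHSIZE]);
+        if (!hashsize)  /* a zero-size table would break every lookup */
+            hashsize = 1024;
+    }
+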
+Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)")
+Signed-off-by: Taehee Yoo <ap420073@gmail.com>
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/gtp.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/gtp.c
++++ b/drivers/net/gtp.c
+@@ -671,10 +671,13 @@ static int gtp_newlink(struct net *src_n
+ if (err < 0)
+ return err;
+
+- if (!data[IFLA_GTP_PDP_HASHSIZE])
++ if (!data[IFLA_GTP_PDP_HASHSIZE]) {
+ hashsize = 1024;
+- else
++ } else {
+ hashsize = nla_get_u32(data[IFLA_GTP_PDP_HASHSIZE]);
++ if (!hashsize)
++ hashsize = 1024;
++ }
+
+ err = gtp_hashtable_new(gtp, hashsize);
+ if (err < 0)
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:36:29 PM CET
+From: Taehee Yoo <ap420073@gmail.com>
+Date: Wed, 11 Dec 2019 08:23:00 +0000
+Subject: gtp: do not allow adding duplicate tid and ms_addr pdp context
+
+From: Taehee Yoo <ap420073@gmail.com>
+
+[ Upstream commit 6b01b1d9b2d38dc84ac398bfe9f00baff06a31e5 ]
+
+The GTP RX packet path looks up the pdp context by TID. If duplicate TID pdp
+contexts exist in the list, the correct pdp context cannot be selected,
+so the TID value must be unique.
+The GTP TX packet path looks up the pdp context by ms_addr. If duplicate
+ms_addr pdp contexts exist in the list, the correct pdp context cannot be
+selected, so the ms_addr value must be unique.
+
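+For illustration, a simplified sketch of the duplicate check the patched
+gtp_pdp_add() performs before inserting a new context (tid and i_tei here
+stand for the GTPA_TID and GTPA_I_TEI attributes):
+
+    pctx = ipv4_pdp_find(gtp, ms_addr);         /* ms_addr already in use? */
+    if (version == GTP_V0)
+        pctx_tid = gtp0_pdp_find(gtp, tid);     /* TID already in use? */
+    else if (version == GTP_V1)
+        pctx_tid = gtp1_pdp_find(gtp, i_tei);
+    if (pctx || pctx_tid)
+        found = true;   /* handled via NLM_F_EXCL/NLM_F_REPLACE below */
+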
+Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)")
+Signed-off-by: Taehee Yoo <ap420073@gmail.com>
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/gtp.c | 32 ++++++++++++++++++++++----------
+ 1 file changed, 22 insertions(+), 10 deletions(-)
+
+--- a/drivers/net/gtp.c
++++ b/drivers/net/gtp.c
+@@ -931,24 +931,31 @@ static void ipv4_pdp_fill(struct pdp_ctx
+ }
+ }
+
+-static int ipv4_pdp_add(struct gtp_dev *gtp, struct sock *sk,
+- struct genl_info *info)
++static int gtp_pdp_add(struct gtp_dev *gtp, struct sock *sk,
++ struct genl_info *info)
+ {
++ struct pdp_ctx *pctx, *pctx_tid = NULL;
+ struct net_device *dev = gtp->dev;
+ u32 hash_ms, hash_tid = 0;
+- struct pdp_ctx *pctx;
++ unsigned int version;
+ bool found = false;
+ __be32 ms_addr;
+
+ ms_addr = nla_get_be32(info->attrs[GTPA_MS_ADDRESS]);
+ hash_ms = ipv4_hashfn(ms_addr) % gtp->hash_size;
++ version = nla_get_u32(info->attrs[GTPA_VERSION]);
+
+- hlist_for_each_entry_rcu(pctx, &gtp->addr_hash[hash_ms], hlist_addr) {
+- if (pctx->ms_addr_ip4.s_addr == ms_addr) {
+- found = true;
+- break;
+- }
+- }
++ pctx = ipv4_pdp_find(gtp, ms_addr);
++ if (pctx)
++ found = true;
++ if (version == GTP_V0)
++ pctx_tid = gtp0_pdp_find(gtp,
++ nla_get_u64(info->attrs[GTPA_TID]));
++ else if (version == GTP_V1)
++ pctx_tid = gtp1_pdp_find(gtp,
++ nla_get_u32(info->attrs[GTPA_I_TEI]));
++ if (pctx_tid)
++ found = true;
+
+ if (found) {
+ if (info->nlhdr->nlmsg_flags & NLM_F_EXCL)
+@@ -956,6 +963,11 @@ static int ipv4_pdp_add(struct gtp_dev *
+ if (info->nlhdr->nlmsg_flags & NLM_F_REPLACE)
+ return -EOPNOTSUPP;
+
++ if (pctx && pctx_tid)
++ return -EEXIST;
++ if (!pctx)
++ pctx = pctx_tid;
++
+ ipv4_pdp_fill(pctx, info);
+
+ if (pctx->gtp_version == GTP_V0)
+@@ -1079,7 +1091,7 @@ static int gtp_genl_new_pdp(struct sk_bu
+ goto out_unlock;
+ }
+
+- err = ipv4_pdp_add(gtp, sk, info);
++ err = gtp_pdp_add(gtp, sk, info);
+
+ out_unlock:
+ rcu_read_unlock();
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:36:29 PM CET
+From: Hangbin Liu <liuhangbin@gmail.com>
+Date: Sun, 22 Dec 2019 10:51:11 +0800
+Subject: gtp: do not confirm neighbor when do pmtu update
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit 6e9105c73f8d2163d12d5dfd762fd75483ed30f5 ]
+
+When doing an IPv6 tunnel PMTU update that ends up calling
+__ip6_rt_update_pmtu(), we should not call dst_confirm_neigh() as there is
+no two-way communication.
+
+Although GTP only supports IPv4 right now, and __ip_rt_update_pmtu() does not
+call dst_confirm_neigh(), we still set the parameter to false to keep
+consistency with the IPv6 code.
+
+v5: No change.
+v4: No change.
+v3: Do not remove dst_confirm_neigh, but add a new bool parameter in
+ dst_ops.update_pmtu to control whether we should do neighbor confirm.
+ Also split the big patch to small ones for each area.
+v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu.
+
+Reviewed-by: Guillaume Nault <gnault@redhat.com>
+Acked-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/gtp.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/gtp.c
++++ b/drivers/net/gtp.c
+@@ -545,7 +545,7 @@ static int gtp_build_skb_ip4(struct sk_b
+ mtu = dst_mtu(&rt->dst);
+ }
+
+- rt->dst.ops->update_pmtu(&rt->dst, NULL, skb, mtu, true);
++ rt->dst.ops->update_pmtu(&rt->dst, NULL, skb, mtu, false);
+
+ if (!skb_is_gso(skb) && (iph->frag_off & htons(IP_DF)) &&
+ mtu < ntohs(iph->tot_len)) {
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:36:29 PM CET
+From: Taehee Yoo <ap420073@gmail.com>
+Date: Wed, 11 Dec 2019 08:23:34 +0000
+Subject: gtp: fix an use-after-free in ipv4_pdp_find()
+
+From: Taehee Yoo <ap420073@gmail.com>
+
+[ Upstream commit 94dc550a5062030569d4aa76e10e50c8fc001930 ]
+
+ipv4_pdp_find() is called in the TX packet path of GTP and internally uses
+gtp->tid_hash to look up the pdp context.
+In the current code, gtp->tid_hash and gtp->addr_hash are freed by
+->dellink(), which is gtp_dellink().
+But gtp_dellink() can be called while packets are still being processed,
+so gtp_dellink() should not free gtp->tid_hash and gtp->addr_hash.
+Instead, dev->priv_destructor() is used, because this callback is only
+called after all packet processing has safely finished.
+
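+In essence, the tables are now freed from the netdev destructor rather than
+from ->dellink() (sketch of the patched code):
+
+    static void gtp_destructor(struct net_device *dev)
+    {
+        struct gtp_dev *gtp = netdev_priv(dev);
+
+        kfree(gtp->addr_hash);
+        kfree(gtp->tid_hash);
+    }
+    ...
+    dev->priv_destructor = gtp_destructor;  /* runs once all users are done */
+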
+Test commands:
+ ip link add veth1 type veth peer name veth2
+ ip a a 172.0.0.1/24 dev veth1
+ ip link set veth1 up
+ ip a a 172.99.0.1/32 dev lo
+
+ gtp-link add gtp1 &
+
+ gtp-tunnel add gtp1 v1 200 100 172.99.0.2 172.0.0.2
+ ip r a 172.99.0.2/32 dev gtp1
+ ip link set gtp1 mtu 1500
+
+ ip netns add ns2
+ ip link set veth2 netns ns2
+ ip netns exec ns2 ip a a 172.0.0.2/24 dev veth2
+ ip netns exec ns2 ip link set veth2 up
+ ip netns exec ns2 ip a a 172.99.0.2/32 dev lo
+ ip netns exec ns2 ip link set lo up
+
+ ip netns exec ns2 gtp-link add gtp2 &
+ ip netns exec ns2 gtp-tunnel add gtp2 v1 100 200 172.99.0.1 172.0.0.1
+ ip netns exec ns2 ip r a 172.99.0.1/32 dev gtp2
+ ip netns exec ns2 ip link set gtp2 mtu 1500
+
+ hping3 172.99.0.2 -2 --flood &
+ ip link del gtp1
+
+Splat looks like:
+[ 72.568081][ T1195] BUG: KASAN: use-after-free in ipv4_pdp_find.isra.12+0x130/0x170 [gtp]
+[ 72.568916][ T1195] Read of size 8 at addr ffff8880b9a35d28 by task hping3/1195
+[ 72.569631][ T1195]
+[ 72.569861][ T1195] CPU: 2 PID: 1195 Comm: hping3 Not tainted 5.5.0-rc1 #199
+[ 72.570547][ T1195] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
+[ 72.571438][ T1195] Call Trace:
+[ 72.571764][ T1195] dump_stack+0x96/0xdb
+[ 72.572171][ T1195] ? ipv4_pdp_find.isra.12+0x130/0x170 [gtp]
+[ 72.572761][ T1195] print_address_description.constprop.5+0x1be/0x360
+[ 72.573400][ T1195] ? ipv4_pdp_find.isra.12+0x130/0x170 [gtp]
+[ 72.573971][ T1195] ? ipv4_pdp_find.isra.12+0x130/0x170 [gtp]
+[ 72.574544][ T1195] __kasan_report+0x12a/0x16f
+[ 72.575014][ T1195] ? ipv4_pdp_find.isra.12+0x130/0x170 [gtp]
+[ 72.575593][ T1195] kasan_report+0xe/0x20
+[ 72.576004][ T1195] ipv4_pdp_find.isra.12+0x130/0x170 [gtp]
+[ 72.576577][ T1195] gtp_build_skb_ip4+0x199/0x1420 [gtp]
+[ ... ]
+[ 72.647671][ T1195] BUG: unable to handle page fault for address: ffff8880b9a35d28
+[ 72.648512][ T1195] #PF: supervisor read access in kernel mode
+[ 72.649158][ T1195] #PF: error_code(0x0000) - not-present page
+[ 72.649849][ T1195] PGD a6c01067 P4D a6c01067 PUD 11fb07067 PMD 11f939067 PTE 800fffff465ca060
+[ 72.652958][ T1195] Oops: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI
+[ 72.653834][ T1195] CPU: 2 PID: 1195 Comm: hping3 Tainted: G B 5.5.0-rc1 #199
+[ 72.668062][ T1195] RIP: 0010:ipv4_pdp_find.isra.12+0x86/0x170 [gtp]
+[ ... ]
+[ 72.679168][ T1195] Call Trace:
+[ 72.679603][ T1195] gtp_build_skb_ip4+0x199/0x1420 [gtp]
+[ 72.681915][ T1195] ? ipv4_pdp_find.isra.12+0x170/0x170 [gtp]
+[ 72.682513][ T1195] ? lock_acquire+0x164/0x3b0
+[ 72.682966][ T1195] ? gtp_dev_xmit+0x35e/0x890 [gtp]
+[ 72.683481][ T1195] gtp_dev_xmit+0x3c2/0x890 [gtp]
+[ ... ]
+
+Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)")
+Signed-off-by: Taehee Yoo <ap420073@gmail.com>
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/gtp.c | 34 +++++++++++++++++-----------------
+ 1 file changed, 17 insertions(+), 17 deletions(-)
+
+--- a/drivers/net/gtp.c
++++ b/drivers/net/gtp.c
+@@ -644,9 +644,16 @@ static void gtp_link_setup(struct net_de
+ }
+
+ static int gtp_hashtable_new(struct gtp_dev *gtp, int hsize);
+-static void gtp_hashtable_free(struct gtp_dev *gtp);
+ static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[]);
+
++static void gtp_destructor(struct net_device *dev)
++{
++ struct gtp_dev *gtp = netdev_priv(dev);
++
++ kfree(gtp->addr_hash);
++ kfree(gtp->tid_hash);
++}
++
+ static int gtp_newlink(struct net *src_net, struct net_device *dev,
+ struct nlattr *tb[], struct nlattr *data[],
+ struct netlink_ext_ack *extack)
+@@ -681,13 +688,15 @@ static int gtp_newlink(struct net *src_n
+
+ gn = net_generic(dev_net(dev), gtp_net_id);
+ list_add_rcu(&gtp->list, &gn->gtp_dev_list);
++ dev->priv_destructor = gtp_destructor;
+
+ netdev_dbg(dev, "registered new GTP interface\n");
+
+ return 0;
+
+ out_hashtable:
+- gtp_hashtable_free(gtp);
++ kfree(gtp->addr_hash);
++ kfree(gtp->tid_hash);
+ out_encap:
+ gtp_encap_disable(gtp);
+ return err;
+@@ -696,9 +705,14 @@ out_encap:
+ static void gtp_dellink(struct net_device *dev, struct list_head *head)
+ {
+ struct gtp_dev *gtp = netdev_priv(dev);
++ struct pdp_ctx *pctx;
++ int i;
++
++ for (i = 0; i < gtp->hash_size; i++)
++ hlist_for_each_entry_rcu(pctx, &gtp->tid_hash[i], hlist_tid)
++ pdp_context_delete(pctx);
+
+ gtp_encap_disable(gtp);
+- gtp_hashtable_free(gtp);
+ list_del_rcu(&gtp->list);
+ unregister_netdevice_queue(dev, head);
+ }
+@@ -776,20 +790,6 @@ err1:
+ return -ENOMEM;
+ }
+
+-static void gtp_hashtable_free(struct gtp_dev *gtp)
+-{
+- struct pdp_ctx *pctx;
+- int i;
+-
+- for (i = 0; i < gtp->hash_size; i++)
+- hlist_for_each_entry_rcu(pctx, &gtp->tid_hash[i], hlist_tid)
+- pdp_context_delete(pctx);
+-
+- synchronize_rcu();
+- kfree(gtp->addr_hash);
+- kfree(gtp->tid_hash);
+-}
+-
+ static struct sock *gtp_encap_enable_socket(int fd, int type,
+ struct gtp_dev *gtp)
+ {
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:36:29 PM CET
+From: Taehee Yoo <ap420073@gmail.com>
+Date: Wed, 11 Dec 2019 08:23:17 +0000
+Subject: gtp: fix wrong condition in gtp_genl_dump_pdp()
+
+From: Taehee Yoo <ap420073@gmail.com>
+
+[ Upstream commit 94a6d9fb88df43f92d943c32b84ce398d50bf49f ]
+
+gtp_genl_dump_pdp() is the ->dumpit() callback of the GTP module and is used
+to dump pdp contexts. It may be re-executed because of the dump packet size.
+
+If the dump packet gets too big, it saves the current dump pointer
+(gtp interface pointer, bucket, TID value) and then restarts the dump from
+that pointer.
+The current GTP code allows adding a pdp context with a zero TID, but the
+dump code ignores a zero TID value, so the last dump pointer will not be
+found.
+
+In addition, this patch adds the missing rcu_read_lock() in
+gtp_genl_dump_pdp().
+
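+For illustration, a simplified sketch of the patched loop, which resumes from
+a (bucket, entry index) pair instead of a TID:
+
+    for (i = bucket; i < gtp->hash_size; i++) {
+        j = 0;
+        hlist_for_each_entry_rcu(pctx, &gtp->tid_hash[i], hlist_tid) {
+            if (j >= skip && gtp_genl_fill_info(skb, ..., pctx)) {
+                cb->args[0] = i;    /* bucket to resume from */
+                cb->args[1] = j;    /* entries already dumped */
+                goto out;
+            }
+            j++;
+        }
+        skip = 0;
+    }
+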
+Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)")
+Signed-off-by: Taehee Yoo <ap420073@gmail.com>
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/gtp.c | 36 +++++++++++++++++++-----------------
+ 1 file changed, 19 insertions(+), 17 deletions(-)
+
+--- a/drivers/net/gtp.c
++++ b/drivers/net/gtp.c
+@@ -42,7 +42,6 @@ struct pdp_ctx {
+ struct hlist_node hlist_addr;
+
+ union {
+- u64 tid;
+ struct {
+ u64 tid;
+ u16 flow;
+@@ -1249,43 +1248,46 @@ static int gtp_genl_dump_pdp(struct sk_b
+ struct netlink_callback *cb)
+ {
+ struct gtp_dev *last_gtp = (struct gtp_dev *)cb->args[2], *gtp;
++ int i, j, bucket = cb->args[0], skip = cb->args[1];
+ struct net *net = sock_net(skb->sk);
+- struct gtp_net *gn = net_generic(net, gtp_net_id);
+- unsigned long tid = cb->args[1];
+- int i, k = cb->args[0], ret;
+ struct pdp_ctx *pctx;
++ struct gtp_net *gn;
++
++ gn = net_generic(net, gtp_net_id);
+
+ if (cb->args[4])
+ return 0;
+
++ rcu_read_lock();
+ list_for_each_entry_rcu(gtp, &gn->gtp_dev_list, list) {
+ if (last_gtp && last_gtp != gtp)
+ continue;
+ else
+ last_gtp = NULL;
+
+- for (i = k; i < gtp->hash_size; i++) {
+- hlist_for_each_entry_rcu(pctx, &gtp->tid_hash[i], hlist_tid) {
+- if (tid && tid != pctx->u.tid)
+- continue;
+- else
+- tid = 0;
+-
+- ret = gtp_genl_fill_info(skb,
+- NETLINK_CB(cb->skb).portid,
+- cb->nlh->nlmsg_seq,
+- cb->nlh->nlmsg_type, pctx);
+- if (ret < 0) {
++ for (i = bucket; i < gtp->hash_size; i++) {
++ j = 0;
++ hlist_for_each_entry_rcu(pctx, &gtp->tid_hash[i],
++ hlist_tid) {
++ if (j >= skip &&
++ gtp_genl_fill_info(skb,
++ NETLINK_CB(cb->skb).portid,
++ cb->nlh->nlmsg_seq,
++ cb->nlh->nlmsg_type, pctx)) {
+ cb->args[0] = i;
+- cb->args[1] = pctx->u.tid;
++ cb->args[1] = j;
+ cb->args[2] = (unsigned long)gtp;
+ goto out;
+ }
++ j++;
+ }
++ skip = 0;
+ }
++ bucket = 0;
+ }
+ cb->args[4] = 1;
+ out:
++ rcu_read_unlock();
+ return skb->len;
+ }
+
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:36:29 PM CET
+From: Hangbin Liu <liuhangbin@gmail.com>
+Date: Sun, 22 Dec 2019 10:51:10 +0800
+Subject: ip6_gre: do not confirm neighbor when do pmtu update
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit 675d76ad0ad5bf41c9a129772ef0aba8f57ea9a7 ]
+
+Currently, when we do an IPv6 GRE PMTU update, we also do a neigh confirm.
+This causes the neigh cache to be refreshed and set to REACHABLE before
+xmit.
+
+But if the remote MAC address changed, e.g. the device was deleted and
+recreated, we are not able to notice this and keep using the old MAC
+address, as the neigh cache is REACHABLE.
+
+Fix this by disabling the neigh confirm when doing the PMTU update.
+
+v5: No change.
+v4: No change.
+v3: Do not remove dst_confirm_neigh, but add a new bool parameter in
+ dst_ops.update_pmtu to control whether we should do neighbor confirm.
+ Also split the big patch to small ones for each area.
+v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu.
+
+Reported-by: Jianlin Shi <jishi@redhat.com>
+Reviewed-by: Guillaume Nault <gnault@redhat.com>
+Acked-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_gre.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv6/ip6_gre.c
++++ b/net/ipv6/ip6_gre.c
+@@ -1060,7 +1060,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit
+
+ /* TooBig packet may have updated dst->dev's mtu */
+ if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu)
+- dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu, true);
++ dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu, false);
+
+ err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
+ NEXTHDR_GRE);
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:36:29 PM CET
+From: Hangbin Liu <liuhangbin@gmail.com>
+Date: Sun, 22 Dec 2019 10:51:09 +0800
+Subject: net: add bool confirm_neigh parameter for dst_ops.update_pmtu
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit bd085ef678b2cc8c38c105673dfe8ff8f5ec0c57 ]
+
+The MTU update code is supposed to be invoked in response to real
+networking events that update the PMTU. In IPv6 PMTU update function
+__ip6_rt_update_pmtu() we called dst_confirm_neigh() to update neighbor
+confirmed time.
+
+But the tunnel code calls the PMTU update before xmit, like:
+ - tnl_update_pmtu()
+ - skb_dst_update_pmtu()
+ - ip6_rt_update_pmtu()
+ - __ip6_rt_update_pmtu()
+ - dst_confirm_neigh()
+
+If the tunnel's remote dst MAC address changed and we still do the neigh
+confirm, we will not be able to update the neigh cache, and ping6 to the
+remote will fail.
+
+So for this ip_tunnel_xmit() case, _EVEN_ if the MTU is changed, we
+should not be invoking dst_confirm_neigh() as we have no evidence
+of successful two-way communication at this point.
+
+On the other hand it is also important to keep the neigh reachability fresh
+for TCP flows, so we cannot remove this dst_confirm_neigh() call.
+
+To fix the issue, we have to add a new bool parameter to dst_ops.update_pmtu
+to choose whether we should do the neigh update or not. This patch adds the
+parameter and sets all the callers to true to keep the previous behaviour;
+the tunnel code is then fixed one by one in later patches.
+
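+For illustration, the new callback signature and an unchanged caller (sketch
+based on the hunks below):
+
+    void (*update_pmtu)(struct dst_entry *dst, struct sock *sk,
+                        struct sk_buff *skb, u32 mtu,
+                        bool confirm_neigh);
+
+    /* existing callers keep the old behaviour by passing true: */
+    dst->ops->update_pmtu(dst, sk, NULL, mtu, true);
+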
+v5: No change.
+v4: No change.
+v3: Do not remove dst_confirm_neigh, but add a new bool parameter in
+ dst_ops.update_pmtu to control whether we should do neighbor confirm.
+ Also split the big patch to small ones for each area.
+v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu.
+
+Suggested-by: David Miller <davem@davemloft.net>
+Reviewed-by: Guillaume Nault <gnault@redhat.com>
+Acked-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/gtp.c | 2 +-
+ include/net/dst.h | 2 +-
+ include/net/dst_ops.h | 3 ++-
+ net/bridge/br_nf_core.c | 3 ++-
+ net/decnet/dn_route.c | 6 ++++--
+ net/ipv4/inet_connection_sock.c | 2 +-
+ net/ipv4/route.c | 9 ++++++---
+ net/ipv4/xfrm4_policy.c | 5 +++--
+ net/ipv6/inet6_connection_sock.c | 2 +-
+ net/ipv6/ip6_gre.c | 2 +-
+ net/ipv6/route.c | 22 +++++++++++++++-------
+ net/ipv6/xfrm6_policy.c | 5 +++--
+ net/netfilter/ipvs/ip_vs_xmit.c | 2 +-
+ net/sctp/transport.c | 2 +-
+ 14 files changed, 42 insertions(+), 25 deletions(-)
+
+--- a/drivers/net/gtp.c
++++ b/drivers/net/gtp.c
+@@ -545,7 +545,7 @@ static int gtp_build_skb_ip4(struct sk_b
+ mtu = dst_mtu(&rt->dst);
+ }
+
+- rt->dst.ops->update_pmtu(&rt->dst, NULL, skb, mtu);
++ rt->dst.ops->update_pmtu(&rt->dst, NULL, skb, mtu, true);
+
+ if (!skb_is_gso(skb) && (iph->frag_off & htons(IP_DF)) &&
+ mtu < ntohs(iph->tot_len)) {
+--- a/include/net/dst.h
++++ b/include/net/dst.h
+@@ -527,7 +527,7 @@ static inline void skb_dst_update_pmtu(s
+ struct dst_entry *dst = skb_dst(skb);
+
+ if (dst && dst->ops->update_pmtu)
+- dst->ops->update_pmtu(dst, NULL, skb, mtu);
++ dst->ops->update_pmtu(dst, NULL, skb, mtu, true);
+ }
+
+ static inline void skb_tunnel_check_pmtu(struct sk_buff *skb,
+--- a/include/net/dst_ops.h
++++ b/include/net/dst_ops.h
+@@ -27,7 +27,8 @@ struct dst_ops {
+ struct dst_entry * (*negative_advice)(struct dst_entry *);
+ void (*link_failure)(struct sk_buff *);
+ void (*update_pmtu)(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb, u32 mtu);
++ struct sk_buff *skb, u32 mtu,
++ bool confirm_neigh);
+ void (*redirect)(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb);
+ int (*local_out)(struct net *net, struct sock *sk, struct sk_buff *skb);
+--- a/net/bridge/br_nf_core.c
++++ b/net/bridge/br_nf_core.c
+@@ -26,7 +26,8 @@
+ #endif
+
+ static void fake_update_pmtu(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb, u32 mtu)
++ struct sk_buff *skb, u32 mtu,
++ bool confirm_neigh)
+ {
+ }
+
+--- a/net/decnet/dn_route.c
++++ b/net/decnet/dn_route.c
+@@ -118,7 +118,8 @@ static void dn_dst_ifdown(struct dst_ent
+ static struct dst_entry *dn_dst_negative_advice(struct dst_entry *);
+ static void dn_dst_link_failure(struct sk_buff *);
+ static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb , u32 mtu);
++ struct sk_buff *skb , u32 mtu,
++ bool confirm_neigh);
+ static void dn_dst_redirect(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb);
+ static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst,
+@@ -259,7 +260,8 @@ static int dn_dst_gc(struct dst_ops *ops
+ * advertise to the other end).
+ */
+ static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb, u32 mtu)
++ struct sk_buff *skb, u32 mtu,
++ bool confirm_neigh)
+ {
+ struct dn_route *rt = (struct dn_route *) dst;
+ struct neighbour *n = rt->n;
+--- a/net/ipv4/inet_connection_sock.c
++++ b/net/ipv4/inet_connection_sock.c
+@@ -1089,7 +1089,7 @@ struct dst_entry *inet_csk_update_pmtu(s
+ if (!dst)
+ goto out;
+ }
+- dst->ops->update_pmtu(dst, sk, NULL, mtu);
++ dst->ops->update_pmtu(dst, sk, NULL, mtu, true);
+
+ dst = __sk_dst_check(sk, 0);
+ if (!dst)
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -142,7 +142,8 @@ static unsigned int ipv4_mtu(const stru
+ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
+ static void ipv4_link_failure(struct sk_buff *skb);
+ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb, u32 mtu);
++ struct sk_buff *skb, u32 mtu,
++ bool confirm_neigh);
+ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb);
+ static void ipv4_dst_destroy(struct dst_entry *dst);
+@@ -1035,7 +1036,8 @@ static void __ip_rt_update_pmtu(struct r
+ }
+
+ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb, u32 mtu)
++ struct sk_buff *skb, u32 mtu,
++ bool confirm_neigh)
+ {
+ struct rtable *rt = (struct rtable *) dst;
+ struct flowi4 fl4;
+@@ -2559,7 +2561,8 @@ static unsigned int ipv4_blackhole_mtu(c
+ }
+
+ static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb, u32 mtu)
++ struct sk_buff *skb, u32 mtu,
++ bool confirm_neigh)
+ {
+ }
+
+--- a/net/ipv4/xfrm4_policy.c
++++ b/net/ipv4/xfrm4_policy.c
+@@ -222,12 +222,13 @@ _decode_session4(struct sk_buff *skb, st
+ }
+
+ static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb, u32 mtu)
++ struct sk_buff *skb, u32 mtu,
++ bool confirm_neigh)
+ {
+ struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+ struct dst_entry *path = xdst->route;
+
+- path->ops->update_pmtu(path, sk, skb, mtu);
++ path->ops->update_pmtu(path, sk, skb, mtu, confirm_neigh);
+ }
+
+ static void xfrm4_redirect(struct dst_entry *dst, struct sock *sk,
+--- a/net/ipv6/inet6_connection_sock.c
++++ b/net/ipv6/inet6_connection_sock.c
+@@ -150,7 +150,7 @@ struct dst_entry *inet6_csk_update_pmtu(
+
+ if (IS_ERR(dst))
+ return NULL;
+- dst->ops->update_pmtu(dst, sk, NULL, mtu);
++ dst->ops->update_pmtu(dst, sk, NULL, mtu, true);
+
+ dst = inet6_csk_route_socket(sk, &fl6);
+ return IS_ERR(dst) ? NULL : dst;
+--- a/net/ipv6/ip6_gre.c
++++ b/net/ipv6/ip6_gre.c
+@@ -1060,7 +1060,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit
+
+ /* TooBig packet may have updated dst->dev's mtu */
+ if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu)
+- dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu);
++ dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu, true);
+
+ err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
+ NEXTHDR_GRE);
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -99,7 +99,8 @@ static int ip6_pkt_prohibit(struct sk_b
+ static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
+ static void ip6_link_failure(struct sk_buff *skb);
+ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb, u32 mtu);
++ struct sk_buff *skb, u32 mtu,
++ bool confirm_neigh);
+ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb);
+ static int rt6_score_route(struct fib6_info *rt, int oif, int strict);
+@@ -266,7 +267,8 @@ static unsigned int ip6_blackhole_mtu(co
+ }
+
+ static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb, u32 mtu)
++ struct sk_buff *skb, u32 mtu,
++ bool confirm_neigh)
+ {
+ }
+
+@@ -2352,7 +2354,8 @@ static bool rt6_cache_allowed_for_pmtu(c
+ }
+
+ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
+- const struct ipv6hdr *iph, u32 mtu)
++ const struct ipv6hdr *iph, u32 mtu,
++ bool confirm_neigh)
+ {
+ const struct in6_addr *daddr, *saddr;
+ struct rt6_info *rt6 = (struct rt6_info *)dst;
+@@ -2370,7 +2373,10 @@ static void __ip6_rt_update_pmtu(struct
+ daddr = NULL;
+ saddr = NULL;
+ }
+- dst_confirm_neigh(dst, daddr);
++
++ if (confirm_neigh)
++ dst_confirm_neigh(dst, daddr);
++
+ mtu = max_t(u32, mtu, IPV6_MIN_MTU);
+ if (mtu >= dst_mtu(dst))
+ return;
+@@ -2401,9 +2407,11 @@ static void __ip6_rt_update_pmtu(struct
+ }
+
+ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb, u32 mtu)
++ struct sk_buff *skb, u32 mtu,
++ bool confirm_neigh)
+ {
+- __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
++ __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu,
++ confirm_neigh);
+ }
+
+ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
+@@ -2423,7 +2431,7 @@ void ip6_update_pmtu(struct sk_buff *skb
+
+ dst = ip6_route_output(net, NULL, &fl6);
+ if (!dst->error)
+- __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
++ __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu), true);
+ dst_release(dst);
+ }
+ EXPORT_SYMBOL_GPL(ip6_update_pmtu);
+--- a/net/ipv6/xfrm6_policy.c
++++ b/net/ipv6/xfrm6_policy.c
+@@ -221,12 +221,13 @@ _decode_session6(struct sk_buff *skb, st
+ }
+
+ static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb, u32 mtu)
++ struct sk_buff *skb, u32 mtu,
++ bool confirm_neigh)
+ {
+ struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+ struct dst_entry *path = xdst->route;
+
+- path->ops->update_pmtu(path, sk, skb, mtu);
++ path->ops->update_pmtu(path, sk, skb, mtu, confirm_neigh);
+ }
+
+ static void xfrm6_redirect(struct dst_entry *dst, struct sock *sk,
+--- a/net/netfilter/ipvs/ip_vs_xmit.c
++++ b/net/netfilter/ipvs/ip_vs_xmit.c
+@@ -209,7 +209,7 @@ static inline void maybe_update_pmtu(int
+ struct rtable *ort = skb_rtable(skb);
+
+ if (!skb->dev && sk && sk_fullsock(sk))
+- ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
++ ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu, true);
+ }
+
+ static inline bool ensure_mtu_is_adequate(struct netns_ipvs *ipvs, int skb_af,
+--- a/net/sctp/transport.c
++++ b/net/sctp/transport.c
+@@ -278,7 +278,7 @@ bool sctp_transport_update_pmtu(struct s
+
+ pf->af->from_sk(&addr, sk);
+ pf->to_sk_daddr(&t->ipaddr, sk);
+- dst->ops->update_pmtu(dst, sk, NULL, pmtu);
++ dst->ops->update_pmtu(dst, sk, NULL, pmtu, true);
+ pf->to_sk_daddr(&addr, sk);
+
+ dst = sctp_transport_dst_check(t);
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:36:29 PM CET
+From: Hangbin Liu <liuhangbin@gmail.com>
+Date: Sun, 22 Dec 2019 10:51:12 +0800
+Subject: net/dst: add new function skb_dst_update_pmtu_no_confirm
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit 07dc35c6e3cc3c001915d05f5bf21f80a39a0970 ]
+
+Add a new function skb_dst_update_pmtu_no_confirm() for callers that need to
+update the PMTU but should not do a neighbor confirm.
+
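+For illustration, a hypothetical tunnel xmit caller would use it like this
+(sketch, not taken from a specific driver):
+
+    /* no proven two-way traffic yet, so skip the neighbour confirm */
+    if (skb->len > mtu)
+        skb_dst_update_pmtu_no_confirm(skb, mtu);
+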
+v5: No change.
+v4: No change.
+v3: Do not remove dst_confirm_neigh, but add a new bool parameter in
+ dst_ops.update_pmtu to control whether we should do neighbor confirm.
+ Also split the big patch to small ones for each area.
+v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu.
+
+Reviewed-by: Guillaume Nault <gnault@redhat.com>
+Acked-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/dst.h | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+--- a/include/net/dst.h
++++ b/include/net/dst.h
+@@ -530,6 +530,15 @@ static inline void skb_dst_update_pmtu(s
+ dst->ops->update_pmtu(dst, NULL, skb, mtu, true);
+ }
+
++/* update dst pmtu but not do neighbor confirm */
++static inline void skb_dst_update_pmtu_no_confirm(struct sk_buff *skb, u32 mtu)
++{
++ struct dst_entry *dst = skb_dst(skb);
++
++ if (dst && dst->ops->update_pmtu)
++ dst->ops->update_pmtu(dst, NULL, skb, mtu, false);
++}
++
+ static inline void skb_tunnel_check_pmtu(struct sk_buff *skb,
+ struct dst_entry *encap_dst,
+ int headroom)
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:36:29 PM CET
+From: Hangbin Liu <liuhangbin@gmail.com>
+Date: Sun, 22 Dec 2019 10:51:16 +0800
+Subject: net/dst: do not confirm neighbor for vxlan and geneve pmtu update
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit f081042d128a0c7acbd67611def62e1b52e2d294 ]
+
+When doing an IPv6 tunnel PMTU update that ends up calling
+__ip6_rt_update_pmtu(), we should not call dst_confirm_neigh() as there is
+no two-way communication.
+
+So disable the neigh confirm for the vxlan and geneve PMTU update.
+
+v5: No change.
+v4: No change.
+v3: Do not remove dst_confirm_neigh, but add a new bool parameter in
+ dst_ops.update_pmtu to control whether we should do neighbor confirm.
+ Also split the big patch to small ones for each area.
+v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu.
+
+Fixes: a93bf0ff4490 ("vxlan: update skb dst pmtu on tx path")
+Fixes: 52a589d51f10 ("geneve: update skb dst pmtu on tx path")
+Reviewed-by: Guillaume Nault <gnault@redhat.com>
+Tested-by: Guillaume Nault <gnault@redhat.com>
+Acked-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/dst.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/include/net/dst.h
++++ b/include/net/dst.h
+@@ -546,7 +546,7 @@ static inline void skb_tunnel_check_pmtu
+ u32 encap_mtu = dst_mtu(encap_dst);
+
+ if (skb->len > encap_mtu - headroom)
+- skb_dst_update_pmtu(skb, encap_mtu - headroom);
++ skb_dst_update_pmtu_no_confirm(skb, encap_mtu - headroom);
+ }
+
+ #endif /* _NET_DST_H */
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:36:29 PM CET
+From: Netanel Belgazal <netanel@amazon.com>
+Date: Tue, 10 Dec 2019 11:27:44 +0000
+Subject: net: ena: fix napi handler misbehavior when the napi budget is zero
+
+From: Netanel Belgazal <netanel@amazon.com>
+
+[ Upstream commit 24dee0c7478d1a1e00abdf5625b7f921467325dc ]
+
+In netpoll the napi handler can be called with a budget equal to zero.
+The current ENA napi handler doesn't take that into consideration.
+
+The napi handler handles Rx packets in a do-while loop.
+Currently, the budget check happens only after decrementing the
+budget, therefore the napi handler, in rare cases, could run over
+MAX_INT packets.
+
+In addition, this moves all budget-related variables to int
+calculations and stops mixing in u32, to avoid ambiguity.
+
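+For illustration, the RX side of the patched handler (sketch):
+
+    tx_work_done = ena_clean_tx_irq(tx_ring, tx_budget);
+    /* budget == 0 (netpoll): clean only TX completions */
+    if (likely(budget))
+        rx_work_done = ena_clean_rx_irq(rx_ring, napi, budget);
+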
+Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)")
+Signed-off-by: Netanel Belgazal <netanel@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/amazon/ena/ena_netdev.c | 10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
+@@ -1197,8 +1197,8 @@ static int ena_io_poll(struct napi_struc
+ struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi);
+ struct ena_ring *tx_ring, *rx_ring;
+
+- u32 tx_work_done;
+- u32 rx_work_done;
++ int tx_work_done;
++ int rx_work_done = 0;
+ int tx_budget;
+ int napi_comp_call = 0;
+ int ret;
+@@ -1215,7 +1215,11 @@ static int ena_io_poll(struct napi_struc
+ }
+
+ tx_work_done = ena_clean_tx_irq(tx_ring, tx_budget);
+- rx_work_done = ena_clean_rx_irq(rx_ring, napi, budget);
++ /* On netpoll the budget is zero and the handler should only clean the
++ * tx completions.
++ */
++ if (likely(budget))
++ rx_work_done = ena_clean_rx_irq(rx_ring, napi, budget);
+
+ /* If the device is about to reset or down, avoid unmask
+ * the interrupt and return 0 so NAPI won't reschedule
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:36:29 PM CET
+From: Russell King <rmk+kernel@armlinux.org.uk>
+Date: Tue, 10 Dec 2019 22:33:05 +0000
+Subject: net: marvell: mvpp2: phylink requires the link interrupt
+
+From: Russell King <rmk+kernel@armlinux.org.uk>
+
+[ Upstream commit f3f2364ea14d1cf6bf966542f31eadcf178f1577 ]
+
+phylink requires the MAC to report when its link status changes when
+operating in inband modes. Failure to report link status changes
+means that phylink has no idea when the link events happen, which
+results in either the network interface's carrier remaining up or
+remaining permanently down.
+
+For example, with a fiber module, if the interface is brought up and
+link is initially established, taking the link down at the far end
+will cut the optical power. The SFP module's LOS asserts, we
+deactivate the link, and the network interface reports no carrier.
+
+When the far end is brought back up, the SFP module's LOS deasserts,
+but the MAC may be slower to establish link. If this happens (which
+in my tests is a certainty) then phylink never hears that the MAC
+has established link with the far end, and the network interface is
+stuck reporting no carrier. This means the interface is
+non-functional.
+
+Avoiding the link interrupt when we have phylink is basically not
+an option, so remove the !port->phylink from the test.
+
+Fixes: 4bb043262878 ("net: mvpp2: phylink support")
+Tested-by: Sven Auhagen <sven.auhagen@voleatech.de>
+Tested-by: Antoine Tenart <antoine.tenart@bootlin.com>
+Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
++++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+@@ -3341,7 +3341,7 @@ static int mvpp2_open(struct net_device
+ valid = true;
+ }
+
+- if (priv->hw_version == MVPP22 && port->link_irq && !port->phylink) {
++ if (priv->hw_version == MVPP22 && port->link_irq) {
+ err = request_irq(port->link_irq, mvpp2_link_status_isr, 0,
+ dev->name, port);
+ if (err) {
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:36:29 PM CET
+From: Vladyslav Tarasiuk <vladyslavt@mellanox.com>
+Date: Thu, 26 Dec 2019 10:41:56 +0200
+Subject: net/mlxfw: Fix out-of-memory error in mfa2 flash burning
+
+From: Vladyslav Tarasiuk <vladyslavt@mellanox.com>
+
+[ Upstream commit a5bcd72e054aabb93ddc51ed8cde36a5bfc50271 ]
+
+The burning process requires performing internal allocations of large
+chunks of memory. This memory doesn't need to be contiguous and can be
+safely allocated by vzalloc() instead of kzalloc(). This patch changes
+the allocation accordingly to avoid a possible out-of-memory failure.
+
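+For illustration, the allocation and its matching release after the change
+(sketch of the hunks below):
+
+    comp_data = vzalloc(sizeof(*comp_data) + comp_buf_size);
+    if (!comp_data)
+        return ERR_PTR(-ENOMEM);
+    ...
+    vfree(comp_data);   /* vzalloc() must be paired with vfree(), not kfree() */
+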
+Fixes: 410ed13cae39 ("Add the mlxfw module for Mellanox firmware flash process")
+Signed-off-by: Vladyslav Tarasiuk <vladyslavt@mellanox.com>
+Reviewed-by: Aya Levin <ayal@mellanox.com>
+Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
+Tested-by: Ido Schimmel <idosch@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c
++++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c
+@@ -37,6 +37,7 @@
+ #include <linux/kernel.h>
+ #include <linux/module.h>
+ #include <linux/netlink.h>
++#include <linux/vmalloc.h>
+ #include <linux/xz.h>
+ #include "mlxfw_mfa2.h"
+ #include "mlxfw_mfa2_file.h"
+@@ -579,7 +580,7 @@ mlxfw_mfa2_file_component_get(const stru
+ comp_size = be32_to_cpu(comp->size);
+ comp_buf_size = comp_size + mlxfw_mfa2_comp_magic_len;
+
+- comp_data = kmalloc(sizeof(*comp_data) + comp_buf_size, GFP_KERNEL);
++ comp_data = vzalloc(sizeof(*comp_data) + comp_buf_size);
+ if (!comp_data)
+ return ERR_PTR(-ENOMEM);
+ comp_data->comp.data_size = comp_size;
+@@ -601,7 +602,7 @@ mlxfw_mfa2_file_component_get(const stru
+ comp_data->comp.data = comp_data->buff + mlxfw_mfa2_comp_magic_len;
+ return &comp_data->comp;
+ err_out:
+- kfree(comp_data);
++ vfree(comp_data);
+ return ERR_PTR(err);
+ }
+
+@@ -610,7 +611,7 @@ void mlxfw_mfa2_file_component_put(struc
+ const struct mlxfw_mfa2_comp_data *comp_data;
+
+ comp_data = container_of(comp, struct mlxfw_mfa2_comp_data, comp);
+- kfree(comp_data);
++ vfree(comp_data);
+ }
+
+ void mlxfw_mfa2_file_fini(struct mlxfw_mfa2_file *mfa2_file)
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:36:29 PM CET
+From: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
+Date: Thu, 26 Dec 2019 20:01:01 +0100
+Subject: net: stmmac: dwmac-meson8b: Fix the RGMII TX delay on Meson8b/8m2 SoCs
+
+From: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
+
+[ Upstream commit bd6f48546b9cb7a785344fc78058c420923d7ed8 ]
+
+GXBB and newer SoCs use the fixed FCLK_DIV2 (1GHz) clock as input for
+the m250_sel clock. Meson8b and Meson8m2 use MPLL2 instead, whose rate
+can be adjusted at runtime.
+
+So far we have been running MPLL2 with ~250MHz (and the internal
+m250_div with value 1), which worked enough that we could transfer data
+with an TX delay of 4ns. Unfortunately there is high packet loss with
+an RGMII PHY when transferring data (receiving data works fine though).
+Odroid-C1's u-boot is running with a TX delay of only 2ns as well as
+the internal m250_div set to 2 - no lost (TX) packets can be observed
+with that setting in u-boot.
+
+Manual testing has shown that the TX packet loss goes away when using
+the following settings in Linux (the vendor kernel uses the same
+settings):
+- MPLL2 clock set to ~500MHz
+- m250_div set to 2
+- TX delay set to 2ns on the MAC side
+
+Update the m250_div divider settings to only accept dividers greater than or
+equal to 2 to fix the TX delay generated by the MAC.
+
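+For illustration, the m250 rate stays the same while the divider changes
+(250 MHz / 1 before, 500 MHz / 2 after); the new divider table simply no
+longer offers the value 1 (sketch of the added table):
+
+    static const struct clk_div_table div_table[] = {
+        { .div = 2, .val = 2, }, { .div = 3, .val = 3, },
+        { .div = 4, .val = 4, }, { .div = 5, .val = 5, },
+        { .div = 6, .val = 6, }, { .div = 7, .val = 7, },
+    };
+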
+iperf3 results before the change:
+[ ID] Interval Transfer Bitrate Retr
+[ 5] 0.00-10.00 sec 182 MBytes 153 Mbits/sec 514 sender
+[ 5] 0.00-10.00 sec 182 MBytes 152 Mbits/sec receiver
+
+iperf3 results after the change (including an updated TX delay of 2ns):
+[ ID] Interval Transfer Bitrate Retr Cwnd
+[ 5] 0.00-10.00 sec 927 MBytes 778 Mbits/sec 0 sender
+[ 5] 0.00-10.01 sec 927 MBytes 777 Mbits/sec receiver
+
+Fixes: 4f6a71b84e1afd ("net: stmmac: dwmac-meson8b: fix internal RGMII clock configuration")
+Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c | 14 +++++++++++---
+ 1 file changed, 11 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
+@@ -118,6 +118,14 @@ static int meson8b_init_rgmii_tx_clk(str
+ struct device *dev = dwmac->dev;
+ const char *parent_name, *mux_parent_names[MUX_CLK_NUM_PARENTS];
+ struct meson8b_dwmac_clk_configs *clk_configs;
++ static const struct clk_div_table div_table[] = {
++ { .div = 2, .val = 2, },
++ { .div = 3, .val = 3, },
++ { .div = 4, .val = 4, },
++ { .div = 5, .val = 5, },
++ { .div = 6, .val = 6, },
++ { .div = 7, .val = 7, },
++ };
+
+ clk_configs = devm_kzalloc(dev, sizeof(*clk_configs), GFP_KERNEL);
+ if (!clk_configs)
+@@ -152,9 +160,9 @@ static int meson8b_init_rgmii_tx_clk(str
+ clk_configs->m250_div.reg = dwmac->regs + PRG_ETH0;
+ clk_configs->m250_div.shift = PRG_ETH0_CLK_M250_DIV_SHIFT;
+ clk_configs->m250_div.width = PRG_ETH0_CLK_M250_DIV_WIDTH;
+- clk_configs->m250_div.flags = CLK_DIVIDER_ONE_BASED |
+- CLK_DIVIDER_ALLOW_ZERO |
+- CLK_DIVIDER_ROUND_CLOSEST;
++ clk_configs->m250_div.table = div_table;
++ clk_configs->m250_div.flags = CLK_DIVIDER_ALLOW_ZERO |
++ CLK_DIVIDER_ROUND_CLOSEST;
+ clk = meson8b_dwmac_register_clk(dwmac, "m250_div", &parent_name, 1,
+ &clk_divider_ops,
+ &clk_configs->m250_div.hw);
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:36:29 PM CET
+From: Vladis Dronov <vdronov@redhat.com>
+Date: Fri, 27 Dec 2019 03:26:27 +0100
+Subject: ptp: fix the race between the release of ptp_clock and cdev
+
+From: Vladis Dronov <vdronov@redhat.com>
+
+[ Upstream commit a33121e5487b424339636b25c35d3a180eaa5f5e ]
+
+In the case when a ptp chardev (like /dev/ptp0) is open but the underlying
+device is removed, closing this file leads to a race. This reproduces
+easily in a kvm virtual machine:
+
+ts# cat openptp0.c
+int main() { ... fp = fopen("/dev/ptp0", "r"); ... sleep(10); }
+ts# uname -r
+5.5.0-rc3-46cf053e
+ts# cat /proc/cmdline
+... slub_debug=FZP
+ts# modprobe ptp_kvm
+ts# ./openptp0 &
+[1] 670
+opened /dev/ptp0, sleeping 10s...
+ts# rmmod ptp_kvm
+ts# ls /dev/ptp*
+ls: cannot access '/dev/ptp*': No such file or directory
+ts# ...woken up
+[ 48.010809] general protection fault: 0000 [#1] SMP
+[ 48.012502] CPU: 6 PID: 658 Comm: openptp0 Not tainted 5.5.0-rc3-46cf053e #25
+[ 48.014624] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), ...
+[ 48.016270] RIP: 0010:module_put.part.0+0x7/0x80
+[ 48.017939] RSP: 0018:ffffb3850073be00 EFLAGS: 00010202
+[ 48.018339] RAX: 000000006b6b6b6b RBX: 6b6b6b6b6b6b6b6b RCX: ffff89a476c00ad0
+[ 48.018936] RDX: fffff65a08d3ea08 RSI: 0000000000000247 RDI: 6b6b6b6b6b6b6b6b
+[ 48.019470] ... ^^^ a slub poison
+[ 48.023854] Call Trace:
+[ 48.024050] __fput+0x21f/0x240
+[ 48.024288] task_work_run+0x79/0x90
+[ 48.024555] do_exit+0x2af/0xab0
+[ 48.024799] ? vfs_write+0x16a/0x190
+[ 48.025082] do_group_exit+0x35/0x90
+[ 48.025387] __x64_sys_exit_group+0xf/0x10
+[ 48.025737] do_syscall_64+0x3d/0x130
+[ 48.026056] entry_SYSCALL_64_after_hwframe+0x44/0xa9
+[ 48.026479] RIP: 0033:0x7f53b12082f6
+[ 48.026792] ...
+[ 48.030945] Modules linked in: ptp i6300esb watchdog [last unloaded: ptp_kvm]
+[ 48.045001] Fixing recursive fault but reboot is needed!
+
+This happens in:
+
+static void __fput(struct file *file)
+{ ...
+ if (file->f_op->release)
+ file->f_op->release(inode, file); <<< cdev is kfree'd here
+ if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL &&
+ !(mode & FMODE_PATH))) {
+ cdev_put(inode->i_cdev); <<< cdev fields are accessed here
+
+Namely:
+
+__fput()
+ posix_clock_release()
+ kref_put(&clk->kref, delete_clock) <<< the last reference
+ delete_clock()
+ delete_ptp_clock()
+ kfree(ptp) <<< cdev is embedded in ptp
+ cdev_put
+ module_put(p->owner) <<< *p is kfree'd, bang!
+
+Here cdev is embedded in posix_clock which is embedded in ptp_clock.
+The race happens because ptp_clock's lifetime is controlled by two
+refcounts: kref and cdev.kobj in posix_clock. This is wrong.
+
+Make ptp_clock's sysfs device the parent of the cdev with cdev_device_add(),
+which was created especially for such cases. This way the parent device with
+its ptp_clock is not released until all references to the cdev are released.
+This adds a requirement that an initialized but not exposed struct
+device should be provided to posix_clock_register() by a caller instead
+of a simple dev_t.
+
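+For illustration, the registration path after the change (sketch of the hunks
+below):
+
+    device_initialize(&ptp->dev);
+    ptp->dev.release = ptp_clock_release;   /* replaces delete_ptp_clock() */
+    ...
+    err = posix_clock_register(&ptp->clock, &ptp->dev);
+    /* posix_clock_register() now calls cdev_device_add(), so the device,
+     * and with it the ptp_clock, outlives every open chardev reference.
+     */
+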
+This approach was adopted from the commit 72139dfa2464 ("watchdog: Fix
+the race between the release of watchdog_core_data and cdev"). See
+details of the implementation in the commit 233ed09d7fda ("chardev: add
+helper function to register char devs with a struct device").
+
+Link: https://lore.kernel.org/linux-fsdevel/20191125125342.6189-1-vdronov@redhat.com/T/#u
+Analyzed-by: Stephen Johnston <sjohnsto@redhat.com>
+Analyzed-by: Vern Lovejoy <vlovejoy@redhat.com>
+Signed-off-by: Vladis Dronov <vdronov@redhat.com>
+Acked-by: Richard Cochran <richardcochran@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/ptp/ptp_clock.c | 31 ++++++++++++++-----------------
+ drivers/ptp/ptp_private.h | 2 +-
+ include/linux/posix-clock.h | 19 +++++++++++--------
+ kernel/time/posix-clock.c | 31 +++++++++++++------------------
+ 4 files changed, 39 insertions(+), 44 deletions(-)
+
+--- a/drivers/ptp/ptp_clock.c
++++ b/drivers/ptp/ptp_clock.c
+@@ -175,9 +175,9 @@ static struct posix_clock_operations ptp
+ .read = ptp_read,
+ };
+
+-static void delete_ptp_clock(struct posix_clock *pc)
++static void ptp_clock_release(struct device *dev)
+ {
+- struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
++ struct ptp_clock *ptp = container_of(dev, struct ptp_clock, dev);
+
+ mutex_destroy(&ptp->tsevq_mux);
+ mutex_destroy(&ptp->pincfg_mux);
+@@ -222,7 +222,6 @@ struct ptp_clock *ptp_clock_register(str
+ }
+
+ ptp->clock.ops = ptp_clock_ops;
+- ptp->clock.release = delete_ptp_clock;
+ ptp->info = info;
+ ptp->devid = MKDEV(major, index);
+ ptp->index = index;
+@@ -249,15 +248,6 @@ struct ptp_clock *ptp_clock_register(str
+ if (err)
+ goto no_pin_groups;
+
+- /* Create a new device in our class. */
+- ptp->dev = device_create_with_groups(ptp_class, parent, ptp->devid,
+- ptp, ptp->pin_attr_groups,
+- "ptp%d", ptp->index);
+- if (IS_ERR(ptp->dev)) {
+- err = PTR_ERR(ptp->dev);
+- goto no_device;
+- }
+-
+ /* Register a new PPS source. */
+ if (info->pps) {
+ struct pps_source_info pps;
+@@ -273,8 +263,18 @@ struct ptp_clock *ptp_clock_register(str
+ }
+ }
+
+- /* Create a posix clock. */
+- err = posix_clock_register(&ptp->clock, ptp->devid);
++ /* Initialize a new device of our class in our clock structure. */
++ device_initialize(&ptp->dev);
++ ptp->dev.devt = ptp->devid;
++ ptp->dev.class = ptp_class;
++ ptp->dev.parent = parent;
++ ptp->dev.groups = ptp->pin_attr_groups;
++ ptp->dev.release = ptp_clock_release;
++ dev_set_drvdata(&ptp->dev, ptp);
++ dev_set_name(&ptp->dev, "ptp%d", ptp->index);
++
++ /* Create a posix clock and link it to the device. */
++ err = posix_clock_register(&ptp->clock, &ptp->dev);
+ if (err) {
+ pr_err("failed to create posix clock\n");
+ goto no_clock;
+@@ -286,8 +286,6 @@ no_clock:
+ if (ptp->pps_source)
+ pps_unregister_source(ptp->pps_source);
+ no_pps:
+- device_destroy(ptp_class, ptp->devid);
+-no_device:
+ ptp_cleanup_pin_groups(ptp);
+ no_pin_groups:
+ if (ptp->kworker)
+@@ -317,7 +315,6 @@ int ptp_clock_unregister(struct ptp_cloc
+ if (ptp->pps_source)
+ pps_unregister_source(ptp->pps_source);
+
+- device_destroy(ptp_class, ptp->devid);
+ ptp_cleanup_pin_groups(ptp);
+
+ posix_clock_unregister(&ptp->clock);
+--- a/drivers/ptp/ptp_private.h
++++ b/drivers/ptp/ptp_private.h
+@@ -41,7 +41,7 @@ struct timestamp_event_queue {
+
+ struct ptp_clock {
+ struct posix_clock clock;
+- struct device *dev;
++ struct device dev;
+ struct ptp_clock_info *info;
+ dev_t devid;
+ int index; /* index into clocks.map */
+--- a/include/linux/posix-clock.h
++++ b/include/linux/posix-clock.h
+@@ -82,29 +82,32 @@ struct posix_clock_operations {
+ *
+ * @ops: Functional interface to the clock
+ * @cdev: Character device instance for this clock
+- * @kref: Reference count.
++ * @dev: Pointer to the clock's device.
+ * @rwsem: Protects the 'zombie' field from concurrent access.
+ * @zombie: If 'zombie' is true, then the hardware has disappeared.
+- * @release: A function to free the structure when the reference count reaches
+- * zero. May be NULL if structure is statically allocated.
+ *
+ * Drivers should embed their struct posix_clock within a private
+ * structure, obtaining a reference to it during callbacks using
+ * container_of().
++ *
++ * Drivers should supply an initialized but not exposed struct device
++ * to posix_clock_register(). It is used to manage lifetime of the
++ * driver's private structure. It's 'release' field should be set to
++ * a release function for this private structure.
+ */
+ struct posix_clock {
+ struct posix_clock_operations ops;
+ struct cdev cdev;
+- struct kref kref;
++ struct device *dev;
+ struct rw_semaphore rwsem;
+ bool zombie;
+- void (*release)(struct posix_clock *clk);
+ };
+
+ /**
+ * posix_clock_register() - register a new clock
+- * @clk: Pointer to the clock. Caller must provide 'ops' and 'release'
+- * @devid: Allocated device id
++ * @clk: Pointer to the clock. Caller must provide 'ops' field
++ * @dev: Pointer to the initialized device. Caller must provide
++ * 'release' field
+ *
+ * A clock driver calls this function to register itself with the
+ * clock device subsystem. If 'clk' points to dynamically allocated
+@@ -113,7 +116,7 @@ struct posix_clock {
+ *
+ * Returns zero on success, non-zero otherwise.
+ */
+-int posix_clock_register(struct posix_clock *clk, dev_t devid);
++int posix_clock_register(struct posix_clock *clk, struct device *dev);
+
+ /**
+ * posix_clock_unregister() - unregister a clock
+--- a/kernel/time/posix-clock.c
++++ b/kernel/time/posix-clock.c
+@@ -27,8 +27,6 @@
+
+ #include "posix-timers.h"
+
+-static void delete_clock(struct kref *kref);
+-
+ /*
+ * Returns NULL if the posix_clock instance attached to 'fp' is old and stale.
+ */
+@@ -138,7 +136,7 @@ static int posix_clock_open(struct inode
+ err = 0;
+
+ if (!err) {
+- kref_get(&clk->kref);
++ get_device(clk->dev);
+ fp->private_data = clk;
+ }
+ out:
+@@ -154,7 +152,7 @@ static int posix_clock_release(struct in
+ if (clk->ops.release)
+ err = clk->ops.release(clk);
+
+- kref_put(&clk->kref, delete_clock);
++ put_device(clk->dev);
+
+ fp->private_data = NULL;
+
+@@ -174,38 +172,35 @@ static const struct file_operations posi
+ #endif
+ };
+
+-int posix_clock_register(struct posix_clock *clk, dev_t devid)
++int posix_clock_register(struct posix_clock *clk, struct device *dev)
+ {
+ int err;
+
+- kref_init(&clk->kref);
+ init_rwsem(&clk->rwsem);
+
+ cdev_init(&clk->cdev, &posix_clock_file_operations);
++ err = cdev_device_add(&clk->cdev, dev);
++ if (err) {
++ pr_err("%s unable to add device %d:%d\n",
++ dev_name(dev), MAJOR(dev->devt), MINOR(dev->devt));
++ return err;
++ }
+ clk->cdev.owner = clk->ops.owner;
+- err = cdev_add(&clk->cdev, devid, 1);
++ clk->dev = dev;
+
+- return err;
++ return 0;
+ }
+ EXPORT_SYMBOL_GPL(posix_clock_register);
+
+-static void delete_clock(struct kref *kref)
+-{
+- struct posix_clock *clk = container_of(kref, struct posix_clock, kref);
+-
+- if (clk->release)
+- clk->release(clk);
+-}
+-
+ void posix_clock_unregister(struct posix_clock *clk)
+ {
+- cdev_del(&clk->cdev);
++ cdev_device_del(&clk->cdev, clk->dev);
+
+ down_write(&clk->rwsem);
+ clk->zombie = true;
+ up_write(&clk->rwsem);
+
+- kref_put(&clk->kref, delete_clock);
++ put_device(clk->dev);
+ }
+ EXPORT_SYMBOL_GPL(posix_clock_unregister);
+
net-add-a-read_once-in-skb_peek_tail.patch
net-icmp-fix-data-race-in-cmp_global_allow.patch
hrtimer-annotate-lockless-access-to-timer-state.patch
+net-ena-fix-napi-handler-misbehavior-when-the-napi-budget-is-zero.patch
+net-mlxfw-fix-out-of-memory-error-in-mfa2-flash-burning.patch
+net-stmmac-dwmac-meson8b-fix-the-rgmii-tx-delay-on-meson8b-8m2-socs.patch
+ptp-fix-the-race-between-the-release-of-ptp_clock-and-cdev.patch
+tcp-fix-highest_sack-and-highest_sack_seq.patch
+udp-fix-integer-overflow-while-computing-available-space-in-sk_rcvbuf.patch
+vhost-vsock-accept-only-packets-with-the-right-dst_cid.patch
+net-add-bool-confirm_neigh-parameter-for-dst_ops.update_pmtu.patch
+ip6_gre-do-not-confirm-neighbor-when-do-pmtu-update.patch
+gtp-do-not-confirm-neighbor-when-do-pmtu-update.patch
+net-dst-add-new-function-skb_dst_update_pmtu_no_confirm.patch
+tunnel-do-not-confirm-neighbor-when-do-pmtu-update.patch
+vti-do-not-confirm-neighbor-when-do-pmtu-update.patch
+sit-do-not-confirm-neighbor-when-do-pmtu-update.patch
+net-dst-do-not-confirm-neighbor-for-vxlan-and-geneve-pmtu-update.patch
+gtp-do-not-allow-adding-duplicate-tid-and-ms_addr-pdp-context.patch
+net-marvell-mvpp2-phylink-requires-the-link-interrupt.patch
+tcp-dccp-fix-possible-race-__inet_lookup_established.patch
+tcp-do-not-send-empty-skb-from-tcp_write_xmit.patch
+gtp-fix-wrong-condition-in-gtp_genl_dump_pdp.patch
+gtp-fix-an-use-after-free-in-ipv4_pdp_find.patch
+gtp-avoid-zero-size-hashtable.patch
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:36:29 PM CET
+From: Hangbin Liu <liuhangbin@gmail.com>
+Date: Sun, 22 Dec 2019 10:51:15 +0800
+Subject: sit: do not confirm neighbor when do pmtu update
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit 4d42df46d6372ece4cb4279870b46c2ea7304a47 ]
+
+When doing an IPv6 tunnel PMTU update that ends up calling
+__ip6_rt_update_pmtu(), we should not call dst_confirm_neigh() as there is
+no two-way communication.
+
+v5: No change.
+v4: No change.
+v3: Do not remove dst_confirm_neigh, but add a new bool parameter in
+ dst_ops.update_pmtu to control whether we should do neighbor confirm.
+ Also split the big patch to small ones for each area.
+v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu.
+
+Reviewed-by: Guillaume Nault <gnault@redhat.com>
+Acked-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/sit.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv6/sit.c
++++ b/net/ipv6/sit.c
+@@ -943,7 +943,7 @@ static netdev_tx_t ipip6_tunnel_xmit(str
+ }
+
+ if (tunnel->parms.iph.daddr)
+- skb_dst_update_pmtu(skb, mtu);
++ skb_dst_update_pmtu_no_confirm(skb, mtu);
+
+ if (skb->len > mtu && !skb_is_gso(skb)) {
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:36:29 PM CET
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 13 Dec 2019 18:20:41 -0800
+Subject: tcp/dccp: fix possible race __inet_lookup_established()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 8dbd76e79a16b45b2ccb01d2f2e08dbf64e71e40 ]
+
+Michal Kubecek and Firo Yang did a very nice analysis of crashes
+happening in __inet_lookup_established().
+
+Since a TCP socket can go from TCP_ESTABLISH to TCP_LISTEN
+(via a close()/socket()/listen() cycle) without an RCU grace period,
+I should not have changed the listeners' linkage in their hash table.
+
+They must use the nulls protocol (Documentation/RCU/rculist_nulls.txt),
+so that a lookup can detect that a socket in a hash list was moved to
+another one.
+
+Since we added code in commit d296ba60d8e2 ("soreuseport: Resolve
+merge conflict for v4/v6 ordering fix"), we have to add
+an hlist_nulls_add_tail_rcu() helper.
+
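+For illustration, the nulls protocol lets a lookup notice that it has walked
+onto a chain belonging to a different bucket and restart (sketch of the
+established-table lookup pattern):
+
+    begin:
+    sk_nulls_for_each_rcu(sk, node, &head->chain) {
+        ...
+    }
+    if (get_nulls_value(node) != slot)
+        goto begin; /* socket moved to another chain, retry */
+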
+Fixes: 3b24d854cb35 ("tcp/dccp: do not touch listener sk_refcnt under synflood")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Michal Kubecek <mkubecek@suse.cz>
+Reported-by: Firo Yang <firo.yang@suse.com>
+Reviewed-by: Michal Kubecek <mkubecek@suse.cz>
+Link: https://lore.kernel.org/netdev/20191120083919.GH27852@unicorn.suse.cz/
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/rculist_nulls.h | 37 +++++++++++++++++++++++++++++++++++++
+ include/net/inet_hashtables.h | 12 +++++++++---
+ include/net/sock.h | 5 +++++
+ net/ipv4/inet_diag.c | 3 ++-
+ net/ipv4/inet_hashtables.c | 16 ++++++++--------
+ net/ipv4/tcp_ipv4.c | 7 ++++---
+ 6 files changed, 65 insertions(+), 15 deletions(-)
+
+--- a/include/linux/rculist_nulls.h
++++ b/include/linux/rculist_nulls.h
+@@ -101,6 +101,43 @@ static inline void hlist_nulls_add_head_
+ }
+
+ /**
++ * hlist_nulls_add_tail_rcu
++ * @n: the element to add to the hash list.
++ * @h: the list to add to.
++ *
++ * Description:
++ * Adds the specified element to the specified hlist_nulls,
++ * while permitting racing traversals.
++ *
++ * The caller must take whatever precautions are necessary
++ * (such as holding appropriate locks) to avoid racing
++ * with another list-mutation primitive, such as hlist_nulls_add_head_rcu()
++ * or hlist_nulls_del_rcu(), running on this same list.
++ * However, it is perfectly legal to run concurrently with
++ * the _rcu list-traversal primitives, such as
++ * hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency
++ * problems on Alpha CPUs. Regardless of the type of CPU, the
++ * list-traversal primitive must be guarded by rcu_read_lock().
++ */
++static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n,
++ struct hlist_nulls_head *h)
++{
++ struct hlist_nulls_node *i, *last = NULL;
++
++ /* Note: write side code, so rcu accessors are not needed. */
++ for (i = h->first; !is_a_nulls(i); i = i->next)
++ last = i;
++
++ if (last) {
++ n->next = last->next;
++ n->pprev = &last->next;
++ rcu_assign_pointer(hlist_next_rcu(last), n);
++ } else {
++ hlist_nulls_add_head_rcu(n, h);
++ }
++}
++
++/**
+ * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type
+ * @tpos: the type * to use as a loop cursor.
+ * @pos: the &struct hlist_nulls_node to use as a loop cursor.
+--- a/include/net/inet_hashtables.h
++++ b/include/net/inet_hashtables.h
+@@ -106,13 +106,19 @@ struct inet_bind_hashbucket {
+ struct hlist_head chain;
+ };
+
+-/*
+- * Sockets can be hashed in established or listening table
++/* Sockets can be hashed in established or listening table.
++ * We must use different 'nulls' end-of-chain value for all hash buckets :
++ * A socket might transition from ESTABLISH to LISTEN state without
++ * RCU grace period. A lookup in ehash table needs to handle this case.
+ */
++#define LISTENING_NULLS_BASE (1U << 29)
+ struct inet_listen_hashbucket {
+ spinlock_t lock;
+ unsigned int count;
+- struct hlist_head head;
++ union {
++ struct hlist_head head;
++ struct hlist_nulls_head nulls_head;
++ };
+ };
+
+ /* This is for listening sockets, thus all sockets which possess wildcards. */
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -721,6 +721,11 @@ static inline void __sk_nulls_add_node_r
+ hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list);
+ }
+
++static inline void __sk_nulls_add_node_tail_rcu(struct sock *sk, struct hlist_nulls_head *list)
++{
++ hlist_nulls_add_tail_rcu(&sk->sk_nulls_node, list);
++}
++
+ static inline void sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list)
+ {
+ sock_hold(sk);
+--- a/net/ipv4/inet_diag.c
++++ b/net/ipv4/inet_diag.c
+@@ -918,11 +918,12 @@ void inet_diag_dump_icsk(struct inet_has
+
+ for (i = s_i; i < INET_LHTABLE_SIZE; i++) {
+ struct inet_listen_hashbucket *ilb;
++ struct hlist_nulls_node *node;
+
+ num = 0;
+ ilb = &hashinfo->listening_hash[i];
+ spin_lock(&ilb->lock);
+- sk_for_each(sk, &ilb->head) {
++ sk_nulls_for_each(sk, node, &ilb->nulls_head) {
+ struct inet_sock *inet = inet_sk(sk);
+
+ if (!net_eq(sock_net(sk), net))
+--- a/net/ipv4/inet_hashtables.c
++++ b/net/ipv4/inet_hashtables.c
+@@ -560,10 +560,11 @@ static int inet_reuseport_add_sock(struc
+ struct inet_listen_hashbucket *ilb)
+ {
+ struct inet_bind_bucket *tb = inet_csk(sk)->icsk_bind_hash;
++ const struct hlist_nulls_node *node;
+ struct sock *sk2;
+ kuid_t uid = sock_i_uid(sk);
+
+- sk_for_each_rcu(sk2, &ilb->head) {
++ sk_nulls_for_each_rcu(sk2, node, &ilb->nulls_head) {
+ if (sk2 != sk &&
+ sk2->sk_family == sk->sk_family &&
+ ipv6_only_sock(sk2) == ipv6_only_sock(sk) &&
+@@ -599,9 +600,9 @@ int __inet_hash(struct sock *sk, struct
+ }
+ if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
+ sk->sk_family == AF_INET6)
+- hlist_add_tail_rcu(&sk->sk_node, &ilb->head);
++ __sk_nulls_add_node_tail_rcu(sk, &ilb->nulls_head);
+ else
+- hlist_add_head_rcu(&sk->sk_node, &ilb->head);
++ __sk_nulls_add_node_rcu(sk, &ilb->nulls_head);
+ inet_hash2(hashinfo, sk);
+ ilb->count++;
+ sock_set_flag(sk, SOCK_RCU_FREE);
+@@ -650,11 +651,9 @@ void inet_unhash(struct sock *sk)
+ reuseport_detach_sock(sk);
+ if (ilb) {
+ inet_unhash2(hashinfo, sk);
+- __sk_del_node_init(sk);
+- ilb->count--;
+- } else {
+- __sk_nulls_del_node_init_rcu(sk);
++ ilb->count--;
+ }
++ __sk_nulls_del_node_init_rcu(sk);
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+ unlock:
+ spin_unlock_bh(lock);
+@@ -790,7 +789,8 @@ void inet_hashinfo_init(struct inet_hash
+
+ for (i = 0; i < INET_LHTABLE_SIZE; i++) {
+ spin_lock_init(&h->listening_hash[i].lock);
+- INIT_HLIST_HEAD(&h->listening_hash[i].head);
++ INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].nulls_head,
++ i + LISTENING_NULLS_BASE);
+ h->listening_hash[i].count = 0;
+ }
+
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -2020,13 +2020,14 @@ static void *listening_get_next(struct s
+ struct tcp_iter_state *st = seq->private;
+ struct net *net = seq_file_net(seq);
+ struct inet_listen_hashbucket *ilb;
++ struct hlist_nulls_node *node;
+ struct sock *sk = cur;
+
+ if (!sk) {
+ get_head:
+ ilb = &tcp_hashinfo.listening_hash[st->bucket];
+ spin_lock(&ilb->lock);
+- sk = sk_head(&ilb->head);
++ sk = sk_nulls_head(&ilb->nulls_head);
+ st->offset = 0;
+ goto get_sk;
+ }
+@@ -2034,9 +2035,9 @@ get_head:
+ ++st->num;
+ ++st->offset;
+
+- sk = sk_next(sk);
++ sk = sk_nulls_next(sk);
+ get_sk:
+- sk_for_each_from(sk) {
++ sk_nulls_for_each_from(sk, node) {
+ if (!net_eq(sock_net(sk), net))
+ continue;
+ if (sk->sk_family == afinfo->family)
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:36:29 PM CET
+From: Eric Dumazet <edumazet@google.com>
+Date: Thu, 12 Dec 2019 12:55:29 -0800
+Subject: tcp: do not send empty skb from tcp_write_xmit()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 1f85e6267caca44b30c54711652b0726fadbb131 ]
+
+The backport of commit fdfc5c8594c2 ("tcp: remove empty skb from
+write queue in error cases") into linux-4.14 stable triggered
+various bugs. One of them has been fixed in commit ba2ddb43f270
+("tcp: Don't dequeue SYN/FIN-segments from write-queue"), but
+we still see crashes on some occasions.
+
+The root cause is that when tcp_sendmsg() has allocated a fresh
+skb but could not append a fragment before being blocked
+in sk_stream_wait_memory(), tcp_write_xmit() might be called
+and decide to send this fresh, empty skb.
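+
+In other words, the skb sits on the write queue with seq == end_seq, because
+the entail path initializes both from tp->write_seq before any payload is
+copied in (simplified sketch of that step, not the exact helper):
+
+	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
+
+	tcb->seq       = tcb->end_seq = tp->write_seq; /* no payload yet   */
+	tcb->tcp_flags = TCPHDR_ACK;                   /* pure ACK if sent */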
+
+Sending an empty packet is not only silly, it might have caused
+many issues we had in the past with tp->packets_out being
+out of sync.
+
+Fixes: c65f7f00c587 ("[TCP]: Simplify SKB data portion allocation with NETIF_F_SG.")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Christoph Paasch <cpaasch@apple.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Cc: Jason Baron <jbaron@akamai.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_output.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -2376,6 +2376,14 @@ static bool tcp_write_xmit(struct sock *
+ if (tcp_small_queue_check(sk, skb, 0))
+ break;
+
++ /* Argh, we hit an empty skb(), presumably a thread
++ * is sleeping in sendmsg()/sk_stream_wait_memory().
++ * We do not want to send a pure-ack packet and have
++ * a strange looking rtx queue with empty packet(s).
++ */
++ if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq)
++ break;
++
+ if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
+ break;
+
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:36:29 PM CET
+From: Cambda Zhu <cambda@linux.alibaba.com>
+Date: Fri, 27 Dec 2019 16:52:37 +0800
+Subject: tcp: Fix highest_sack and highest_sack_seq
+
+From: Cambda Zhu <cambda@linux.alibaba.com>
+
+[ Upstream commit 853697504de043ff0bfd815bd3a64de1dce73dc7 ]
+
+Commit 50895b9de1d3 ("tcp: highest_sack fix") removed the logic that
+set tp->highest_sack to the head of the send queue. That logic was
+error prone, but it had a purpose: as long as the pointer to the
+highest-SACKed skb is used instead of a sequence number, we need to
+set tp->highest_sack to NULL when there is no skb after the last SACK,
+and then replace the NULL with the real skb when a new skb is inserted
+into the rtx queue, because NULL means the highest SACK seq is
+tp->snd_nxt. If tp->highest_sack is NULL and new data is sent, the
+next ACK carrying a SACK option will increase tp->reordering
+unexpectedly.
+
+This patch sets tp->highest_sack to the tail of the rtx queue if it
+is NULL and new data is sent. The patch keeps the rule that
+highest_sack can only be maintained by SACK processing, except for
+this single case.
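+
+For reference, the consumer of the "NULL means snd_nxt" convention is the
+tcp_highest_sack_seq() helper in include/net/tcp.h, which at the time of
+this fix looks roughly like:
+
+static inline u32 tcp_highest_sack_seq(struct tcp_sock *tp)
+{
+	if (!tp->sacked_out)
+		return tp->snd_una;
+
+	if (tp->highest_sack == NULL)
+		return tp->snd_nxt;
+
+	return TCP_SKB_CB(tp->highest_sack)->seq;
+}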
+
+Fixes: 50895b9de1d3 ("tcp: highest_sack fix")
+Signed-off-by: Cambda Zhu <cambda@linux.alibaba.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_output.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -60,6 +60,9 @@ static void tcp_event_new_data_sent(stru
+ __skb_unlink(skb, &sk->sk_write_queue);
+ tcp_rbtree_insert(&sk->tcp_rtx_queue, skb);
+
++ if (tp->highest_sack == NULL)
++ tp->highest_sack = skb;
++
+ tp->packets_out += tcp_skb_pcount(skb);
+ if (!prior_packets || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
+ tcp_rearm_rto(sk);
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:36:29 PM CET
+From: Hangbin Liu <liuhangbin@gmail.com>
+Date: Sun, 22 Dec 2019 10:51:13 +0800
+Subject: tunnel: do not confirm neighbor when do pmtu update
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit 7a1592bcb15d71400a98632727791d1e68ea0ee8 ]
+
+When doing a tunnel PMTU update, which calls __ip6_rt_update_pmtu() in the
+end, we should not call dst_confirm_neigh() as there is no two-way
+communication.
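+
+The _no_confirm variant used below is the helper this series relies on,
+provided by an earlier patch of the set; its intent is captured by this
+minimal sketch (it simply passes confirm_neigh = false down to
+dst_ops.update_pmtu so the neighbour entry is left untouched):
+
+static inline void skb_dst_update_pmtu_no_confirm(struct sk_buff *skb, u32 mtu)
+{
+	struct dst_entry *dst = skb_dst(skb);
+
+	if (dst && dst->ops->update_pmtu)
+		dst->ops->update_pmtu(dst, NULL, skb, mtu, false);
+}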
+
+v5: No change.
+v4: Update commit description
+v3: Do not remove dst_confirm_neigh, but add a new bool parameter in
+ dst_ops.update_pmtu to control whether we should do neighbor confirm.
+ Also split the big patch to small ones for each area.
+v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu.
+
+Fixes: 0dec879f636f ("net: use dst_confirm_neigh for UDP, RAW, ICMP, L2TP")
+Reviewed-by: Guillaume Nault <gnault@redhat.com>
+Tested-by: Guillaume Nault <gnault@redhat.com>
+Acked-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ip_tunnel.c | 2 +-
+ net/ipv6/ip6_tunnel.c | 4 ++--
+ 2 files changed, 3 insertions(+), 3 deletions(-)
+
+--- a/net/ipv4/ip_tunnel.c
++++ b/net/ipv4/ip_tunnel.c
+@@ -513,7 +513,7 @@ static int tnl_update_pmtu(struct net_de
+ else
+ mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
+
+- skb_dst_update_pmtu(skb, mtu);
++ skb_dst_update_pmtu_no_confirm(skb, mtu);
+
+ if (skb->protocol == htons(ETH_P_IP)) {
+ if (!skb_is_gso(skb) &&
+--- a/net/ipv6/ip6_tunnel.c
++++ b/net/ipv6/ip6_tunnel.c
+@@ -645,7 +645,7 @@ ip4ip6_err(struct sk_buff *skb, struct i
+ if (rel_info > dst_mtu(skb_dst(skb2)))
+ goto out;
+
+- skb_dst_update_pmtu(skb2, rel_info);
++ skb_dst_update_pmtu_no_confirm(skb2, rel_info);
+ }
+
+ icmp_send(skb2, rel_type, rel_code, htonl(rel_info));
+@@ -1137,7 +1137,7 @@ route_lookup:
+ mtu = max(mtu, skb->protocol == htons(ETH_P_IPV6) ?
+ IPV6_MIN_MTU : IPV4_MIN_MTU);
+
+- skb_dst_update_pmtu(skb, mtu);
++ skb_dst_update_pmtu_no_confirm(skb, mtu);
+ if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) {
+ *pmtu = mtu;
+ err = -EMSGSIZE;
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:36:29 PM CET
+From: Antonio Messina <amessina@google.com>
+Date: Thu, 19 Dec 2019 15:08:03 +0100
+Subject: udp: fix integer overflow while computing available space in sk_rcvbuf
+
+From: Antonio Messina <amessina@google.com>
+
+[ Upstream commit feed8a4fc9d46c3126fb9fcae0e9248270c6321a ]
+
+When the size of the receive buffer for a socket is close to 2^31,
+computing whether we have enough space in the buffer to copy a packet
+from the queue into it can hit an integer overflow.
+
+When a user sets net.core.rmem_default to a value close to 2^31, UDP
+packets are dropped because of this overflow. This can be visible, for
+instance, as failures to resolve hostnames.
+
+This can be fixed by casting sk_rcvbuf (which is an int) to unsigned
+int, similarly to how it is done in TCP.
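+
+A standalone illustration of the arithmetic (plain userspace C, not the
+kernel code; the variable values are made up for the example):
+
+#include <limits.h>
+#include <stdio.h>
+
+int main(void)
+{
+	int rmem = 100000;        /* sk_rmem_alloc after adding the skb size */
+	int size = 1500;          /* truesize of the incoming packet         */
+	int sk_rcvbuf = INT_MAX;  /* net.core.rmem_default close to 2^31     */
+
+	/* Signed addition overflows here (undefined behaviour in C; on the
+	 * compilers the kernel uses it wraps negative), so the drop test
+	 * spuriously triggers:
+	 */
+	printf("signed  : drop=%d\n", rmem > (size + sk_rcvbuf));
+
+	/* With sk_rcvbuf cast to unsigned int the sum stays well-defined
+	 * and the packet is accepted:
+	 */
+	printf("unsigned: drop=%d\n", rmem > (size + (unsigned int)sk_rcvbuf));
+	return 0;
+}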
+
+Signed-off-by: Antonio Messina <amessina@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/udp.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -1412,7 +1412,7 @@ int __udp_enqueue_schedule_skb(struct so
+ * queue contains some other skb
+ */
+ rmem = atomic_add_return(size, &sk->sk_rmem_alloc);
+- if (rmem > (size + sk->sk_rcvbuf))
++ if (rmem > (size + (unsigned int)sk->sk_rcvbuf))
+ goto uncharge_drop;
+
+ spin_lock(&list->lock);
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:36:29 PM CET
+From: Stefano Garzarella <sgarzare@redhat.com>
+Date: Fri, 6 Dec 2019 15:39:12 +0100
+Subject: vhost/vsock: accept only packets with the right dst_cid
+
+From: Stefano Garzarella <sgarzare@redhat.com>
+
+[ Upstream commit 8a3cc29c316c17de590e3ff8b59f3d6cbfd37b0a ]
+
+When we receive a new packet from the guest, we check whether the
+src_cid is correct, but we forgot to check the dst_cid.
+
+The host should accept only packets whose dst_cid is
+equal to the host CID.
+
+Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/vhost/vsock.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/vhost/vsock.c
++++ b/drivers/vhost/vsock.c
+@@ -436,7 +436,9 @@ static void vhost_vsock_handle_tx_kick(s
+ virtio_transport_deliver_tap_pkt(pkt);
+
+ /* Only accept correctly addressed packets */
+- if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid)
++ if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid &&
++ le64_to_cpu(pkt->hdr.dst_cid) ==
++ vhost_transport_get_local_cid())
+ virtio_transport_recv_pkt(pkt);
+ else
+ virtio_transport_free_pkt(pkt);
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:36:29 PM CET
+From: Hangbin Liu <liuhangbin@gmail.com>
+Date: Sun, 22 Dec 2019 10:51:14 +0800
+Subject: vti: do not confirm neighbor when do pmtu update
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit 8247a79efa2f28b44329f363272550c1738377de ]
+
+When doing an IPv6 tunnel PMTU update, which calls __ip6_rt_update_pmtu() in
+the end, we should not call dst_confirm_neigh() as there is no two-way
+communication.
+
+Although vti and vti6 are immune to this problem because they are IFF_NOARP
+interfaces, as Guillaume pointed out, there is still no point in confirming
+the neighbour here.
+
+v5: Update commit description.
+v4: No change.
+v3: Do not remove dst_confirm_neigh, but add a new bool parameter in
+ dst_ops.update_pmtu to control whether we should do neighbor confirm.
+ Also split the big patch to small ones for each area.
+v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu.
+
+Reviewed-by: Guillaume Nault <gnault@redhat.com>
+Acked-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ip_vti.c | 2 +-
+ net/ipv6/ip6_vti.c | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/ip_vti.c
++++ b/net/ipv4/ip_vti.c
+@@ -235,7 +235,7 @@ static netdev_tx_t vti_xmit(struct sk_bu
+
+ mtu = dst_mtu(dst);
+ if (skb->len > mtu) {
+- skb_dst_update_pmtu(skb, mtu);
++ skb_dst_update_pmtu_no_confirm(skb, mtu);
+ if (skb->protocol == htons(ETH_P_IP)) {
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+ htonl(mtu));
+--- a/net/ipv6/ip6_vti.c
++++ b/net/ipv6/ip6_vti.c
+@@ -483,7 +483,7 @@ vti6_xmit(struct sk_buff *skb, struct ne
+
+ mtu = dst_mtu(dst);
+ if (skb->len > mtu) {
+- skb_dst_update_pmtu(skb, mtu);
++ skb_dst_update_pmtu_no_confirm(skb, mtu);
+
+ if (skb->protocol == htons(ETH_P_IPV6)) {
+ if (mtu < IPV6_MIN_MTU)