--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:36:29 PM CET
+From: Taehee Yoo <ap420073@gmail.com>
+Date: Wed, 11 Dec 2019 08:23:48 +0000
+Subject: gtp: avoid zero size hashtable
+
+From: Taehee Yoo <ap420073@gmail.com>
+
+[ Upstream commit 6a902c0f31993ab02e1b6ea7085002b9c9083b6a ]
+
+The default GTP hashtable size is 1024, and userspace can set a specific
+hashtable size with IFLA_GTP_PDP_HASHSIZE. If the hashtable size is set to 0
+from userspace, the hashtable will not work and a panic will occur.
+
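+For illustration, a minimal sketch of the patched gtp_newlink() logic, which
+falls back to the default size in both cases:
+
+    if (!data[IFLA_GTP_PDP_HASHSIZE]) {
+        hashsize = 1024;
+    } else {
+        hashsize = nla_get_u32(data[IFLA_GTP_PDP_HASHSIZE]);
+        if (!hashsize)  /* a zero-size table would break every lookup */
+            hashsize = 1024;
+    }
+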
+Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)")
+Signed-off-by: Taehee Yoo <ap420073@gmail.com>
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/gtp.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/gtp.c
++++ b/drivers/net/gtp.c
+@@ -671,10 +671,13 @@ static int gtp_newlink(struct net *src_n
+ if (err < 0)
+ return err;
+
+- if (!data[IFLA_GTP_PDP_HASHSIZE])
++ if (!data[IFLA_GTP_PDP_HASHSIZE]) {
+ hashsize = 1024;
+- else
++ } else {
+ hashsize = nla_get_u32(data[IFLA_GTP_PDP_HASHSIZE]);
++ if (!hashsize)
++ hashsize = 1024;
++ }
+
+ err = gtp_hashtable_new(gtp, hashsize);
+ if (err < 0)
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:36:29 PM CET
+From: Taehee Yoo <ap420073@gmail.com>
+Date: Wed, 11 Dec 2019 08:23:00 +0000
+Subject: gtp: do not allow adding duplicate tid and ms_addr pdp context
+
+From: Taehee Yoo <ap420073@gmail.com>
+
+[ Upstream commit 6b01b1d9b2d38dc84ac398bfe9f00baff06a31e5 ]
+
+The GTP RX packet path looks up the pdp context by TID. If duplicate TID pdp
+contexts exist in the list, the correct pdp context cannot be selected,
+so the TID value must be unique.
+The GTP TX packet path looks up the pdp context by ms_addr. If duplicate
+ms_addr pdp contexts exist in the list, the correct pdp context cannot be
+selected, so the ms_addr value must be unique.
+
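+For illustration, a simplified sketch of the duplicate check the patched
+gtp_pdp_add() performs before inserting a new context (tid and i_tei here
+stand for the GTPA_TID and GTPA_I_TEI attributes):
+
+    pctx = ipv4_pdp_find(gtp, ms_addr);         /* ms_addr already in use? */
+    if (version == GTP_V0)
+        pctx_tid = gtp0_pdp_find(gtp, tid);     /* TID already in use? */
+    else if (version == GTP_V1)
+        pctx_tid = gtp1_pdp_find(gtp, i_tei);
+    if (pctx || pctx_tid)
+        found = true;   /* handled via NLM_F_EXCL/NLM_F_REPLACE below */
+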
+Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)")
+Signed-off-by: Taehee Yoo <ap420073@gmail.com>
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/gtp.c | 32 ++++++++++++++++++++++----------
+ 1 file changed, 22 insertions(+), 10 deletions(-)
+
+--- a/drivers/net/gtp.c
++++ b/drivers/net/gtp.c
+@@ -931,24 +931,31 @@ static void ipv4_pdp_fill(struct pdp_ctx
+ }
+ }
+
+-static int ipv4_pdp_add(struct gtp_dev *gtp, struct sock *sk,
+- struct genl_info *info)
++static int gtp_pdp_add(struct gtp_dev *gtp, struct sock *sk,
++ struct genl_info *info)
+ {
++ struct pdp_ctx *pctx, *pctx_tid = NULL;
+ struct net_device *dev = gtp->dev;
+ u32 hash_ms, hash_tid = 0;
+- struct pdp_ctx *pctx;
++ unsigned int version;
+ bool found = false;
+ __be32 ms_addr;
+
+ ms_addr = nla_get_be32(info->attrs[GTPA_MS_ADDRESS]);
+ hash_ms = ipv4_hashfn(ms_addr) % gtp->hash_size;
++ version = nla_get_u32(info->attrs[GTPA_VERSION]);
+
+- hlist_for_each_entry_rcu(pctx, &gtp->addr_hash[hash_ms], hlist_addr) {
+- if (pctx->ms_addr_ip4.s_addr == ms_addr) {
+- found = true;
+- break;
+- }
+- }
++ pctx = ipv4_pdp_find(gtp, ms_addr);
++ if (pctx)
++ found = true;
++ if (version == GTP_V0)
++ pctx_tid = gtp0_pdp_find(gtp,
++ nla_get_u64(info->attrs[GTPA_TID]));
++ else if (version == GTP_V1)
++ pctx_tid = gtp1_pdp_find(gtp,
++ nla_get_u32(info->attrs[GTPA_I_TEI]));
++ if (pctx_tid)
++ found = true;
+
+ if (found) {
+ if (info->nlhdr->nlmsg_flags & NLM_F_EXCL)
+@@ -956,6 +963,11 @@ static int ipv4_pdp_add(struct gtp_dev *
+ if (info->nlhdr->nlmsg_flags & NLM_F_REPLACE)
+ return -EOPNOTSUPP;
+
++ if (pctx && pctx_tid)
++ return -EEXIST;
++ if (!pctx)
++ pctx = pctx_tid;
++
+ ipv4_pdp_fill(pctx, info);
+
+ if (pctx->gtp_version == GTP_V0)
+@@ -1079,7 +1091,7 @@ static int gtp_genl_new_pdp(struct sk_bu
+ goto out_unlock;
+ }
+
+- err = ipv4_pdp_add(gtp, sk, info);
++ err = gtp_pdp_add(gtp, sk, info);
+
+ out_unlock:
+ rcu_read_unlock();
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:36:29 PM CET
+From: Hangbin Liu <liuhangbin@gmail.com>
+Date: Sun, 22 Dec 2019 10:51:11 +0800
+Subject: gtp: do not confirm neighbor when do pmtu update
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit 6e9105c73f8d2163d12d5dfd762fd75483ed30f5 ]
+
+When doing an IPv6 tunnel PMTU update that ends up calling
+__ip6_rt_update_pmtu(), we should not call dst_confirm_neigh() as there is
+no two-way communication.
+
+Although GTP only supports IPv4 right now, and __ip_rt_update_pmtu() does not
+call dst_confirm_neigh(), we still set the parameter to false to keep
+consistency with the IPv6 code.
+
+v5: No change.
+v4: No change.
+v3: Do not remove dst_confirm_neigh, but add a new bool parameter in
+ dst_ops.update_pmtu to control whether we should do neighbor confirm.
+ Also split the big patch to small ones for each area.
+v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu.
+
+Reviewed-by: Guillaume Nault <gnault@redhat.com>
+Acked-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/gtp.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/gtp.c
++++ b/drivers/net/gtp.c
+@@ -545,7 +545,7 @@ static int gtp_build_skb_ip4(struct sk_b
+ mtu = dst_mtu(&rt->dst);
+ }
+
+- rt->dst.ops->update_pmtu(&rt->dst, NULL, skb, mtu, true);
++ rt->dst.ops->update_pmtu(&rt->dst, NULL, skb, mtu, false);
+
+ if (!skb_is_gso(skb) && (iph->frag_off & htons(IP_DF)) &&
+ mtu < ntohs(iph->tot_len)) {
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:36:29 PM CET
+From: Taehee Yoo <ap420073@gmail.com>
+Date: Wed, 11 Dec 2019 08:23:34 +0000
+Subject: gtp: fix an use-after-free in ipv4_pdp_find()
+
+From: Taehee Yoo <ap420073@gmail.com>
+
+[ Upstream commit 94dc550a5062030569d4aa76e10e50c8fc001930 ]
+
+ipv4_pdp_find() is called in the TX packet path of GTP and internally uses
+gtp->tid_hash to look up the pdp context.
+In the current code, gtp->tid_hash and gtp->addr_hash are freed by
+->dellink(), which is gtp_dellink().
+But gtp_dellink() can be called while packets are still being processed,
+so gtp_dellink() should not free gtp->tid_hash and gtp->addr_hash.
+Instead, dev->priv_destructor() is used, because this callback is only
+called after all packet processing has safely finished.
+
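+In essence, the tables are now freed from the netdev destructor rather than
+from ->dellink() (sketch of the patched code):
+
+    static void gtp_destructor(struct net_device *dev)
+    {
+        struct gtp_dev *gtp = netdev_priv(dev);
+
+        kfree(gtp->addr_hash);
+        kfree(gtp->tid_hash);
+    }
+    ...
+    dev->priv_destructor = gtp_destructor;  /* runs once all users are done */
+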
+Test commands:
+ ip link add veth1 type veth peer name veth2
+ ip a a 172.0.0.1/24 dev veth1
+ ip link set veth1 up
+ ip a a 172.99.0.1/32 dev lo
+
+ gtp-link add gtp1 &
+
+ gtp-tunnel add gtp1 v1 200 100 172.99.0.2 172.0.0.2
+ ip r a 172.99.0.2/32 dev gtp1
+ ip link set gtp1 mtu 1500
+
+ ip netns add ns2
+ ip link set veth2 netns ns2
+ ip netns exec ns2 ip a a 172.0.0.2/24 dev veth2
+ ip netns exec ns2 ip link set veth2 up
+ ip netns exec ns2 ip a a 172.99.0.2/32 dev lo
+ ip netns exec ns2 ip link set lo up
+
+ ip netns exec ns2 gtp-link add gtp2 &
+ ip netns exec ns2 gtp-tunnel add gtp2 v1 100 200 172.99.0.1 172.0.0.1
+ ip netns exec ns2 ip r a 172.99.0.1/32 dev gtp2
+ ip netns exec ns2 ip link set gtp2 mtu 1500
+
+ hping3 172.99.0.2 -2 --flood &
+ ip link del gtp1
+
+Splat looks like:
+[ 72.568081][ T1195] BUG: KASAN: use-after-free in ipv4_pdp_find.isra.12+0x130/0x170 [gtp]
+[ 72.568916][ T1195] Read of size 8 at addr ffff8880b9a35d28 by task hping3/1195
+[ 72.569631][ T1195]
+[ 72.569861][ T1195] CPU: 2 PID: 1195 Comm: hping3 Not tainted 5.5.0-rc1 #199
+[ 72.570547][ T1195] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
+[ 72.571438][ T1195] Call Trace:
+[ 72.571764][ T1195] dump_stack+0x96/0xdb
+[ 72.572171][ T1195] ? ipv4_pdp_find.isra.12+0x130/0x170 [gtp]
+[ 72.572761][ T1195] print_address_description.constprop.5+0x1be/0x360
+[ 72.573400][ T1195] ? ipv4_pdp_find.isra.12+0x130/0x170 [gtp]
+[ 72.573971][ T1195] ? ipv4_pdp_find.isra.12+0x130/0x170 [gtp]
+[ 72.574544][ T1195] __kasan_report+0x12a/0x16f
+[ 72.575014][ T1195] ? ipv4_pdp_find.isra.12+0x130/0x170 [gtp]
+[ 72.575593][ T1195] kasan_report+0xe/0x20
+[ 72.576004][ T1195] ipv4_pdp_find.isra.12+0x130/0x170 [gtp]
+[ 72.576577][ T1195] gtp_build_skb_ip4+0x199/0x1420 [gtp]
+[ ... ]
+[ 72.647671][ T1195] BUG: unable to handle page fault for address: ffff8880b9a35d28
+[ 72.648512][ T1195] #PF: supervisor read access in kernel mode
+[ 72.649158][ T1195] #PF: error_code(0x0000) - not-present page
+[ 72.649849][ T1195] PGD a6c01067 P4D a6c01067 PUD 11fb07067 PMD 11f939067 PTE 800fffff465ca060
+[ 72.652958][ T1195] Oops: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI
+[ 72.653834][ T1195] CPU: 2 PID: 1195 Comm: hping3 Tainted: G B 5.5.0-rc1 #199
+[ 72.668062][ T1195] RIP: 0010:ipv4_pdp_find.isra.12+0x86/0x170 [gtp]
+[ ... ]
+[ 72.679168][ T1195] Call Trace:
+[ 72.679603][ T1195] gtp_build_skb_ip4+0x199/0x1420 [gtp]
+[ 72.681915][ T1195] ? ipv4_pdp_find.isra.12+0x170/0x170 [gtp]
+[ 72.682513][ T1195] ? lock_acquire+0x164/0x3b0
+[ 72.682966][ T1195] ? gtp_dev_xmit+0x35e/0x890 [gtp]
+[ 72.683481][ T1195] gtp_dev_xmit+0x3c2/0x890 [gtp]
+[ ... ]
+
+Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)")
+Signed-off-by: Taehee Yoo <ap420073@gmail.com>
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/gtp.c | 34 +++++++++++++++++-----------------
+ 1 file changed, 17 insertions(+), 17 deletions(-)
+
+--- a/drivers/net/gtp.c
++++ b/drivers/net/gtp.c
+@@ -644,9 +644,16 @@ static void gtp_link_setup(struct net_de
+ }
+
+ static int gtp_hashtable_new(struct gtp_dev *gtp, int hsize);
+-static void gtp_hashtable_free(struct gtp_dev *gtp);
+ static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[]);
+
++static void gtp_destructor(struct net_device *dev)
++{
++ struct gtp_dev *gtp = netdev_priv(dev);
++
++ kfree(gtp->addr_hash);
++ kfree(gtp->tid_hash);
++}
++
+ static int gtp_newlink(struct net *src_net, struct net_device *dev,
+ struct nlattr *tb[], struct nlattr *data[],
+ struct netlink_ext_ack *extack)
+@@ -681,13 +688,15 @@ static int gtp_newlink(struct net *src_n
+
+ gn = net_generic(dev_net(dev), gtp_net_id);
+ list_add_rcu(&gtp->list, &gn->gtp_dev_list);
++ dev->priv_destructor = gtp_destructor;
+
+ netdev_dbg(dev, "registered new GTP interface\n");
+
+ return 0;
+
+ out_hashtable:
+- gtp_hashtable_free(gtp);
++ kfree(gtp->addr_hash);
++ kfree(gtp->tid_hash);
+ out_encap:
+ gtp_encap_disable(gtp);
+ return err;
+@@ -696,9 +705,14 @@ out_encap:
+ static void gtp_dellink(struct net_device *dev, struct list_head *head)
+ {
+ struct gtp_dev *gtp = netdev_priv(dev);
++ struct pdp_ctx *pctx;
++ int i;
++
++ for (i = 0; i < gtp->hash_size; i++)
++ hlist_for_each_entry_rcu(pctx, &gtp->tid_hash[i], hlist_tid)
++ pdp_context_delete(pctx);
+
+ gtp_encap_disable(gtp);
+- gtp_hashtable_free(gtp);
+ list_del_rcu(&gtp->list);
+ unregister_netdevice_queue(dev, head);
+ }
+@@ -776,20 +790,6 @@ err1:
+ return -ENOMEM;
+ }
+
+-static void gtp_hashtable_free(struct gtp_dev *gtp)
+-{
+- struct pdp_ctx *pctx;
+- int i;
+-
+- for (i = 0; i < gtp->hash_size; i++)
+- hlist_for_each_entry_rcu(pctx, &gtp->tid_hash[i], hlist_tid)
+- pdp_context_delete(pctx);
+-
+- synchronize_rcu();
+- kfree(gtp->addr_hash);
+- kfree(gtp->tid_hash);
+-}
+-
+ static struct sock *gtp_encap_enable_socket(int fd, int type,
+ struct gtp_dev *gtp)
+ {
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:36:29 PM CET
+From: Taehee Yoo <ap420073@gmail.com>
+Date: Wed, 11 Dec 2019 08:23:17 +0000
+Subject: gtp: fix wrong condition in gtp_genl_dump_pdp()
+
+From: Taehee Yoo <ap420073@gmail.com>
+
+[ Upstream commit 94a6d9fb88df43f92d943c32b84ce398d50bf49f ]
+
+gtp_genl_dump_pdp() is the ->dumpit() callback of the GTP module and is used
+to dump pdp contexts. It may be re-executed because of the dump packet size.
+
+If the dump packet gets too big, it saves the current dump pointer
+(gtp interface pointer, bucket, TID value) and then restarts the dump from
+that pointer.
+The current GTP code allows adding a pdp context with a zero TID, but the
+dump code ignores a zero TID value, so the last dump pointer will not be
+found.
+
+In addition, this patch adds the missing rcu_read_lock() in
+gtp_genl_dump_pdp().
+
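+For illustration, a simplified sketch of the patched loop, which resumes from
+a (bucket, entry index) pair instead of a TID:
+
+    for (i = bucket; i < gtp->hash_size; i++) {
+        j = 0;
+        hlist_for_each_entry_rcu(pctx, &gtp->tid_hash[i], hlist_tid) {
+            if (j >= skip && gtp_genl_fill_info(skb, ..., pctx)) {
+                cb->args[0] = i;    /* bucket to resume from */
+                cb->args[1] = j;    /* entries already dumped */
+                goto out;
+            }
+            j++;
+        }
+        skip = 0;
+    }
+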
+Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)")
+Signed-off-by: Taehee Yoo <ap420073@gmail.com>
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/gtp.c | 36 +++++++++++++++++++-----------------
+ 1 file changed, 19 insertions(+), 17 deletions(-)
+
+--- a/drivers/net/gtp.c
++++ b/drivers/net/gtp.c
+@@ -42,7 +42,6 @@ struct pdp_ctx {
+ struct hlist_node hlist_addr;
+
+ union {
+- u64 tid;
+ struct {
+ u64 tid;
+ u16 flow;
+@@ -1249,43 +1248,46 @@ static int gtp_genl_dump_pdp(struct sk_b
+ struct netlink_callback *cb)
+ {
+ struct gtp_dev *last_gtp = (struct gtp_dev *)cb->args[2], *gtp;
++ int i, j, bucket = cb->args[0], skip = cb->args[1];
+ struct net *net = sock_net(skb->sk);
+- struct gtp_net *gn = net_generic(net, gtp_net_id);
+- unsigned long tid = cb->args[1];
+- int i, k = cb->args[0], ret;
+ struct pdp_ctx *pctx;
++ struct gtp_net *gn;
++
++ gn = net_generic(net, gtp_net_id);
+
+ if (cb->args[4])
+ return 0;
+
++ rcu_read_lock();
+ list_for_each_entry_rcu(gtp, &gn->gtp_dev_list, list) {
+ if (last_gtp && last_gtp != gtp)
+ continue;
+ else
+ last_gtp = NULL;
+
+- for (i = k; i < gtp->hash_size; i++) {
+- hlist_for_each_entry_rcu(pctx, &gtp->tid_hash[i], hlist_tid) {
+- if (tid && tid != pctx->u.tid)
+- continue;
+- else
+- tid = 0;
+-
+- ret = gtp_genl_fill_info(skb,
+- NETLINK_CB(cb->skb).portid,
+- cb->nlh->nlmsg_seq,
+- cb->nlh->nlmsg_type, pctx);
+- if (ret < 0) {
++ for (i = bucket; i < gtp->hash_size; i++) {
++ j = 0;
++ hlist_for_each_entry_rcu(pctx, &gtp->tid_hash[i],
++ hlist_tid) {
++ if (j >= skip &&
++ gtp_genl_fill_info(skb,
++ NETLINK_CB(cb->skb).portid,
++ cb->nlh->nlmsg_seq,
++ cb->nlh->nlmsg_type, pctx)) {
+ cb->args[0] = i;
+- cb->args[1] = pctx->u.tid;
++ cb->args[1] = j;
+ cb->args[2] = (unsigned long)gtp;
+ goto out;
+ }
++ j++;
+ }
++ skip = 0;
+ }
++ bucket = 0;
+ }
+ cb->args[4] = 1;
+ out:
++ rcu_read_unlock();
+ return skb->len;
+ }
+
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:36:29 PM CET
+From: Hangbin Liu <liuhangbin@gmail.com>
+Date: Sun, 22 Dec 2019 10:51:10 +0800
+Subject: ip6_gre: do not confirm neighbor when do pmtu update
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit 675d76ad0ad5bf41c9a129772ef0aba8f57ea9a7 ]
+
+Currently, when we do an IPv6 GRE PMTU update, we also do a neigh confirm.
+This causes the neigh cache to be refreshed and set to REACHABLE before
+xmit.
+
+But if the remote MAC address changed, e.g. the device was deleted and
+recreated, we are not able to notice this and keep using the old MAC
+address, as the neigh cache is REACHABLE.
+
+Fix this by disabling the neigh confirm when doing the PMTU update.
+
+v5: No change.
+v4: No change.
+v3: Do not remove dst_confirm_neigh, but add a new bool parameter in
+ dst_ops.update_pmtu to control whether we should do neighbor confirm.
+ Also split the big patch to small ones for each area.
+v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu.
+
+Reported-by: Jianlin Shi <jishi@redhat.com>
+Reviewed-by: Guillaume Nault <gnault@redhat.com>
+Acked-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_gre.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv6/ip6_gre.c
++++ b/net/ipv6/ip6_gre.c
+@@ -1060,7 +1060,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit
+
+ /* TooBig packet may have updated dst->dev's mtu */
+ if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu)
+- dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu, true);
++ dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu, false);
+
+ err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
+ NEXTHDR_GRE);
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:36:29 PM CET
+From: Hangbin Liu <liuhangbin@gmail.com>
+Date: Sun, 22 Dec 2019 10:51:09 +0800
+Subject: net: add bool confirm_neigh parameter for dst_ops.update_pmtu
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit bd085ef678b2cc8c38c105673dfe8ff8f5ec0c57 ]
+
+The MTU update code is supposed to be invoked in response to real
+networking events that update the PMTU. In IPv6 PMTU update function
+__ip6_rt_update_pmtu() we called dst_confirm_neigh() to update neighbor
+confirmed time.
+
+But the tunnel code calls the PMTU update before xmit, like:
+ - tnl_update_pmtu()
+ - skb_dst_update_pmtu()
+ - ip6_rt_update_pmtu()
+ - __ip6_rt_update_pmtu()
+ - dst_confirm_neigh()
+
+If the tunnel's remote dst MAC address changed and we still do the neigh
+confirm, we will not be able to update the neigh cache, and ping6 to the
+remote will fail.
+
+So for this ip_tunnel_xmit() case, _EVEN_ if the MTU is changed, we
+should not be invoking dst_confirm_neigh() as we have no evidence
+of successful two-way communication at this point.
+
+On the other hand it is also important to keep the neigh reachability fresh
+for TCP flows, so we cannot remove this dst_confirm_neigh() call.
+
+To fix the issue, we have to add a new bool parameter to dst_ops.update_pmtu
+to choose whether we should do the neigh update or not. This patch adds the
+parameter and sets all the callers to true to keep the previous behaviour;
+the tunnel code is then fixed one by one in later patches.
+
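+For illustration, the new callback signature and an unchanged caller (sketch
+based on the hunks below):
+
+    void (*update_pmtu)(struct dst_entry *dst, struct sock *sk,
+                        struct sk_buff *skb, u32 mtu,
+                        bool confirm_neigh);
+
+    /* existing callers keep the old behaviour by passing true: */
+    dst->ops->update_pmtu(dst, sk, NULL, mtu, true);
+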
+v5: No change.
+v4: No change.
+v3: Do not remove dst_confirm_neigh, but add a new bool parameter in
+ dst_ops.update_pmtu to control whether we should do neighbor confirm.
+ Also split the big patch to small ones for each area.
+v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu.
+
+Suggested-by: David Miller <davem@davemloft.net>
+Reviewed-by: Guillaume Nault <gnault@redhat.com>
+Acked-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/gtp.c | 2 +-
+ include/net/dst.h | 2 +-
+ include/net/dst_ops.h | 3 ++-
+ net/bridge/br_nf_core.c | 3 ++-
+ net/decnet/dn_route.c | 6 ++++--
+ net/ipv4/inet_connection_sock.c | 2 +-
+ net/ipv4/route.c | 9 ++++++---
+ net/ipv4/xfrm4_policy.c | 5 +++--
+ net/ipv6/inet6_connection_sock.c | 2 +-
+ net/ipv6/ip6_gre.c | 2 +-
+ net/ipv6/route.c | 22 +++++++++++++++-------
+ net/ipv6/xfrm6_policy.c | 5 +++--
+ net/netfilter/ipvs/ip_vs_xmit.c | 2 +-
+ net/sctp/transport.c | 2 +-
+ 14 files changed, 42 insertions(+), 25 deletions(-)
+
+--- a/drivers/net/gtp.c
++++ b/drivers/net/gtp.c
+@@ -545,7 +545,7 @@ static int gtp_build_skb_ip4(struct sk_b
+ mtu = dst_mtu(&rt->dst);
+ }
+
+- rt->dst.ops->update_pmtu(&rt->dst, NULL, skb, mtu);
++ rt->dst.ops->update_pmtu(&rt->dst, NULL, skb, mtu, true);
+
+ if (!skb_is_gso(skb) && (iph->frag_off & htons(IP_DF)) &&
+ mtu < ntohs(iph->tot_len)) {
+--- a/include/net/dst.h
++++ b/include/net/dst.h
+@@ -527,7 +527,7 @@ static inline void skb_dst_update_pmtu(s
+ struct dst_entry *dst = skb_dst(skb);
+
+ if (dst && dst->ops->update_pmtu)
+- dst->ops->update_pmtu(dst, NULL, skb, mtu);
++ dst->ops->update_pmtu(dst, NULL, skb, mtu, true);
+ }
+
+ static inline void skb_tunnel_check_pmtu(struct sk_buff *skb,
+--- a/include/net/dst_ops.h
++++ b/include/net/dst_ops.h
+@@ -27,7 +27,8 @@ struct dst_ops {
+ struct dst_entry * (*negative_advice)(struct dst_entry *);
+ void (*link_failure)(struct sk_buff *);
+ void (*update_pmtu)(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb, u32 mtu);
++ struct sk_buff *skb, u32 mtu,
++ bool confirm_neigh);
+ void (*redirect)(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb);
+ int (*local_out)(struct net *net, struct sock *sk, struct sk_buff *skb);
+--- a/net/bridge/br_nf_core.c
++++ b/net/bridge/br_nf_core.c
+@@ -26,7 +26,8 @@
+ #endif
+
+ static void fake_update_pmtu(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb, u32 mtu)
++ struct sk_buff *skb, u32 mtu,
++ bool confirm_neigh)
+ {
+ }
+
+--- a/net/decnet/dn_route.c
++++ b/net/decnet/dn_route.c
+@@ -118,7 +118,8 @@ static void dn_dst_ifdown(struct dst_ent
+ static struct dst_entry *dn_dst_negative_advice(struct dst_entry *);
+ static void dn_dst_link_failure(struct sk_buff *);
+ static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb , u32 mtu);
++ struct sk_buff *skb , u32 mtu,
++ bool confirm_neigh);
+ static void dn_dst_redirect(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb);
+ static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst,
+@@ -259,7 +260,8 @@ static int dn_dst_gc(struct dst_ops *ops
+ * advertise to the other end).
+ */
+ static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb, u32 mtu)
++ struct sk_buff *skb, u32 mtu,
++ bool confirm_neigh)
+ {
+ struct dn_route *rt = (struct dn_route *) dst;
+ struct neighbour *n = rt->n;
+--- a/net/ipv4/inet_connection_sock.c
++++ b/net/ipv4/inet_connection_sock.c
+@@ -1089,7 +1089,7 @@ struct dst_entry *inet_csk_update_pmtu(s
+ if (!dst)
+ goto out;
+ }
+- dst->ops->update_pmtu(dst, sk, NULL, mtu);
++ dst->ops->update_pmtu(dst, sk, NULL, mtu, true);
+
+ dst = __sk_dst_check(sk, 0);
+ if (!dst)
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -142,7 +142,8 @@ static unsigned int ipv4_mtu(const stru
+ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
+ static void ipv4_link_failure(struct sk_buff *skb);
+ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb, u32 mtu);
++ struct sk_buff *skb, u32 mtu,
++ bool confirm_neigh);
+ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb);
+ static void ipv4_dst_destroy(struct dst_entry *dst);
+@@ -1035,7 +1036,8 @@ static void __ip_rt_update_pmtu(struct r
+ }
+
+ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb, u32 mtu)
++ struct sk_buff *skb, u32 mtu,
++ bool confirm_neigh)
+ {
+ struct rtable *rt = (struct rtable *) dst;
+ struct flowi4 fl4;
+@@ -2559,7 +2561,8 @@ static unsigned int ipv4_blackhole_mtu(c
+ }
+
+ static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb, u32 mtu)
++ struct sk_buff *skb, u32 mtu,
++ bool confirm_neigh)
+ {
+ }
+
+--- a/net/ipv4/xfrm4_policy.c
++++ b/net/ipv4/xfrm4_policy.c
+@@ -222,12 +222,13 @@ _decode_session4(struct sk_buff *skb, st
+ }
+
+ static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb, u32 mtu)
++ struct sk_buff *skb, u32 mtu,
++ bool confirm_neigh)
+ {
+ struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+ struct dst_entry *path = xdst->route;
+
+- path->ops->update_pmtu(path, sk, skb, mtu);
++ path->ops->update_pmtu(path, sk, skb, mtu, confirm_neigh);
+ }
+
+ static void xfrm4_redirect(struct dst_entry *dst, struct sock *sk,
+--- a/net/ipv6/inet6_connection_sock.c
++++ b/net/ipv6/inet6_connection_sock.c
+@@ -150,7 +150,7 @@ struct dst_entry *inet6_csk_update_pmtu(
+
+ if (IS_ERR(dst))
+ return NULL;
+- dst->ops->update_pmtu(dst, sk, NULL, mtu);
++ dst->ops->update_pmtu(dst, sk, NULL, mtu, true);
+
+ dst = inet6_csk_route_socket(sk, &fl6);
+ return IS_ERR(dst) ? NULL : dst;
+--- a/net/ipv6/ip6_gre.c
++++ b/net/ipv6/ip6_gre.c
+@@ -1060,7 +1060,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit
+
+ /* TooBig packet may have updated dst->dev's mtu */
+ if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu)
+- dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu);
++ dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu, true);
+
+ err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
+ NEXTHDR_GRE);
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -99,7 +99,8 @@ static int ip6_pkt_prohibit(struct sk_b
+ static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
+ static void ip6_link_failure(struct sk_buff *skb);
+ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb, u32 mtu);
++ struct sk_buff *skb, u32 mtu,
++ bool confirm_neigh);
+ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb);
+ static int rt6_score_route(struct fib6_info *rt, int oif, int strict);
+@@ -266,7 +267,8 @@ static unsigned int ip6_blackhole_mtu(co
+ }
+
+ static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb, u32 mtu)
++ struct sk_buff *skb, u32 mtu,
++ bool confirm_neigh)
+ {
+ }
+
+@@ -2352,7 +2354,8 @@ static bool rt6_cache_allowed_for_pmtu(c
+ }
+
+ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
+- const struct ipv6hdr *iph, u32 mtu)
++ const struct ipv6hdr *iph, u32 mtu,
++ bool confirm_neigh)
+ {
+ const struct in6_addr *daddr, *saddr;
+ struct rt6_info *rt6 = (struct rt6_info *)dst;
+@@ -2370,7 +2373,10 @@ static void __ip6_rt_update_pmtu(struct
+ daddr = NULL;
+ saddr = NULL;
+ }
+- dst_confirm_neigh(dst, daddr);
++
++ if (confirm_neigh)
++ dst_confirm_neigh(dst, daddr);
++
+ mtu = max_t(u32, mtu, IPV6_MIN_MTU);
+ if (mtu >= dst_mtu(dst))
+ return;
+@@ -2401,9 +2407,11 @@ static void __ip6_rt_update_pmtu(struct
+ }
+
+ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb, u32 mtu)
++ struct sk_buff *skb, u32 mtu,
++ bool confirm_neigh)
+ {
+- __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
++ __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu,
++ confirm_neigh);
+ }
+
+ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
+@@ -2423,7 +2431,7 @@ void ip6_update_pmtu(struct sk_buff *skb
+
+ dst = ip6_route_output(net, NULL, &fl6);
+ if (!dst->error)
+- __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
++ __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu), true);
+ dst_release(dst);
+ }
+ EXPORT_SYMBOL_GPL(ip6_update_pmtu);
+--- a/net/ipv6/xfrm6_policy.c
++++ b/net/ipv6/xfrm6_policy.c
+@@ -221,12 +221,13 @@ _decode_session6(struct sk_buff *skb, st
+ }
+
+ static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk,
+- struct sk_buff *skb, u32 mtu)
++ struct sk_buff *skb, u32 mtu,
++ bool confirm_neigh)
+ {
+ struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+ struct dst_entry *path = xdst->route;
+
+- path->ops->update_pmtu(path, sk, skb, mtu);
++ path->ops->update_pmtu(path, sk, skb, mtu, confirm_neigh);
+ }
+
+ static void xfrm6_redirect(struct dst_entry *dst, struct sock *sk,
+--- a/net/netfilter/ipvs/ip_vs_xmit.c
++++ b/net/netfilter/ipvs/ip_vs_xmit.c
+@@ -209,7 +209,7 @@ static inline void maybe_update_pmtu(int
+ struct rtable *ort = skb_rtable(skb);
+
+ if (!skb->dev && sk && sk_fullsock(sk))
+- ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
++ ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu, true);
+ }
+
+ static inline bool ensure_mtu_is_adequate(struct netns_ipvs *ipvs, int skb_af,
+--- a/net/sctp/transport.c
++++ b/net/sctp/transport.c
+@@ -278,7 +278,7 @@ bool sctp_transport_update_pmtu(struct s
+
+ pf->af->from_sk(&addr, sk);
+ pf->to_sk_daddr(&t->ipaddr, sk);
+- dst->ops->update_pmtu(dst, sk, NULL, pmtu);
++ dst->ops->update_pmtu(dst, sk, NULL, pmtu, true);
+ pf->to_sk_daddr(&addr, sk);
+
+ dst = sctp_transport_dst_check(t);
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:36:29 PM CET
+From: Hangbin Liu <liuhangbin@gmail.com>
+Date: Sun, 22 Dec 2019 10:51:12 +0800
+Subject: net/dst: add new function skb_dst_update_pmtu_no_confirm
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit 07dc35c6e3cc3c001915d05f5bf21f80a39a0970 ]
+
+Add a new function skb_dst_update_pmtu_no_confirm() for callers that need to
+update the PMTU but should not do a neighbor confirm.
+
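+For illustration, a hypothetical tunnel xmit caller would use it like this
+(sketch, not taken from a specific driver):
+
+    /* no proven two-way traffic yet, so skip the neighbour confirm */
+    if (skb->len > mtu)
+        skb_dst_update_pmtu_no_confirm(skb, mtu);
+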
+v5: No change.
+v4: No change.
+v3: Do not remove dst_confirm_neigh, but add a new bool parameter in
+ dst_ops.update_pmtu to control whether we should do neighbor confirm.
+ Also split the big patch to small ones for each area.
+v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu.
+
+Reviewed-by: Guillaume Nault <gnault@redhat.com>
+Acked-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/dst.h | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+--- a/include/net/dst.h
++++ b/include/net/dst.h
+@@ -530,6 +530,15 @@ static inline void skb_dst_update_pmtu(s
+ dst->ops->update_pmtu(dst, NULL, skb, mtu, true);
+ }
+
++/* update dst pmtu but not do neighbor confirm */
++static inline void skb_dst_update_pmtu_no_confirm(struct sk_buff *skb, u32 mtu)
++{
++ struct dst_entry *dst = skb_dst(skb);
++
++ if (dst && dst->ops->update_pmtu)
++ dst->ops->update_pmtu(dst, NULL, skb, mtu, false);
++}
++
+ static inline void skb_tunnel_check_pmtu(struct sk_buff *skb,
+ struct dst_entry *encap_dst,
+ int headroom)
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:36:29 PM CET
+From: Hangbin Liu <liuhangbin@gmail.com>
+Date: Sun, 22 Dec 2019 10:51:16 +0800
+Subject: net/dst: do not confirm neighbor for vxlan and geneve pmtu update
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit f081042d128a0c7acbd67611def62e1b52e2d294 ]
+
+When doing an IPv6 tunnel PMTU update that ends up calling
+__ip6_rt_update_pmtu(), we should not call dst_confirm_neigh() as there is
+no two-way communication.
+
+So disable the neigh confirm for the vxlan and geneve PMTU update.
+
+v5: No change.
+v4: No change.
+v3: Do not remove dst_confirm_neigh, but add a new bool parameter in
+ dst_ops.update_pmtu to control whether we should do neighbor confirm.
+ Also split the big patch to small ones for each area.
+v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu.
+
+Fixes: a93bf0ff4490 ("vxlan: update skb dst pmtu on tx path")
+Fixes: 52a589d51f10 ("geneve: update skb dst pmtu on tx path")
+Reviewed-by: Guillaume Nault <gnault@redhat.com>
+Tested-by: Guillaume Nault <gnault@redhat.com>
+Acked-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/dst.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/include/net/dst.h
++++ b/include/net/dst.h
+@@ -546,7 +546,7 @@ static inline void skb_tunnel_check_pmtu
+ u32 encap_mtu = dst_mtu(encap_dst);
+
+ if (skb->len > encap_mtu - headroom)
+- skb_dst_update_pmtu(skb, encap_mtu - headroom);
++ skb_dst_update_pmtu_no_confirm(skb, encap_mtu - headroom);
+ }
+
+ #endif /* _NET_DST_H */
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:36:29 PM CET
+From: Netanel Belgazal <netanel@amazon.com>
+Date: Tue, 10 Dec 2019 11:27:44 +0000
+Subject: net: ena: fix napi handler misbehavior when the napi budget is zero
+
+From: Netanel Belgazal <netanel@amazon.com>
+
+[ Upstream commit 24dee0c7478d1a1e00abdf5625b7f921467325dc ]
+
+In netpoll the napi handler can be called with a budget equal to zero.
+The current ENA napi handler doesn't take that into consideration.
+
+The napi handler handles Rx packets in a do-while loop.
+Currently, the budget check happens only after decrementing the
+budget, therefore the napi handler, in rare cases, could run over
+MAX_INT packets.
+
+In addition, this moves all budget-related variables to int
+calculations and stops mixing in u32, to avoid ambiguity.
+
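+For illustration, the RX side of the patched handler (sketch):
+
+    tx_work_done = ena_clean_tx_irq(tx_ring, tx_budget);
+    /* budget == 0 (netpoll): clean only TX completions */
+    if (likely(budget))
+        rx_work_done = ena_clean_rx_irq(rx_ring, napi, budget);
+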
+Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)")
+Signed-off-by: Netanel Belgazal <netanel@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/amazon/ena/ena_netdev.c | 10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
+@@ -1197,8 +1197,8 @@ static int ena_io_poll(struct napi_struc
+ struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi);
+ struct ena_ring *tx_ring, *rx_ring;
+
+- u32 tx_work_done;
+- u32 rx_work_done;
++ int tx_work_done;
++ int rx_work_done = 0;
+ int tx_budget;
+ int napi_comp_call = 0;
+ int ret;
+@@ -1215,7 +1215,11 @@ static int ena_io_poll(struct napi_struc
+ }
+
+ tx_work_done = ena_clean_tx_irq(tx_ring, tx_budget);
+- rx_work_done = ena_clean_rx_irq(rx_ring, napi, budget);
++ /* On netpoll the budget is zero and the handler should only clean the
++ * tx completions.
++ */
++ if (likely(budget))
++ rx_work_done = ena_clean_rx_irq(rx_ring, napi, budget);
+
+ /* If the device is about to reset or down, avoid unmask
+ * the interrupt and return 0 so NAPI won't reschedule
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:36:29 PM CET
+From: Russell King <rmk+kernel@armlinux.org.uk>
+Date: Tue, 10 Dec 2019 22:33:05 +0000
+Subject: net: marvell: mvpp2: phylink requires the link interrupt
+
+From: Russell King <rmk+kernel@armlinux.org.uk>
+
+[ Upstream commit f3f2364ea14d1cf6bf966542f31eadcf178f1577 ]
+
+phylink requires the MAC to report when its link status changes when
+operating in inband modes. Failure to report link status changes
+means that phylink has no idea when the link events happen, which
+results in either the network interface's carrier remaining up or
+remaining permanently down.
+
+For example, with a fiber module, if the interface is brought up and
+link is initially established, taking the link down at the far end
+will cut the optical power. The SFP module's LOS asserts, we
+deactivate the link, and the network interface reports no carrier.
+
+When the far end is brought back up, the SFP module's LOS deasserts,
+but the MAC may be slower to establish link. If this happens (which
+in my tests is a certainty) then phylink never hears that the MAC
+has established link with the far end, and the network interface is
+stuck reporting no carrier. This means the interface is
+non-functional.
+
+Avoiding the link interrupt when we have phylink is basically not
+an option, so remove the !port->phylink from the test.
+
+Fixes: 4bb043262878 ("net: mvpp2: phylink support")
+Tested-by: Sven Auhagen <sven.auhagen@voleatech.de>
+Tested-by: Antoine Tenart <antoine.tenart@bootlin.com>
+Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
++++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+@@ -3341,7 +3341,7 @@ static int mvpp2_open(struct net_device
+ valid = true;
+ }
+
+- if (priv->hw_version == MVPP22 && port->link_irq && !port->phylink) {
++ if (priv->hw_version == MVPP22 && port->link_irq) {
+ err = request_irq(port->link_irq, mvpp2_link_status_isr, 0,
+ dev->name, port);
+ if (err) {
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:36:29 PM CET
+From: Vladyslav Tarasiuk <vladyslavt@mellanox.com>
+Date: Thu, 26 Dec 2019 10:41:56 +0200
+Subject: net/mlxfw: Fix out-of-memory error in mfa2 flash burning
+
+From: Vladyslav Tarasiuk <vladyslavt@mellanox.com>
+
+[ Upstream commit a5bcd72e054aabb93ddc51ed8cde36a5bfc50271 ]
+
+The burning process requires performing internal allocations of large
+chunks of memory. This memory doesn't need to be contiguous and can be
+safely allocated by vzalloc() instead of kzalloc(). This patch changes
+the allocation accordingly to avoid a possible out-of-memory failure.
+
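+For illustration, the allocation and its matching release after the change
+(sketch of the hunks below):
+
+    comp_data = vzalloc(sizeof(*comp_data) + comp_buf_size);
+    if (!comp_data)
+        return ERR_PTR(-ENOMEM);
+    ...
+    vfree(comp_data);   /* vzalloc() must be paired with vfree(), not kfree() */
+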
+Fixes: 410ed13cae39 ("Add the mlxfw module for Mellanox firmware flash process")
+Signed-off-by: Vladyslav Tarasiuk <vladyslavt@mellanox.com>
+Reviewed-by: Aya Levin <ayal@mellanox.com>
+Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
+Tested-by: Ido Schimmel <idosch@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c
++++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c
+@@ -37,6 +37,7 @@
+ #include <linux/kernel.h>
+ #include <linux/module.h>
+ #include <linux/netlink.h>
++#include <linux/vmalloc.h>
+ #include <linux/xz.h>
+ #include "mlxfw_mfa2.h"
+ #include "mlxfw_mfa2_file.h"
+@@ -579,7 +580,7 @@ mlxfw_mfa2_file_component_get(const stru
+ comp_size = be32_to_cpu(comp->size);
+ comp_buf_size = comp_size + mlxfw_mfa2_comp_magic_len;
+
+- comp_data = kmalloc(sizeof(*comp_data) + comp_buf_size, GFP_KERNEL);
++ comp_data = vzalloc(sizeof(*comp_data) + comp_buf_size);
+ if (!comp_data)
+ return ERR_PTR(-ENOMEM);
+ comp_data->comp.data_size = comp_size;
+@@ -601,7 +602,7 @@ mlxfw_mfa2_file_component_get(const stru
+ comp_data->comp.data = comp_data->buff + mlxfw_mfa2_comp_magic_len;
+ return &comp_data->comp;
+ err_out:
+- kfree(comp_data);
++ vfree(comp_data);
+ return ERR_PTR(err);
+ }
+
+@@ -610,7 +611,7 @@ void mlxfw_mfa2_file_component_put(struc
+ const struct mlxfw_mfa2_comp_data *comp_data;
+
+ comp_data = container_of(comp, struct mlxfw_mfa2_comp_data, comp);
+- kfree(comp_data);
++ vfree(comp_data);
+ }
+
+ void mlxfw_mfa2_file_fini(struct mlxfw_mfa2_file *mfa2_file)
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:36:29 PM CET
+From: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
+Date: Thu, 26 Dec 2019 20:01:01 +0100
+Subject: net: stmmac: dwmac-meson8b: Fix the RGMII TX delay on Meson8b/8m2 SoCs
+
+From: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
+
+[ Upstream commit bd6f48546b9cb7a785344fc78058c420923d7ed8 ]
+
+GXBB and newer SoCs use the fixed FCLK_DIV2 (1GHz) clock as input for
+the m250_sel clock. Meson8b and Meson8m2 use MPLL2 instead, whose rate
+can be adjusted at runtime.
+
+So far we have been running MPLL2 with ~250MHz (and the internal
+m250_div with value 1), which worked enough that we could transfer data
+with an TX delay of 4ns. Unfortunately there is high packet loss with
+an RGMII PHY when transferring data (receiving data works fine though).
+Odroid-C1's u-boot is running with a TX delay of only 2ns as well as
+the internal m250_div set to 2 - no lost (TX) packets can be observed
+with that setting in u-boot.
+
+Manual testing has shown that the TX packet loss goes away when using
+the following settings in Linux (the vendor kernel uses the same
+settings):
+- MPLL2 clock set to ~500MHz
+- m250_div set to 2
+- TX delay set to 2ns on the MAC side
+
+Update the m250_div divider settings to only accept dividers greater than or
+equal to 2 to fix the TX delay generated by the MAC.
+
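+For illustration, the m250 rate stays the same while the divider changes
+(250 MHz / 1 before, 500 MHz / 2 after); the new divider table simply no
+longer offers the value 1 (sketch of the added table):
+
+    static const struct clk_div_table div_table[] = {
+        { .div = 2, .val = 2, }, { .div = 3, .val = 3, },
+        { .div = 4, .val = 4, }, { .div = 5, .val = 5, },
+        { .div = 6, .val = 6, }, { .div = 7, .val = 7, },
+    };
+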
+iperf3 results before the change:
+[ ID] Interval Transfer Bitrate Retr
+[ 5] 0.00-10.00 sec 182 MBytes 153 Mbits/sec 514 sender
+[ 5] 0.00-10.00 sec 182 MBytes 152 Mbits/sec receiver
+
+iperf3 results after the change (including an updated TX delay of 2ns):
+[ ID] Interval Transfer Bitrate Retr Cwnd
+[ 5] 0.00-10.00 sec 927 MBytes 778 Mbits/sec 0 sender
+[ 5] 0.00-10.01 sec 927 MBytes 777 Mbits/sec receiver
+
+Fixes: 4f6a71b84e1afd ("net: stmmac: dwmac-meson8b: fix internal RGMII clock configuration")
+Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c | 14 +++++++++++---
+ 1 file changed, 11 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
+@@ -118,6 +118,14 @@ static int meson8b_init_rgmii_tx_clk(str
+ struct device *dev = dwmac->dev;
+ const char *parent_name, *mux_parent_names[MUX_CLK_NUM_PARENTS];
+ struct meson8b_dwmac_clk_configs *clk_configs;
++ static const struct clk_div_table div_table[] = {
++ { .div = 2, .val = 2, },
++ { .div = 3, .val = 3, },
++ { .div = 4, .val = 4, },
++ { .div = 5, .val = 5, },
++ { .div = 6, .val = 6, },
++ { .div = 7, .val = 7, },
++ };
+
+ clk_configs = devm_kzalloc(dev, sizeof(*clk_configs), GFP_KERNEL);
+ if (!clk_configs)
+@@ -152,9 +160,9 @@ static int meson8b_init_rgmii_tx_clk(str
+ clk_configs->m250_div.reg = dwmac->regs + PRG_ETH0;
+ clk_configs->m250_div.shift = PRG_ETH0_CLK_M250_DIV_SHIFT;
+ clk_configs->m250_div.width = PRG_ETH0_CLK_M250_DIV_WIDTH;
+- clk_configs->m250_div.flags = CLK_DIVIDER_ONE_BASED |
+- CLK_DIVIDER_ALLOW_ZERO |
+- CLK_DIVIDER_ROUND_CLOSEST;
++ clk_configs->m250_div.table = div_table;
++ clk_configs->m250_div.flags = CLK_DIVIDER_ALLOW_ZERO |
++ CLK_DIVIDER_ROUND_CLOSEST;
+ clk = meson8b_dwmac_register_clk(dwmac, "m250_div", &parent_name, 1,
+ &clk_divider_ops,
+ &clk_configs->m250_div.hw);
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:36:29 PM CET
+From: Vladis Dronov <vdronov@redhat.com>
+Date: Fri, 27 Dec 2019 03:26:27 +0100
+Subject: ptp: fix the race between the release of ptp_clock and cdev
+
+From: Vladis Dronov <vdronov@redhat.com>
+
+[ Upstream commit a33121e5487b424339636b25c35d3a180eaa5f5e ]
+
+In the case when a ptp chardev (like /dev/ptp0) is open but the underlying
+device is removed, closing this file leads to a race. This reproduces
+easily in a kvm virtual machine:
+
+ts# cat openptp0.c
+int main() { ... fp = fopen("/dev/ptp0", "r"); ... sleep(10); }
+ts# uname -r
+5.5.0-rc3-46cf053e
+ts# cat /proc/cmdline
+... slub_debug=FZP
+ts# modprobe ptp_kvm
+ts# ./openptp0 &
+[1] 670
+opened /dev/ptp0, sleeping 10s...
+ts# rmmod ptp_kvm
+ts# ls /dev/ptp*
+ls: cannot access '/dev/ptp*': No such file or directory
+ts# ...woken up
+[ 48.010809] general protection fault: 0000 [#1] SMP
+[ 48.012502] CPU: 6 PID: 658 Comm: openptp0 Not tainted 5.5.0-rc3-46cf053e #25
+[ 48.014624] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), ...
+[ 48.016270] RIP: 0010:module_put.part.0+0x7/0x80
+[ 48.017939] RSP: 0018:ffffb3850073be00 EFLAGS: 00010202
+[ 48.018339] RAX: 000000006b6b6b6b RBX: 6b6b6b6b6b6b6b6b RCX: ffff89a476c00ad0
+[ 48.018936] RDX: fffff65a08d3ea08 RSI: 0000000000000247 RDI: 6b6b6b6b6b6b6b6b
+[ 48.019470] ... ^^^ a slub poison
+[ 48.023854] Call Trace:
+[ 48.024050] __fput+0x21f/0x240
+[ 48.024288] task_work_run+0x79/0x90
+[ 48.024555] do_exit+0x2af/0xab0
+[ 48.024799] ? vfs_write+0x16a/0x190
+[ 48.025082] do_group_exit+0x35/0x90
+[ 48.025387] __x64_sys_exit_group+0xf/0x10
+[ 48.025737] do_syscall_64+0x3d/0x130
+[ 48.026056] entry_SYSCALL_64_after_hwframe+0x44/0xa9
+[ 48.026479] RIP: 0033:0x7f53b12082f6
+[ 48.026792] ...
+[ 48.030945] Modules linked in: ptp i6300esb watchdog [last unloaded: ptp_kvm]
+[ 48.045001] Fixing recursive fault but reboot is needed!
+
+This happens in:
+
+static void __fput(struct file *file)
+{ ...
+ if (file->f_op->release)
+ file->f_op->release(inode, file); <<< cdev is kfree'd here
+ if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL &&
+ !(mode & FMODE_PATH))) {
+ cdev_put(inode->i_cdev); <<< cdev fields are accessed here
+
+Namely:
+
+__fput()
+ posix_clock_release()
+ kref_put(&clk->kref, delete_clock) <<< the last reference
+ delete_clock()
+ delete_ptp_clock()
+ kfree(ptp) <<< cdev is embedded in ptp
+ cdev_put
+ module_put(p->owner) <<< *p is kfree'd, bang!
+
+Here cdev is embedded in posix_clock which is embedded in ptp_clock.
+The race happens because ptp_clock's lifetime is controlled by two
+refcounts: kref and cdev.kobj in posix_clock. This is wrong.
+
+Make ptp_clock's sysfs device the parent of the cdev with cdev_device_add(),
+which was created especially for such cases. This way the parent device with
+its ptp_clock is not released until all references to the cdev are released.
+This adds a requirement that an initialized but not exposed struct
+device should be provided to posix_clock_register() by a caller instead
+of a simple dev_t.
+
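+For illustration, the registration path after the change (sketch of the hunks
+below):
+
+    device_initialize(&ptp->dev);
+    ptp->dev.release = ptp_clock_release;   /* replaces delete_ptp_clock() */
+    ...
+    err = posix_clock_register(&ptp->clock, &ptp->dev);
+    /* posix_clock_register() now calls cdev_device_add(), so the device,
+     * and with it the ptp_clock, outlives every open chardev reference.
+     */
+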
+This approach was adopted from the commit 72139dfa2464 ("watchdog: Fix
+the race between the release of watchdog_core_data and cdev"). See
+details of the implementation in the commit 233ed09d7fda ("chardev: add
+helper function to register char devs with a struct device").
+
+Link: https://lore.kernel.org/linux-fsdevel/20191125125342.6189-1-vdronov@redhat.com/T/#u
+Analyzed-by: Stephen Johnston <sjohnsto@redhat.com>
+Analyzed-by: Vern Lovejoy <vlovejoy@redhat.com>
+Signed-off-by: Vladis Dronov <vdronov@redhat.com>
+Acked-by: Richard Cochran <richardcochran@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/ptp/ptp_clock.c | 31 ++++++++++++++-----------------
+ drivers/ptp/ptp_private.h | 2 +-
+ include/linux/posix-clock.h | 19 +++++++++++--------
+ kernel/time/posix-clock.c | 31 +++++++++++++------------------
+ 4 files changed, 39 insertions(+), 44 deletions(-)
+
+--- a/drivers/ptp/ptp_clock.c
++++ b/drivers/ptp/ptp_clock.c
+@@ -175,9 +175,9 @@ static struct posix_clock_operations ptp
+ .read = ptp_read,
+ };
+
+-static void delete_ptp_clock(struct posix_clock *pc)
++static void ptp_clock_release(struct device *dev)
+ {
+- struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
++ struct ptp_clock *ptp = container_of(dev, struct ptp_clock, dev);
+
+ mutex_destroy(&ptp->tsevq_mux);
+ mutex_destroy(&ptp->pincfg_mux);
+@@ -222,7 +222,6 @@ struct ptp_clock *ptp_clock_register(str
+ }
+
+ ptp->clock.ops = ptp_clock_ops;
+- ptp->clock.release = delete_ptp_clock;
+ ptp->info = info;
+ ptp->devid = MKDEV(major, index);
+ ptp->index = index;
+@@ -249,15 +248,6 @@ struct ptp_clock *ptp_clock_register(str
+ if (err)
+ goto no_pin_groups;
+
+- /* Create a new device in our class. */
+- ptp->dev = device_create_with_groups(ptp_class, parent, ptp->devid,
+- ptp, ptp->pin_attr_groups,
+- "ptp%d", ptp->index);
+- if (IS_ERR(ptp->dev)) {
+- err = PTR_ERR(ptp->dev);
+- goto no_device;
+- }
+-
+ /* Register a new PPS source. */
+ if (info->pps) {
+ struct pps_source_info pps;
+@@ -273,8 +263,18 @@ struct ptp_clock *ptp_clock_register(str
+ }
+ }
+
+- /* Create a posix clock. */
+- err = posix_clock_register(&ptp->clock, ptp->devid);
++ /* Initialize a new device of our class in our clock structure. */
++ device_initialize(&ptp->dev);
++ ptp->dev.devt = ptp->devid;
++ ptp->dev.class = ptp_class;
++ ptp->dev.parent = parent;
++ ptp->dev.groups = ptp->pin_attr_groups;
++ ptp->dev.release = ptp_clock_release;
++ dev_set_drvdata(&ptp->dev, ptp);
++ dev_set_name(&ptp->dev, "ptp%d", ptp->index);
++
++ /* Create a posix clock and link it to the device. */
++ err = posix_clock_register(&ptp->clock, &ptp->dev);
+ if (err) {
+ pr_err("failed to create posix clock\n");
+ goto no_clock;
+@@ -286,8 +286,6 @@ no_clock:
+ if (ptp->pps_source)
+ pps_unregister_source(ptp->pps_source);
+ no_pps:
+- device_destroy(ptp_class, ptp->devid);
+-no_device:
+ ptp_cleanup_pin_groups(ptp);
+ no_pin_groups:
+ if (ptp->kworker)
+@@ -317,7 +315,6 @@ int ptp_clock_unregister(struct ptp_cloc
+ if (ptp->pps_source)
+ pps_unregister_source(ptp->pps_source);
+
+- device_destroy(ptp_class, ptp->devid);
+ ptp_cleanup_pin_groups(ptp);
+
+ posix_clock_unregister(&ptp->clock);
+--- a/drivers/ptp/ptp_private.h
++++ b/drivers/ptp/ptp_private.h
+@@ -41,7 +41,7 @@ struct timestamp_event_queue {
+
+ struct ptp_clock {
+ struct posix_clock clock;
+- struct device *dev;
++ struct device dev;
+ struct ptp_clock_info *info;
+ dev_t devid;
+ int index; /* index into clocks.map */
+--- a/include/linux/posix-clock.h
++++ b/include/linux/posix-clock.h
+@@ -82,29 +82,32 @@ struct posix_clock_operations {
+ *
+ * @ops: Functional interface to the clock
+ * @cdev: Character device instance for this clock
+- * @kref: Reference count.
++ * @dev: Pointer to the clock's device.
+ * @rwsem: Protects the 'zombie' field from concurrent access.
+ * @zombie: If 'zombie' is true, then the hardware has disappeared.
+- * @release: A function to free the structure when the reference count reaches
+- * zero. May be NULL if structure is statically allocated.
+ *
+ * Drivers should embed their struct posix_clock within a private
+ * structure, obtaining a reference to it during callbacks using
+ * container_of().
++ *
++ * Drivers should supply an initialized but not exposed struct device
++ * to posix_clock_register(). It is used to manage lifetime of the
++ * driver's private structure. It's 'release' field should be set to
++ * a release function for this private structure.
+ */
+ struct posix_clock {
+ struct posix_clock_operations ops;
+ struct cdev cdev;
+- struct kref kref;
++ struct device *dev;
+ struct rw_semaphore rwsem;
+ bool zombie;
+- void (*release)(struct posix_clock *clk);
+ };
+
+ /**
+ * posix_clock_register() - register a new clock
+- * @clk: Pointer to the clock. Caller must provide 'ops' and 'release'
+- * @devid: Allocated device id
++ * @clk: Pointer to the clock. Caller must provide 'ops' field
++ * @dev: Pointer to the initialized device. Caller must provide
++ * 'release' field
+ *
+ * A clock driver calls this function to register itself with the
+ * clock device subsystem. If 'clk' points to dynamically allocated
+@@ -113,7 +116,7 @@ struct posix_clock {
+ *
+ * Returns zero on success, non-zero otherwise.
+ */
+-int posix_clock_register(struct posix_clock *clk, dev_t devid);
++int posix_clock_register(struct posix_clock *clk, struct device *dev);
+
+ /**
+ * posix_clock_unregister() - unregister a clock
+--- a/kernel/time/posix-clock.c
++++ b/kernel/time/posix-clock.c
+@@ -27,8 +27,6 @@
+
+ #include "posix-timers.h"
+
+-static void delete_clock(struct kref *kref);
+-
+ /*
+ * Returns NULL if the posix_clock instance attached to 'fp' is old and stale.
+ */
+@@ -138,7 +136,7 @@ static int posix_clock_open(struct inode
+ err = 0;
+
+ if (!err) {
+- kref_get(&clk->kref);
++ get_device(clk->dev);
+ fp->private_data = clk;
+ }
+ out:
+@@ -154,7 +152,7 @@ static int posix_clock_release(struct in
+ if (clk->ops.release)
+ err = clk->ops.release(clk);
+
+- kref_put(&clk->kref, delete_clock);
++ put_device(clk->dev);
+
+ fp->private_data = NULL;
+
+@@ -174,38 +172,35 @@ static const struct file_operations posi
+ #endif
+ };
+
+-int posix_clock_register(struct posix_clock *clk, dev_t devid)
++int posix_clock_register(struct posix_clock *clk, struct device *dev)
+ {
+ int err;
+
+- kref_init(&clk->kref);
+ init_rwsem(&clk->rwsem);
+
+ cdev_init(&clk->cdev, &posix_clock_file_operations);
++ err = cdev_device_add(&clk->cdev, dev);
++ if (err) {
++ pr_err("%s unable to add device %d:%d\n",
++ dev_name(dev), MAJOR(dev->devt), MINOR(dev->devt));
++ return err;
++ }
+ clk->cdev.owner = clk->ops.owner;
+- err = cdev_add(&clk->cdev, devid, 1);
++ clk->dev = dev;
+
+- return err;
++ return 0;
+ }
+ EXPORT_SYMBOL_GPL(posix_clock_register);
+
+-static void delete_clock(struct kref *kref)
+-{
+- struct posix_clock *clk = container_of(kref, struct posix_clock, kref);
+-
+- if (clk->release)
+- clk->release(clk);
+-}
+-
+ void posix_clock_unregister(struct posix_clock *clk)
+ {
+- cdev_del(&clk->cdev);
++ cdev_device_del(&clk->cdev, clk->dev);
+
+ down_write(&clk->rwsem);
+ clk->zombie = true;
+ up_write(&clk->rwsem);
+
+- kref_put(&clk->kref, delete_clock);
++ put_device(clk->dev);
+ }
+ EXPORT_SYMBOL_GPL(posix_clock_unregister);
+
net-add-a-read_once-in-skb_peek_tail.patch
net-icmp-fix-data-race-in-cmp_global_allow.patch
hrtimer-annotate-lockless-access-to-timer-state.patch
+net-ena-fix-napi-handler-misbehavior-when-the-napi-budget-is-zero.patch
+net-mlxfw-fix-out-of-memory-error-in-mfa2-flash-burning.patch
+net-stmmac-dwmac-meson8b-fix-the-rgmii-tx-delay-on-meson8b-8m2-socs.patch
+ptp-fix-the-race-between-the-release-of-ptp_clock-and-cdev.patch
+tcp-fix-highest_sack-and-highest_sack_seq.patch
+udp-fix-integer-overflow-while-computing-available-space-in-sk_rcvbuf.patch
+vhost-vsock-accept-only-packets-with-the-right-dst_cid.patch
+net-add-bool-confirm_neigh-parameter-for-dst_ops.update_pmtu.patch
+ip6_gre-do-not-confirm-neighbor-when-do-pmtu-update.patch
+gtp-do-not-confirm-neighbor-when-do-pmtu-update.patch
+net-dst-add-new-function-skb_dst_update_pmtu_no_confirm.patch
+tunnel-do-not-confirm-neighbor-when-do-pmtu-update.patch
+vti-do-not-confirm-neighbor-when-do-pmtu-update.patch
+sit-do-not-confirm-neighbor-when-do-pmtu-update.patch
+net-dst-do-not-confirm-neighbor-for-vxlan-and-geneve-pmtu-update.patch
+gtp-do-not-allow-adding-duplicate-tid-and-ms_addr-pdp-context.patch
+net-marvell-mvpp2-phylink-requires-the-link-interrupt.patch
+tcp-dccp-fix-possible-race-__inet_lookup_established.patch
+tcp-do-not-send-empty-skb-from-tcp_write_xmit.patch
+gtp-fix-wrong-condition-in-gtp_genl_dump_pdp.patch
+gtp-fix-an-use-after-free-in-ipv4_pdp_find.patch
+gtp-avoid-zero-size-hashtable.patch
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:36:29 PM CET
+From: Hangbin Liu <liuhangbin@gmail.com>
+Date: Sun, 22 Dec 2019 10:51:15 +0800
+Subject: sit: do not confirm neighbor when do pmtu update
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit 4d42df46d6372ece4cb4279870b46c2ea7304a47 ]
+
+When doing an IPv6 tunnel PMTU update that ends up calling
+__ip6_rt_update_pmtu(), we should not call dst_confirm_neigh() as there is
+no two-way communication.
+
+v5: No change.
+v4: No change.
+v3: Do not remove dst_confirm_neigh, but add a new bool parameter in
+ dst_ops.update_pmtu to control whether we should do neighbor confirm.
+ Also split the big patch to small ones for each area.
+v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu.
+
+Reviewed-by: Guillaume Nault <gnault@redhat.com>
+Acked-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/sit.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv6/sit.c
++++ b/net/ipv6/sit.c
+@@ -943,7 +943,7 @@ static netdev_tx_t ipip6_tunnel_xmit(str
+ }
+
+ if (tunnel->parms.iph.daddr)
+- skb_dst_update_pmtu(skb, mtu);
++ skb_dst_update_pmtu_no_confirm(skb, mtu);
+
+ if (skb->len > mtu && !skb_is_gso(skb)) {
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:36:29 PM CET
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 13 Dec 2019 18:20:41 -0800
+Subject: tcp/dccp: fix possible race __inet_lookup_established()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 8dbd76e79a16b45b2ccb01d2f2e08dbf64e71e40 ]
+
+Michal Kubecek and Firo Yang did a very nice analysis of crashes
+happening in __inet_lookup_established().
+
+Since a TCP socket can go from TCP_ESTABLISH to TCP_LISTEN
+(via a close()/socket()/listen() cycle) without an RCU grace period,
+I should not have changed the listeners' linkage in their hash table.
+
+They must use the nulls protocol (Documentation/RCU/rculist_nulls.txt),
+so that a lookup can detect that a socket in a hash list was moved to
+another one.
+
+Since we added code in commit d296ba60d8e2 ("soreuseport: Resolve
+merge conflict for v4/v6 ordering fix"), we have to add
+an hlist_nulls_add_tail_rcu() helper.
+
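+For illustration, the nulls protocol lets a lookup notice that it has walked
+onto a chain belonging to a different bucket and restart (sketch of the
+established-table lookup pattern):
+
+    begin:
+    sk_nulls_for_each_rcu(sk, node, &head->chain) {
+        ...
+    }
+    if (get_nulls_value(node) != slot)
+        goto begin; /* socket moved to another chain, retry */
+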
+Fixes: 3b24d854cb35 ("tcp/dccp: do not touch listener sk_refcnt under synflood")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Michal Kubecek <mkubecek@suse.cz>
+Reported-by: Firo Yang <firo.yang@suse.com>
+Reviewed-by: Michal Kubecek <mkubecek@suse.cz>
+Link: https://lore.kernel.org/netdev/20191120083919.GH27852@unicorn.suse.cz/
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/rculist_nulls.h | 37 +++++++++++++++++++++++++++++++++++++
+ include/net/inet_hashtables.h | 12 +++++++++---
+ include/net/sock.h | 5 +++++
+ net/ipv4/inet_diag.c | 3 ++-
+ net/ipv4/inet_hashtables.c | 16 ++++++++--------
+ net/ipv4/tcp_ipv4.c | 7 ++++---
+ 6 files changed, 65 insertions(+), 15 deletions(-)
+
+--- a/include/linux/rculist_nulls.h
++++ b/include/linux/rculist_nulls.h
+@@ -101,6 +101,43 @@ static inline void hlist_nulls_add_head_
+ }
+
+ /**
++ * hlist_nulls_add_tail_rcu
++ * @n: the element to add to the hash list.
++ * @h: the list to add to.
++ *
++ * Description:
++ * Adds the specified element to the specified hlist_nulls,
++ * while permitting racing traversals.
++ *
++ * The caller must take whatever precautions are necessary
++ * (such as holding appropriate locks) to avoid racing
++ * with another list-mutation primitive, such as hlist_nulls_add_head_rcu()
++ * or hlist_nulls_del_rcu(), running on this same list.
++ * However, it is perfectly legal to run concurrently with
++ * the _rcu list-traversal primitives, such as
++ * hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency
++ * problems on Alpha CPUs. Regardless of the type of CPU, the
++ * list-traversal primitive must be guarded by rcu_read_lock().
++ */
++static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n,
++ struct hlist_nulls_head *h)
++{
++ struct hlist_nulls_node *i, *last = NULL;
++
++ /* Note: write side code, so rcu accessors are not needed. */
++ for (i = h->first; !is_a_nulls(i); i = i->next)
++ last = i;
++
++ if (last) {
++ n->next = last->next;
++ n->pprev = &last->next;
++ rcu_assign_pointer(hlist_next_rcu(last), n);
++ } else {
++ hlist_nulls_add_head_rcu(n, h);
++ }
++}
++
++/**
+ * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type
+ * @tpos: the type * to use as a loop cursor.
+ * @pos: the &struct hlist_nulls_node to use as a loop cursor.
+--- a/include/net/inet_hashtables.h
++++ b/include/net/inet_hashtables.h
+@@ -106,13 +106,19 @@ struct inet_bind_hashbucket {
+ struct hlist_head chain;
+ };
+
+-/*
+- * Sockets can be hashed in established or listening table
++/* Sockets can be hashed in established or listening table.
++ * We must use different 'nulls' end-of-chain value for all hash buckets :
++ * A socket might transition from ESTABLISH to LISTEN state without
++ * RCU grace period. A lookup in ehash table needs to handle this case.
+ */
++#define LISTENING_NULLS_BASE (1U << 29)
+ struct inet_listen_hashbucket {
+ spinlock_t lock;
+ unsigned int count;
+- struct hlist_head head;
++ union {
++ struct hlist_head head;
++ struct hlist_nulls_head nulls_head;
++ };
+ };
+
+ /* This is for listening sockets, thus all sockets which possess wildcards. */
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -721,6 +721,11 @@ static inline void __sk_nulls_add_node_r
+ hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list);
+ }
+
++static inline void __sk_nulls_add_node_tail_rcu(struct sock *sk, struct hlist_nulls_head *list)
++{
++ hlist_nulls_add_tail_rcu(&sk->sk_nulls_node, list);
++}
++
+ static inline void sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list)
+ {
+ sock_hold(sk);
+--- a/net/ipv4/inet_diag.c
++++ b/net/ipv4/inet_diag.c
+@@ -918,11 +918,12 @@ void inet_diag_dump_icsk(struct inet_has
+
+ for (i = s_i; i < INET_LHTABLE_SIZE; i++) {
+ struct inet_listen_hashbucket *ilb;
++ struct hlist_nulls_node *node;
+
+ num = 0;
+ ilb = &hashinfo->listening_hash[i];
+ spin_lock(&ilb->lock);
+- sk_for_each(sk, &ilb->head) {
++ sk_nulls_for_each(sk, node, &ilb->nulls_head) {
+ struct inet_sock *inet = inet_sk(sk);
+
+ if (!net_eq(sock_net(sk), net))
+--- a/net/ipv4/inet_hashtables.c
++++ b/net/ipv4/inet_hashtables.c
+@@ -560,10 +560,11 @@ static int inet_reuseport_add_sock(struc
+ struct inet_listen_hashbucket *ilb)
+ {
+ struct inet_bind_bucket *tb = inet_csk(sk)->icsk_bind_hash;
++ const struct hlist_nulls_node *node;
+ struct sock *sk2;
+ kuid_t uid = sock_i_uid(sk);
+
+- sk_for_each_rcu(sk2, &ilb->head) {
++ sk_nulls_for_each_rcu(sk2, node, &ilb->nulls_head) {
+ if (sk2 != sk &&
+ sk2->sk_family == sk->sk_family &&
+ ipv6_only_sock(sk2) == ipv6_only_sock(sk) &&
+@@ -599,9 +600,9 @@ int __inet_hash(struct sock *sk, struct
+ }
+ if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
+ sk->sk_family == AF_INET6)
+- hlist_add_tail_rcu(&sk->sk_node, &ilb->head);
++ __sk_nulls_add_node_tail_rcu(sk, &ilb->nulls_head);
+ else
+- hlist_add_head_rcu(&sk->sk_node, &ilb->head);
++ __sk_nulls_add_node_rcu(sk, &ilb->nulls_head);
+ inet_hash2(hashinfo, sk);
+ ilb->count++;
+ sock_set_flag(sk, SOCK_RCU_FREE);
+@@ -650,11 +651,9 @@ void inet_unhash(struct sock *sk)
+ reuseport_detach_sock(sk);
+ if (ilb) {
+ inet_unhash2(hashinfo, sk);
+- __sk_del_node_init(sk);
+- ilb->count--;
+- } else {
+- __sk_nulls_del_node_init_rcu(sk);
++ ilb->count--;
+ }
++ __sk_nulls_del_node_init_rcu(sk);
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+ unlock:
+ spin_unlock_bh(lock);
+@@ -790,7 +789,8 @@ void inet_hashinfo_init(struct inet_hash
+
+ for (i = 0; i < INET_LHTABLE_SIZE; i++) {
+ spin_lock_init(&h->listening_hash[i].lock);
+- INIT_HLIST_HEAD(&h->listening_hash[i].head);
++ INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].nulls_head,
++ i + LISTENING_NULLS_BASE);
+ h->listening_hash[i].count = 0;
+ }
+
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -2020,13 +2020,14 @@ static void *listening_get_next(struct s
+ struct tcp_iter_state *st = seq->private;
+ struct net *net = seq_file_net(seq);
+ struct inet_listen_hashbucket *ilb;
++ struct hlist_nulls_node *node;
+ struct sock *sk = cur;
+
+ if (!sk) {
+ get_head:
+ ilb = &tcp_hashinfo.listening_hash[st->bucket];
+ spin_lock(&ilb->lock);
+- sk = sk_head(&ilb->head);
++ sk = sk_nulls_head(&ilb->nulls_head);
+ st->offset = 0;
+ goto get_sk;
+ }
+@@ -2034,9 +2035,9 @@ get_head:
+ ++st->num;
+ ++st->offset;
+
+- sk = sk_next(sk);
++ sk = sk_nulls_next(sk);
+ get_sk:
+- sk_for_each_from(sk) {
++ sk_nulls_for_each_from(sk, node) {
+ if (!net_eq(sock_net(sk), net))
+ continue;
+ if (sk->sk_family == afinfo->family)
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:36:29 PM CET
+From: Eric Dumazet <edumazet@google.com>
+Date: Thu, 12 Dec 2019 12:55:29 -0800
+Subject: tcp: do not send empty skb from tcp_write_xmit()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 1f85e6267caca44b30c54711652b0726fadbb131 ]
+
+The backport of commit fdfc5c8594c2 ("tcp: remove empty skb from
+write queue in error cases") into linux-4.14 stable triggered
+various bugs. One of them has been fixed in commit ba2ddb43f270
+("tcp: Don't dequeue SYN/FIN-segments from write-queue"), but
+we still see crashes on some occasions.
+
+The root cause is that when tcp_sendmsg() has allocated a fresh
+skb but could not append a fragment before being blocked
+in sk_stream_wait_memory(), tcp_write_xmit() might be called
+and decide to send this fresh, empty skb.
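+
+In other words, the skb sits on the write queue with seq == end_seq, because
+the entail path initializes both from tp->write_seq before any payload is
+copied in (simplified sketch of that step, not the exact helper):
+
+	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
+
+	tcb->seq       = tcb->end_seq = tp->write_seq; /* no payload yet   */
+	tcb->tcp_flags = TCPHDR_ACK;                   /* pure ACK if sent */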
+
+Sending an empty packet is not only silly, it might have caused
+many issues we had in the past with tp->packets_out being
+out of sync.
+
+Fixes: c65f7f00c587 ("[TCP]: Simplify SKB data portion allocation with NETIF_F_SG.")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Christoph Paasch <cpaasch@apple.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Cc: Jason Baron <jbaron@akamai.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_output.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -2376,6 +2376,14 @@ static bool tcp_write_xmit(struct sock *
+ if (tcp_small_queue_check(sk, skb, 0))
+ break;
+
++ /* Argh, we hit an empty skb(), presumably a thread
++ * is sleeping in sendmsg()/sk_stream_wait_memory().
++ * We do not want to send a pure-ack packet and have
++ * a strange looking rtx queue with empty packet(s).
++ */
++ if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq)
++ break;
++
+ if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
+ break;
+
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:36:29 PM CET
+From: Cambda Zhu <cambda@linux.alibaba.com>
+Date: Fri, 27 Dec 2019 16:52:37 +0800
+Subject: tcp: Fix highest_sack and highest_sack_seq
+
+From: Cambda Zhu <cambda@linux.alibaba.com>
+
+[ Upstream commit 853697504de043ff0bfd815bd3a64de1dce73dc7 ]
+
+Commit 50895b9de1d3 ("tcp: highest_sack fix") removed the logic that
+set tp->highest_sack to the head of the send queue. That logic was
+error prone, but it had a purpose: as long as the pointer to the
+highest-SACKed skb is used instead of a sequence number, we need to
+set tp->highest_sack to NULL when there is no skb after the last SACK,
+and then replace the NULL with the real skb when a new skb is inserted
+into the rtx queue, because NULL means the highest SACK seq is
+tp->snd_nxt. If tp->highest_sack is NULL and new data is sent, the
+next ACK carrying a SACK option will increase tp->reordering
+unexpectedly.
+
+This patch sets tp->highest_sack to the tail of the rtx queue if it
+is NULL and new data is sent. The patch keeps the rule that
+highest_sack can only be maintained by SACK processing, except for
+this single case.
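+
+For reference, the consumer of the "NULL means snd_nxt" convention is the
+tcp_highest_sack_seq() helper in include/net/tcp.h, which at the time of
+this fix looks roughly like:
+
+static inline u32 tcp_highest_sack_seq(struct tcp_sock *tp)
+{
+	if (!tp->sacked_out)
+		return tp->snd_una;
+
+	if (tp->highest_sack == NULL)
+		return tp->snd_nxt;
+
+	return TCP_SKB_CB(tp->highest_sack)->seq;
+}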
+
+Fixes: 50895b9de1d3 ("tcp: highest_sack fix")
+Signed-off-by: Cambda Zhu <cambda@linux.alibaba.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_output.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -60,6 +60,9 @@ static void tcp_event_new_data_sent(stru
+ __skb_unlink(skb, &sk->sk_write_queue);
+ tcp_rbtree_insert(&sk->tcp_rtx_queue, skb);
+
++ if (tp->highest_sack == NULL)
++ tp->highest_sack = skb;
++
+ tp->packets_out += tcp_skb_pcount(skb);
+ if (!prior_packets || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
+ tcp_rearm_rto(sk);
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:36:29 PM CET
+From: Hangbin Liu <liuhangbin@gmail.com>
+Date: Sun, 22 Dec 2019 10:51:13 +0800
+Subject: tunnel: do not confirm neighbor when do pmtu update
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit 7a1592bcb15d71400a98632727791d1e68ea0ee8 ]
+
+When doing a tunnel PMTU update, which calls __ip6_rt_update_pmtu() in the
+end, we should not call dst_confirm_neigh() as there is no two-way
+communication.
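+
+The _no_confirm variant used below is the helper this series relies on,
+provided by an earlier patch of the set; its intent is captured by this
+minimal sketch (it simply passes confirm_neigh = false down to
+dst_ops.update_pmtu so the neighbour entry is left untouched):
+
+static inline void skb_dst_update_pmtu_no_confirm(struct sk_buff *skb, u32 mtu)
+{
+	struct dst_entry *dst = skb_dst(skb);
+
+	if (dst && dst->ops->update_pmtu)
+		dst->ops->update_pmtu(dst, NULL, skb, mtu, false);
+}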
+
+v5: No change.
+v4: Update commit description
+v3: Do not remove dst_confirm_neigh, but add a new bool parameter in
+ dst_ops.update_pmtu to control whether we should do neighbor confirm.
+ Also split the big patch to small ones for each area.
+v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu.
+
+Fixes: 0dec879f636f ("net: use dst_confirm_neigh for UDP, RAW, ICMP, L2TP")
+Reviewed-by: Guillaume Nault <gnault@redhat.com>
+Tested-by: Guillaume Nault <gnault@redhat.com>
+Acked-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ip_tunnel.c | 2 +-
+ net/ipv6/ip6_tunnel.c | 4 ++--
+ 2 files changed, 3 insertions(+), 3 deletions(-)
+
+--- a/net/ipv4/ip_tunnel.c
++++ b/net/ipv4/ip_tunnel.c
+@@ -513,7 +513,7 @@ static int tnl_update_pmtu(struct net_de
+ else
+ mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
+
+- skb_dst_update_pmtu(skb, mtu);
++ skb_dst_update_pmtu_no_confirm(skb, mtu);
+
+ if (skb->protocol == htons(ETH_P_IP)) {
+ if (!skb_is_gso(skb) &&
+--- a/net/ipv6/ip6_tunnel.c
++++ b/net/ipv6/ip6_tunnel.c
+@@ -645,7 +645,7 @@ ip4ip6_err(struct sk_buff *skb, struct i
+ if (rel_info > dst_mtu(skb_dst(skb2)))
+ goto out;
+
+- skb_dst_update_pmtu(skb2, rel_info);
++ skb_dst_update_pmtu_no_confirm(skb2, rel_info);
+ }
+
+ icmp_send(skb2, rel_type, rel_code, htonl(rel_info));
+@@ -1137,7 +1137,7 @@ route_lookup:
+ mtu = max(mtu, skb->protocol == htons(ETH_P_IPV6) ?
+ IPV6_MIN_MTU : IPV4_MIN_MTU);
+
+- skb_dst_update_pmtu(skb, mtu);
++ skb_dst_update_pmtu_no_confirm(skb, mtu);
+ if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) {
+ *pmtu = mtu;
+ err = -EMSGSIZE;
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:36:29 PM CET
+From: Antonio Messina <amessina@google.com>
+Date: Thu, 19 Dec 2019 15:08:03 +0100
+Subject: udp: fix integer overflow while computing available space in sk_rcvbuf
+
+From: Antonio Messina <amessina@google.com>
+
+[ Upstream commit feed8a4fc9d46c3126fb9fcae0e9248270c6321a ]
+
+When the size of the receive buffer for a socket is close to 2^31,
+computing whether we have enough space in the buffer to copy a packet
+from the queue into it can hit an integer overflow.
+
+When a user sets net.core.rmem_default to a value close to 2^31, UDP
+packets are dropped because of this overflow. This can be visible, for
+instance, as failures to resolve hostnames.
+
+This can be fixed by casting sk_rcvbuf (which is an int) to unsigned
+int, similarly to how it is done in TCP.
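+
+A standalone illustration of the arithmetic (plain userspace C, not the
+kernel code; the variable values are made up for the example):
+
+#include <limits.h>
+#include <stdio.h>
+
+int main(void)
+{
+	int rmem = 100000;        /* sk_rmem_alloc after adding the skb size */
+	int size = 1500;          /* truesize of the incoming packet         */
+	int sk_rcvbuf = INT_MAX;  /* net.core.rmem_default close to 2^31     */
+
+	/* Signed addition overflows here (undefined behaviour in C; on the
+	 * compilers the kernel uses it wraps negative), so the drop test
+	 * spuriously triggers:
+	 */
+	printf("signed  : drop=%d\n", rmem > (size + sk_rcvbuf));
+
+	/* With sk_rcvbuf cast to unsigned int the sum stays well-defined
+	 * and the packet is accepted:
+	 */
+	printf("unsigned: drop=%d\n", rmem > (size + (unsigned int)sk_rcvbuf));
+	return 0;
+}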
+
+Signed-off-by: Antonio Messina <amessina@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/udp.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -1412,7 +1412,7 @@ int __udp_enqueue_schedule_skb(struct so
+ * queue contains some other skb
+ */
+ rmem = atomic_add_return(size, &sk->sk_rmem_alloc);
+- if (rmem > (size + sk->sk_rcvbuf))
++ if (rmem > (size + (unsigned int)sk->sk_rcvbuf))
+ goto uncharge_drop;
+
+ spin_lock(&list->lock);
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:36:29 PM CET
+From: Stefano Garzarella <sgarzare@redhat.com>
+Date: Fri, 6 Dec 2019 15:39:12 +0100
+Subject: vhost/vsock: accept only packets with the right dst_cid
+
+From: Stefano Garzarella <sgarzare@redhat.com>
+
+[ Upstream commit 8a3cc29c316c17de590e3ff8b59f3d6cbfd37b0a ]
+
+When we receive a new packet from the guest, we check whether the
+src_cid is correct, but we forgot to check the dst_cid.
+
+The host should accept only packets whose dst_cid is
+equal to the host CID.
+
+Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/vhost/vsock.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/vhost/vsock.c
++++ b/drivers/vhost/vsock.c
+@@ -436,7 +436,9 @@ static void vhost_vsock_handle_tx_kick(s
+ virtio_transport_deliver_tap_pkt(pkt);
+
+ /* Only accept correctly addressed packets */
+- if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid)
++ if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid &&
++ le64_to_cpu(pkt->hdr.dst_cid) ==
++ vhost_transport_get_local_cid())
+ virtio_transport_recv_pkt(pkt);
+ else
+ virtio_transport_free_pkt(pkt);
--- /dev/null
+From foo@baz Wed 01 Jan 2020 10:36:29 PM CET
+From: Hangbin Liu <liuhangbin@gmail.com>
+Date: Sun, 22 Dec 2019 10:51:14 +0800
+Subject: vti: do not confirm neighbor when do pmtu update
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit 8247a79efa2f28b44329f363272550c1738377de ]
+
+When doing an IPv6 tunnel PMTU update, which calls __ip6_rt_update_pmtu() in
+the end, we should not call dst_confirm_neigh() as there is no two-way
+communication.
+
+Although vti and vti6 are immune to this problem because they are IFF_NOARP
+interfaces, as Guillaume pointed out, there is still no point in confirming
+the neighbour here.
+
+v5: Update commit description.
+v4: No change.
+v3: Do not remove dst_confirm_neigh, but add a new bool parameter in
+ dst_ops.update_pmtu to control whether we should do neighbor confirm.
+ Also split the big patch to small ones for each area.
+v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu.
+
+Reviewed-by: Guillaume Nault <gnault@redhat.com>
+Acked-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ip_vti.c | 2 +-
+ net/ipv6/ip6_vti.c | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/ip_vti.c
++++ b/net/ipv4/ip_vti.c
+@@ -235,7 +235,7 @@ static netdev_tx_t vti_xmit(struct sk_bu
+
+ mtu = dst_mtu(dst);
+ if (skb->len > mtu) {
+- skb_dst_update_pmtu(skb, mtu);
++ skb_dst_update_pmtu_no_confirm(skb, mtu);
+ if (skb->protocol == htons(ETH_P_IP)) {
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+ htonl(mtu));
+--- a/net/ipv6/ip6_vti.c
++++ b/net/ipv6/ip6_vti.c
+@@ -483,7 +483,7 @@ vti6_xmit(struct sk_buff *skb, struct ne
+
+ mtu = dst_mtu(dst);
+ if (skb->len > mtu) {
+- skb_dst_update_pmtu(skb, mtu);
++ skb_dst_update_pmtu_no_confirm(skb, mtu);
+
+ if (skb->protocol == htons(ETH_P_IPV6)) {
+ if (mtu < IPV6_MIN_MTU)