4.19-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Sat, 16 May 2020 16:10:35 +0000 (18:10 +0200)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Sat, 16 May 2020 16:10:35 +0000 (18:10 +0200)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 16 May 2020 16:10:35 +0000 (18:10 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 16 May 2020 16:10:35 +0000 (18:10 +0200)
diff --git a/queue-4.19/hinic-fix-a-bug-of-ndo_stop.patch b/queue-4.19/hinic-fix-a-bug-of-ndo_stop.patch

new file mode 100644 (file)

index 0000000..6eab180
--- /dev/null
+++ b/queue-4.19/hinic-fix-a-bug-of-ndo_stop.patch
@@ -0,0 +1,119 @@
+From foo@baz Sat 16 May 2020 06:06:27 PM CEST
+From: Luo bin <luobin9@huawei.com>
+Date: Sun, 10 May 2020 19:01:08 +0000
+Subject: hinic: fix a bug of ndo_stop
+
+From: Luo bin <luobin9@huawei.com>
+
+[ Upstream commit e8a1b0efd632d1c9db7d4e93da66377c7b524862 ]
+
+If some function in the ndo_stop interface returns failure because of
+a hardware fault, we must go on executing the remaining steps rather
+than return failure directly, otherwise memory will be leaked. Also bump
+the timeout for SET_FUNC_STATE to ensure that the cmd won't return failure
+when hw is busy. Otherwise hw may stomp host memory if we free
+memory regardless of the return value of SET_FUNC_STATE.
+
+Fixes: 51ba902a16e6 ("net-next/hinic: Initialize hw interface")
+Signed-off-by: Luo bin <luobin9@huawei.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c |   16 ++++++++++++----
+ drivers/net/ethernet/huawei/hinic/hinic_main.c    |   18 +++---------------
+ 2 files changed, 15 insertions(+), 19 deletions(-)
+
+--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c
++++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c
+@@ -54,6 +54,8 @@
+ 
+ #define MGMT_MSG_TIMEOUT                5000
+ 
++#define SET_FUNC_PORT_MGMT_TIMEOUT    25000
++
+ #define mgmt_to_pfhwdev(pf_mgmt)        \
+               container_of(pf_mgmt, struct hinic_pfhwdev, pf_to_mgmt)
+ 
+@@ -247,12 +249,13 @@ static int msg_to_mgmt_sync(struct hinic
+                           u8 *buf_in, u16 in_size,
+                           u8 *buf_out, u16 *out_size,
+                           enum mgmt_direction_type direction,
+-                          u16 resp_msg_id)
++                          u16 resp_msg_id, u32 timeout)
+ {
+       struct hinic_hwif *hwif = pf_to_mgmt->hwif;
+       struct pci_dev *pdev = hwif->pdev;
+       struct hinic_recv_msg *recv_msg;
+       struct completion *recv_done;
++      unsigned long timeo;
+       u16 msg_id;
+       int err;
+ 
+@@ -276,8 +279,9 @@ static int msg_to_mgmt_sync(struct hinic
+               goto unlock_sync_msg;
+       }
+ 
+-      if (!wait_for_completion_timeout(recv_done,
+-                                       msecs_to_jiffies(MGMT_MSG_TIMEOUT))) {
++      timeo = msecs_to_jiffies(timeout ? timeout : MGMT_MSG_TIMEOUT);
++
++      if (!wait_for_completion_timeout(recv_done, timeo)) {
+               dev_err(&pdev->dev, "MGMT timeout, MSG id = %d\n", msg_id);
+               err = -ETIMEDOUT;
+               goto unlock_sync_msg;
+@@ -351,6 +355,7 @@ int hinic_msg_to_mgmt(struct hinic_pf_to
+ {
+       struct hinic_hwif *hwif = pf_to_mgmt->hwif;
+       struct pci_dev *pdev = hwif->pdev;
++      u32 timeout = 0;
+ 
+       if (sync != HINIC_MGMT_MSG_SYNC) {
+               dev_err(&pdev->dev, "Invalid MGMT msg type\n");
+@@ -362,9 +367,12 @@ int hinic_msg_to_mgmt(struct hinic_pf_to
+               return -EINVAL;
+       }
+ 
++      if (cmd == HINIC_PORT_CMD_SET_FUNC_STATE)
++              timeout = SET_FUNC_PORT_MGMT_TIMEOUT;
++
+       return msg_to_mgmt_sync(pf_to_mgmt, mod, cmd, buf_in, in_size,
+                               buf_out, out_size, MGMT_DIRECT_SEND,
+-                              MSG_NOT_RESP);
++                              MSG_NOT_RESP, timeout);
+ }
+ 
+ /**
+--- a/drivers/net/ethernet/huawei/hinic/hinic_main.c
++++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c
+@@ -475,7 +475,6 @@ static int hinic_close(struct net_device
+ {
+       struct hinic_dev *nic_dev = netdev_priv(netdev);
+       unsigned int flags;
+-      int err;
+ 
+       down(&nic_dev->mgmt_lock);
+ 
+@@ -489,20 +488,9 @@ static int hinic_close(struct net_device
+ 
+       up(&nic_dev->mgmt_lock);
+ 
+-      err = hinic_port_set_func_state(nic_dev, HINIC_FUNC_PORT_DISABLE);
+-      if (err) {
+-              netif_err(nic_dev, drv, netdev,
+-                        "Failed to set func port state\n");
+-              nic_dev->flags |= (flags & HINIC_INTF_UP);
+-              return err;
+-      }
+-
+-      err = hinic_port_set_state(nic_dev, HINIC_PORT_DISABLE);
+-      if (err) {
+-              netif_err(nic_dev, drv, netdev, "Failed to set port state\n");
+-              nic_dev->flags |= (flags & HINIC_INTF_UP);
+-              return err;
+-      }
++      hinic_port_set_state(nic_dev, HINIC_PORT_DISABLE);
++
++      hinic_port_set_func_state(nic_dev, HINIC_FUNC_PORT_DISABLE);
+ 
+       free_rxqs(nic_dev);
+       free_txqs(nic_dev);
diff --git a/queue-4.19/net-dsa-loop-add-module-soft-dependency.patch b/queue-4.19/net-dsa-loop-add-module-soft-dependency.patch

new file mode 100644 (file)

index 0000000..e118db7
--- /dev/null
+++ b/queue-4.19/net-dsa-loop-add-module-soft-dependency.patch
@@ -0,0 +1,32 @@
+From foo@baz Sat 16 May 2020 06:06:27 PM CEST
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Sat, 9 May 2020 16:45:44 -0700
+Subject: net: dsa: loop: Add module soft dependency
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+[ Upstream commit 3047211ca11bf77b3ecbce045c0aa544d934b945 ]
+
+There is a soft dependency against dsa_loop_bdinfo.ko which sets up the
+MDIO device registration, since there are no symbols referenced by
+dsa_loop.ko, there is no automatic loading of dsa_loop_bdinfo.ko which
+is needed.
+
+Fixes: 98cd1552ea27 ("net: dsa: Mock-up driver")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/dsa_loop.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/dsa/dsa_loop.c
++++ b/drivers/net/dsa/dsa_loop.c
+@@ -360,6 +360,7 @@ static void __exit dsa_loop_exit(void)
+ }
+ module_exit(dsa_loop_exit);
+ 
++MODULE_SOFTDEP("pre: dsa_loop_bdinfo");
+ MODULE_LICENSE("GPL");
+ MODULE_AUTHOR("Florian Fainelli");
+ MODULE_DESCRIPTION("DSA loopback driver");
diff --git a/queue-4.19/net-fix-a-potential-recursive-netdev_feat_change.patch b/queue-4.19/net-fix-a-potential-recursive-netdev_feat_change.patch

new file mode 100644 (file)

index 0000000..e948256
--- /dev/null
+++ b/queue-4.19/net-fix-a-potential-recursive-netdev_feat_change.patch
@@ -0,0 +1,66 @@
+From foo@baz Sat 16 May 2020 06:06:27 PM CEST
+From: Cong Wang <xiyou.wangcong@gmail.com>
+Date: Thu, 7 May 2020 12:19:03 -0700
+Subject: net: fix a potential recursive NETDEV_FEAT_CHANGE
+
+From: Cong Wang <xiyou.wangcong@gmail.com>
+
+[ Upstream commit dd912306ff008891c82cd9f63e8181e47a9cb2fb ]
+
+syzbot managed to trigger a recursive NETDEV_FEAT_CHANGE event
+between bonding master and slave. I managed to find a reproducer
+for this:
+
+  ip li set bond0 up
+  ifenslave bond0 eth0
+  brctl addbr br0
+  ethtool -K eth0 lro off
+  brctl addif br0 bond0
+  ip li set br0 up
+
+When a NETDEV_FEAT_CHANGE event is triggered on a bonding slave,
+it captures this and calls bond_compute_features() to fixup its
+master's and other slaves' features. However, when syncing with
+its lower devices by netdev_sync_lower_features() this event is
+triggered again on slaves when the LRO feature fails to change,
+so it goes back and forth recursively until the kernel stack is
+exhausted.
+
+Commit 17b85d29e82c intentionally lets __netdev_update_features()
+return -1 for such a failure case, so we have to just rely on
+the existing check inside netdev_sync_lower_features() and skip
+NETDEV_FEAT_CHANGE event only for this specific failure case.
+
+Fixes: fd867d51f889 ("net/core: generic support for disabling netdev features down stack")
+Reported-by: syzbot+e73ceacfd8560cc8a3ca@syzkaller.appspotmail.com
+Reported-by: syzbot+c2fb6f9ddcea95ba49b5@syzkaller.appspotmail.com
+Cc: Jarod Wilson <jarod@redhat.com>
+Cc: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Jann Horn <jannh@google.com>
+Reviewed-by: Jay Vosburgh <jay.vosburgh@canonical.com>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -8259,11 +8259,13 @@ static void netdev_sync_lower_features(s
+                       netdev_dbg(upper, "Disabling feature %pNF on lower dev %s.\n",
+                                  &feature, lower->name);
+                       lower->wanted_features &= ~feature;
+-                      netdev_update_features(lower);
++                      __netdev_update_features(lower);
+ 
+                       if (unlikely(lower->features & feature))
+                               netdev_WARN(upper, "failed to disable %pNF on %s!\n",
+                                           &feature, lower->name);
++                      else
++                              netdev_features_change(lower);
+               }
+       }
+ }
diff --git a/queue-4.19/net-ipv4-really-enforce-backoff-for-redirects.patch b/queue-4.19/net-ipv4-really-enforce-backoff-for-redirects.patch

new file mode 100644 (file)

index 0000000..03e56f1
--- /dev/null
+++ b/queue-4.19/net-ipv4-really-enforce-backoff-for-redirects.patch
@@ -0,0 +1,48 @@
+From foo@baz Sat 16 May 2020 06:06:27 PM CEST
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Fri, 8 May 2020 19:28:34 +0200
+Subject: net: ipv4: really enforce backoff for redirects
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit 57644431a6c2faac5d754ebd35780cf43a531b1a ]
+
+In commit b406472b5ad7 ("net: ipv4: avoid mixed n_redirects and
+rate_tokens usage") I missed the fact that a 0 'rate_tokens' will
+bypass the backoff algorithm.
+
+Since rate_tokens is cleared after a redirect silence, and never
+incremented on redirects, if the host keeps receiving packets
+requiring redirect it will reply ignoring the backoff.
+
+Additionally, the 'rate_last' field will be updated with the
+cadence of the ingress packet requiring redirect. If that rate is
+high enough, that will prevent the host from generating any
+other kind of ICMP messages.
+
+The check for a zero 'rate_tokens' value was likely a shortcut
+to avoid the more complex backoff algorithm after a redirect
+silence period. Address the issue checking for 'n_redirects'
+instead, which is incremented on successful redirect, and
+does not interfere with other ICMP replies.
+
+Fixes: b406472b5ad7 ("net: ipv4: avoid mixed n_redirects and rate_tokens usage")
+Reported-and-tested-by: Colin Walters <walters@redhat.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/route.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -906,7 +906,7 @@ void ip_rt_send_redirect(struct sk_buff
+       /* Check for load limit; set rate_last to the latest sent
+        * redirect.
+        */
+-      if (peer->rate_tokens == 0 ||
++      if (peer->n_redirects == 0 ||
+           time_after(jiffies,
+                      (peer->rate_last +
+                       (ip_rt_redirect_load << peer->n_redirects)))) {
diff --git a/queue-4.19/net-phy-fix-aneg-restart-in-phy_ethtool_set_eee.patch b/queue-4.19/net-phy-fix-aneg-restart-in-phy_ethtool_set_eee.patch

new file mode 100644 (file)

index 0000000..369ee3e
--- /dev/null
+++ b/queue-4.19/net-phy-fix-aneg-restart-in-phy_ethtool_set_eee.patch
@@ -0,0 +1,39 @@
+From foo@baz Sat 16 May 2020 06:06:27 PM CEST
+From: Heiner Kallweit <hkallweit1@gmail.com>
+Date: Tue, 12 May 2020 21:45:53 +0200
+Subject: net: phy: fix aneg restart in phy_ethtool_set_eee
+
+From: Heiner Kallweit <hkallweit1@gmail.com>
+
+[ Upstream commit 9de5d235b60a7cdfcdd5461e70c5663e713fde87 ]
+
+phy_restart_aneg() enables aneg in the PHY. That's not what we want
+if phydev->autoneg is disabled. In this case still update EEE
+advertisement register, but don't enable aneg and don't trigger an
+aneg restart.
+
+Fixes: f75abeb8338e ("net: phy: restart phy autonegotiation after EEE advertisment change")
+Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/phy/phy.c |    8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/phy/phy.c
++++ b/drivers/net/phy/phy.c
+@@ -1302,9 +1302,11 @@ int phy_ethtool_set_eee(struct phy_devic
+               /* Restart autonegotiation so the new modes get sent to the
+                * link partner.
+                */
+-              ret = phy_restart_aneg(phydev);
+-              if (ret < 0)
+-                      return ret;
++              if (phydev->autoneg == AUTONEG_ENABLE) {
++                      ret = phy_restart_aneg(phydev);
++                      if (ret < 0)
++                              return ret;
++              }
+       }
+ 
+       return 0;
diff --git a/queue-4.19/net-tcp-fix-rx-timestamp-behavior-for-tcp_recvmsg.patch b/queue-4.19/net-tcp-fix-rx-timestamp-behavior-for-tcp_recvmsg.patch

new file mode 100644 (file)

index 0000000..c3b7cf6
--- /dev/null
+++ b/queue-4.19/net-tcp-fix-rx-timestamp-behavior-for-tcp_recvmsg.patch
@@ -0,0 +1,52 @@
+From foo@baz Sat 16 May 2020 06:06:27 PM CEST
+From: Kelly Littlepage <kelly@onechronos.com>
+Date: Fri, 8 May 2020 19:58:46 +0000
+Subject: net: tcp: fix rx timestamp behavior for tcp_recvmsg
+
+From: Kelly Littlepage <kelly@onechronos.com>
+
+[ Upstream commit cc4de047b33be247f9c8150d3e496743a49642b8 ]
+
+The stated intent of the original commit is to "return the timestamp
+corresponding to the highest sequence number data returned." The current
+implementation returns the timestamp for the last byte of the last fully
+read skb, which is not necessarily the last byte in the recv buffer. This
+patch converts behavior to the original definition, and to the behavior of
+the previous draft versions of commit 98aaa913b4ed ("tcp: Extend
+SOF_TIMESTAMPING_RX_SOFTWARE to TCP recvmsg") which also match this
+behavior.
+
+Fixes: 98aaa913b4ed ("tcp: Extend SOF_TIMESTAMPING_RX_SOFTWARE to TCP recvmsg")
+Co-developed-by: Iris Liu <iris@onechronos.com>
+Signed-off-by: Iris Liu <iris@onechronos.com>
+Signed-off-by: Kelly Littlepage <kelly@onechronos.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Acked-by: Willem de Bruijn <willemb@google.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -2135,14 +2135,16 @@ skip_copy:
+                       tp->urg_data = 0;
+                       tcp_fast_path_check(sk);
+               }
+-              if (used + offset < skb->len)
+-                      continue;
+ 
+               if (TCP_SKB_CB(skb)->has_rxtstamp) {
+                       tcp_update_recv_tstamps(skb, &tss);
+                       has_tss = true;
+                       has_cmsg = true;
+               }
++
++              if (used + offset < skb->len)
++                      continue;
++
+               if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
+                       goto found_fin_ok;
+               if (!(flags & MSG_PEEK))
diff --git a/queue-4.19/netlabel-cope-with-null-catmap.patch b/queue-4.19/netlabel-cope-with-null-catmap.patch

new file mode 100644 (file)

index 0000000..3cec9ab
--- /dev/null
+++ b/queue-4.19/netlabel-cope-with-null-catmap.patch
@@ -0,0 +1,84 @@
+From foo@baz Sat 16 May 2020 06:06:27 PM CEST
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Tue, 12 May 2020 14:43:14 +0200
+Subject: netlabel: cope with NULL catmap
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit eead1c2ea2509fd754c6da893a94f0e69e83ebe4 ]
+
+The cipso and calipso code can set the MLS_CAT attribute on
+successful parsing, even if the corresponding catmap has
+not been allocated, as per current configuration and external
+input.
+
+Later, selinux code tries to access the catmap if the MLS_CAT flag
+is present via netlbl_catmap_getlong(). That may cause null ptr
+dereference while processing incoming network traffic.
+
+Address the issue setting the MLS_CAT flag only if the catmap is
+really allocated. Additionally let netlbl_catmap_getlong() cope
+with NULL catmap.
+
+Reported-by: Matthew Sheets <matthew.sheets@gd-ms.com>
+Fixes: 4b8feff251da ("netlabel: fix the horribly broken catmap functions")
+Fixes: ceba1832b1b2 ("calipso: Set the calipso socket label to match the secattr.")
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Acked-by: Paul Moore <paul@paul-moore.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/cipso_ipv4.c        |    6 ++++--
+ net/ipv6/calipso.c           |    3 ++-
+ net/netlabel/netlabel_kapi.c |    6 ++++++
+ 3 files changed, 12 insertions(+), 3 deletions(-)
+
+--- a/net/ipv4/cipso_ipv4.c
++++ b/net/ipv4/cipso_ipv4.c
+@@ -1272,7 +1272,8 @@ static int cipso_v4_parsetag_rbm(const s
+                       return ret_val;
+               }
+ 
+-              secattr->flags |= NETLBL_SECATTR_MLS_CAT;
++              if (secattr->attr.mls.cat)
++                      secattr->flags |= NETLBL_SECATTR_MLS_CAT;
+       }
+ 
+       return 0;
+@@ -1453,7 +1454,8 @@ static int cipso_v4_parsetag_rng(const s
+                       return ret_val;
+               }
+ 
+-              secattr->flags |= NETLBL_SECATTR_MLS_CAT;
++              if (secattr->attr.mls.cat)
++                      secattr->flags |= NETLBL_SECATTR_MLS_CAT;
+       }
+ 
+       return 0;
+--- a/net/ipv6/calipso.c
++++ b/net/ipv6/calipso.c
+@@ -1061,7 +1061,8 @@ static int calipso_opt_getattr(const uns
+                       goto getattr_return;
+               }
+ 
+-              secattr->flags |= NETLBL_SECATTR_MLS_CAT;
++              if (secattr->attr.mls.cat)
++                      secattr->flags |= NETLBL_SECATTR_MLS_CAT;
+       }
+ 
+       secattr->type = NETLBL_NLTYPE_CALIPSO;
+--- a/net/netlabel/netlabel_kapi.c
++++ b/net/netlabel/netlabel_kapi.c
+@@ -748,6 +748,12 @@ int netlbl_catmap_getlong(struct netlbl_
+       if ((off & (BITS_PER_LONG - 1)) != 0)
+               return -EINVAL;
+ 
++      /* a null catmap is equivalent to an empty one */
++      if (!catmap) {
++              *offset = (u32)-1;
++              return 0;
++      }
++
+       if (off < catmap->startbit) {
+               off = catmap->startbit;
+               *offset = off;
diff --git a/queue-4.19/netprio_cgroup-fix-unlimited-memory-leak-of-v2-cgroups.patch b/queue-4.19/netprio_cgroup-fix-unlimited-memory-leak-of-v2-cgroups.patch

new file mode 100644 (file)

index 0000000..ab4585e
--- /dev/null
+++ b/queue-4.19/netprio_cgroup-fix-unlimited-memory-leak-of-v2-cgroups.patch
@@ -0,0 +1,50 @@
+From foo@baz Sat 16 May 2020 06:06:27 PM CEST
+From: Zefan Li <lizefan@huawei.com>
+Date: Sat, 9 May 2020 11:32:10 +0800
+Subject: netprio_cgroup: Fix unlimited memory leak of v2 cgroups
+
+From: Zefan Li <lizefan@huawei.com>
+
+[ Upstream commit 090e28b229af92dc5b40786ca673999d59e73056 ]
+
+If systemd is configured to use hybrid mode which enables the use of
+both cgroup v1 and v2, systemd will create new cgroup on both the default
+root (v2) and netprio_cgroup hierarchy (v1) for a new session and attach
+task to the two cgroups. If the task does some network thing then the v2
+cgroup can never be freed after the session exited.
+
+One of our machines ran into OOM due to this memory leak.
+
+In the scenario described above when sk_alloc() is called
+cgroup_sk_alloc() thought it's in v2 mode, so it stores
+the cgroup pointer in sk->sk_cgrp_data and increments
+the cgroup refcnt, but then sock_update_netprioidx()
+thought it's in v1 mode, so it stores netprioidx value
+in sk->sk_cgrp_data, so the cgroup refcnt will never be freed.
+
+Currently we do the mode switch when someone writes to the ifpriomap
+cgroup control file. The easiest fix is to also do the switch when
+a task is attached to a new cgroup.
+
+Fixes: bd1060a1d671 ("sock, cgroup: add sock->sk_cgroup")
+Reported-by: Yang Yingliang <yangyingliang@huawei.com>
+Tested-by: Yang Yingliang <yangyingliang@huawei.com>
+Signed-off-by: Zefan Li <lizefan@huawei.com>
+Acked-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/netprio_cgroup.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/core/netprio_cgroup.c
++++ b/net/core/netprio_cgroup.c
+@@ -240,6 +240,8 @@ static void net_prio_attach(struct cgrou
+       struct task_struct *p;
+       struct cgroup_subsys_state *css;
+ 
++      cgroup_sk_alloc_disable();
++
+       cgroup_taskset_for_each(p, css, tset) {
+               void *v = (void *)(unsigned long)css->cgroup->id;
+ 
diff --git a/queue-4.19/pppoe-only-process-padt-targeted-at-local-interfaces.patch b/queue-4.19/pppoe-only-process-padt-targeted-at-local-interfaces.patch

new file mode 100644 (file)

index 0000000..fdc8a30
--- /dev/null
+++ b/queue-4.19/pppoe-only-process-padt-targeted-at-local-interfaces.patch
@@ -0,0 +1,35 @@
+From foo@baz Sat 16 May 2020 06:06:27 PM CEST
+From: Guillaume Nault <gnault@redhat.com>
+Date: Thu, 14 May 2020 12:15:39 +0200
+Subject: pppoe: only process PADT targeted at local interfaces
+
+From: Guillaume Nault <gnault@redhat.com>
+
+[ Upstream commit b8c158395119be62294da73646a3953c29ac974b ]
+
+We don't want to disconnect a session because of a stray PADT arriving
+while the interface is in promiscuous mode.
+Furthermore, multicast and broadcast packets make no sense here, so
+only PACKET_HOST is accepted.
+
+Reported-by: David Balažic <xerces9@gmail.com>
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Guillaume Nault <gnault@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ppp/pppoe.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/net/ppp/pppoe.c
++++ b/drivers/net/ppp/pppoe.c
+@@ -497,6 +497,9 @@ static int pppoe_disc_rcv(struct sk_buff
+       if (!skb)
+               goto out;
+ 
++      if (skb->pkt_type != PACKET_HOST)
++              goto abort;
++
+       if (!pskb_may_pull(skb, sizeof(struct pppoe_hdr)))
+               goto abort;
+ 
diff --git a/queue-4.19/revert-ipv6-add-mtu-lock-check-in-__ip6_rt_update_pmtu.patch b/queue-4.19/revert-ipv6-add-mtu-lock-check-in-__ip6_rt_update_pmtu.patch

new file mode 100644 (file)

index 0000000..085b08f
--- /dev/null
+++ b/queue-4.19/revert-ipv6-add-mtu-lock-check-in-__ip6_rt_update_pmtu.patch
@@ -0,0 +1,63 @@
+From foo@baz Sat 16 May 2020 06:06:27 PM CEST
+From: "Maciej Żenczykowski" <maze@google.com>
+Date: Tue, 5 May 2020 11:57:23 -0700
+Subject: Revert "ipv6: add mtu lock check in __ip6_rt_update_pmtu"
+
+From: "Maciej Żenczykowski" <maze@google.com>
+
+[ Upstream commit 09454fd0a4ce23cb3d8af65066c91a1bf27120dd ]
+
+This reverts commit 19bda36c4299ce3d7e5bce10bebe01764a655a6d:
+
+| ipv6: add mtu lock check in __ip6_rt_update_pmtu
+|
+| Prior to this patch, ipv6 didn't do mtu lock check in ip6_update_pmtu.
+| It leaded to that mtu lock doesn't really work when receiving the pkt
+| of ICMPV6_PKT_TOOBIG.
+|
+| This patch is to add mtu lock check in __ip6_rt_update_pmtu just as ipv4
+| did in __ip_rt_update_pmtu.
+
+The above reasoning is incorrect.  IPv6 *requires* icmp based pmtu to work.
+There's already a comment to this effect elsewhere in the kernel:
+
+  $ git grep -p -B1 -A3 'RTAX_MTU lock'
+  net/ipv6/route.c=4813=
+
+  static int rt6_mtu_change_route(struct fib6_info *f6i, void *p_arg)
+  ...
+    /* In IPv6 pmtu discovery is not optional,
+       so that RTAX_MTU lock cannot disable it.
+       We still use this lock to block changes
+       caused by addrconf/ndisc.
+    */
+
+This reverts to the pre-4.9 behaviour.
+
+Cc: Eric Dumazet <edumazet@google.com>
+Cc: Willem de Bruijn <willemb@google.com>
+Cc: Xin Long <lucien.xin@gmail.com>
+Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Signed-off-by: Maciej Żenczykowski <maze@google.com>
+Fixes: 19bda36c4299 ("ipv6: add mtu lock check in __ip6_rt_update_pmtu")
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/route.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -2360,8 +2360,10 @@ static void __ip6_rt_update_pmtu(struct
+       const struct in6_addr *daddr, *saddr;
+       struct rt6_info *rt6 = (struct rt6_info *)dst;
+ 
+-      if (dst_metric_locked(dst, RTAX_MTU))
+-              return;
++      /* Note: do *NOT* check dst_metric_locked(dst, RTAX_MTU)
++       * IPv6 pmtu discovery isn't optional, so 'mtu lock' cannot disable it.
++       * [see also comment in rt6_mtu_change_route()]
++       */
+ 
+       if (iph) {
+               daddr = &iph->daddr;
diff --git a/queue-4.19/series b/queue-4.19/series

index e0f7bdf7ad6c2658d197da881b692c539b2e7fca..f177c5186e9aee88f4d40ea1be0e9238c5924483 100644 (file)
--- a/queue-4.19/series
+++ b/queue-4.19/series
@@ -6,3 +6,16 @@ drop_monitor-work-around-gcc-10-stringop-overflow-wa.patch
  virtio-blk-handle-block_device_operations-callbacks-.patch
  scsi-sg-add-sg_remove_request-in-sg_write.patch
  mmc-sdhci-acpi-add-sdhci_quirk2_broken_64_bit_dma-fo.patch
+net-fix-a-potential-recursive-netdev_feat_change.patch
+netlabel-cope-with-null-catmap.patch
+net-phy-fix-aneg-restart-in-phy_ethtool_set_eee.patch
+pppoe-only-process-padt-targeted-at-local-interfaces.patch
+revert-ipv6-add-mtu-lock-check-in-__ip6_rt_update_pmtu.patch
+tcp-fix-error-recovery-in-tcp_zerocopy_receive.patch
+virtio_net-fix-lockdep-warning-on-32-bit.patch
+hinic-fix-a-bug-of-ndo_stop.patch
+net-dsa-loop-add-module-soft-dependency.patch
+net-ipv4-really-enforce-backoff-for-redirects.patch
+netprio_cgroup-fix-unlimited-memory-leak-of-v2-cgroups.patch
+net-tcp-fix-rx-timestamp-behavior-for-tcp_recvmsg.patch
+tcp-fix-so_rcvlowat-hangs-with-fat-skbs.patch
diff --git a/queue-4.19/tcp-fix-error-recovery-in-tcp_zerocopy_receive.patch b/queue-4.19/tcp-fix-error-recovery-in-tcp_zerocopy_receive.patch

new file mode 100644 (file)

index 0000000..d61c6dc
--- /dev/null
+++ b/queue-4.19/tcp-fix-error-recovery-in-tcp_zerocopy_receive.patch
@@ -0,0 +1,76 @@
+From foo@baz Sat 16 May 2020 06:06:27 PM CEST
+From: Eric Dumazet <edumazet@google.com>
+Date: Thu, 14 May 2020 13:58:13 -0700
+Subject: tcp: fix error recovery in tcp_zerocopy_receive()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit e776af608f692a7a647455106295fa34469e7475 ]
+
+If user provides wrong virtual address in TCP_ZEROCOPY_RECEIVE
+operation we want to return -EINVAL error.
+
+But depending on zc->recv_skip_hint content, we might return
+-EIO error if the socket has SOCK_DONE set.
+
+Make sure to return -EINVAL in this case.
+
+BUG: KMSAN: uninit-value in tcp_zerocopy_receive net/ipv4/tcp.c:1833 [inline]
+BUG: KMSAN: uninit-value in do_tcp_getsockopt+0x4494/0x6320 net/ipv4/tcp.c:3685
+CPU: 1 PID: 625 Comm: syz-executor.0 Not tainted 5.7.0-rc4-syzkaller #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Call Trace:
+ __dump_stack lib/dump_stack.c:77 [inline]
+ dump_stack+0x1c9/0x220 lib/dump_stack.c:118
+ kmsan_report+0xf7/0x1e0 mm/kmsan/kmsan_report.c:121
+ __msan_warning+0x58/0xa0 mm/kmsan/kmsan_instr.c:215
+ tcp_zerocopy_receive net/ipv4/tcp.c:1833 [inline]
+ do_tcp_getsockopt+0x4494/0x6320 net/ipv4/tcp.c:3685
+ tcp_getsockopt+0xf8/0x1f0 net/ipv4/tcp.c:3728
+ sock_common_getsockopt+0x13f/0x180 net/core/sock.c:3131
+ __sys_getsockopt+0x533/0x7b0 net/socket.c:2177
+ __do_sys_getsockopt net/socket.c:2192 [inline]
+ __se_sys_getsockopt+0xe1/0x100 net/socket.c:2189
+ __x64_sys_getsockopt+0x62/0x80 net/socket.c:2189
+ do_syscall_64+0xb8/0x160 arch/x86/entry/common.c:297
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+RIP: 0033:0x45c829
+Code: 0d b7 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 db b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00
+RSP: 002b:00007f1deeb72c78 EFLAGS: 00000246 ORIG_RAX: 0000000000000037
+RAX: ffffffffffffffda RBX: 00000000004e01e0 RCX: 000000000045c829
+RDX: 0000000000000023 RSI: 0000000000000006 RDI: 0000000000000009
+RBP: 000000000078bf00 R08: 0000000020000200 R09: 0000000000000000
+R10: 00000000200001c0 R11: 0000000000000246 R12: 00000000ffffffff
+R13: 00000000000001d8 R14: 00000000004d3038 R15: 00007f1deeb736d4
+
+Local variable ----zc@do_tcp_getsockopt created at:
+ do_tcp_getsockopt+0x1a74/0x6320 net/ipv4/tcp.c:3670
+ do_tcp_getsockopt+0x1a74/0x6320 net/ipv4/tcp.c:3670
+
+Fixes: 05255b823a61 ("tcp: add TCP_ZEROCOPY_RECEIVE support for zerocopy receive")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp.c |    7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -1774,10 +1774,11 @@ static int tcp_zerocopy_receive(struct s
+ 
+       down_read(&current->mm->mmap_sem);
+ 
+-      ret = -EINVAL;
+       vma = find_vma(current->mm, address);
+-      if (!vma || vma->vm_start > address || vma->vm_ops != &tcp_vm_ops)
+-              goto out;
++      if (!vma || vma->vm_start > address || vma->vm_ops != &tcp_vm_ops) {
++              up_read(&current->mm->mmap_sem);
++              return -EINVAL;
++      }
+       zc->length = min_t(unsigned long, zc->length, vma->vm_end - address);
+ 
+       tp = tcp_sk(sk);
diff --git a/queue-4.19/tcp-fix-so_rcvlowat-hangs-with-fat-skbs.patch b/queue-4.19/tcp-fix-so_rcvlowat-hangs-with-fat-skbs.patch

new file mode 100644 (file)

index 0000000..9c16c1f
--- /dev/null
+++ b/queue-4.19/tcp-fix-so_rcvlowat-hangs-with-fat-skbs.patch
@@ -0,0 +1,95 @@
+From foo@baz Sat 16 May 2020 06:06:27 PM CEST
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 12 May 2020 06:54:30 -0700
+Subject: tcp: fix SO_RCVLOWAT hangs with fat skbs
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 24adbc1676af4e134e709ddc7f34cf2adc2131e4 ]
+
+We autotune rcvbuf whenever SO_RCVLOWAT is set to account for 100%
+overhead in tcp_set_rcvlowat()
+
+This works well when skb->len/skb->truesize ratio is bigger than 0.5
+
+But if we receive packets with small MSS, we can end up in a situation
+where not enough bytes are available in the receive queue to satisfy
+RCVLOWAT setting.
+As our sk_rcvbuf limit is hit, we send zero windows in ACK packets,
+preventing remote peer from sending more data.
+
+Even autotuning does not help, because it only triggers at the time
+the user process drains the queue. If no EPOLLIN is generated, this
+cannot happen.
+
+Note poll() has a similar issue, after commit
+c7004482e8dc ("tcp: Respect SO_RCVLOWAT in tcp_poll().")
+
+Fixes: 03f45c883c6f ("tcp: avoid extra wakeups for SO_RCVLOWAT users")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/tcp.h    |   13 +++++++++++++
+ net/ipv4/tcp.c       |   14 +++++++++++---
+ net/ipv4/tcp_input.c |    3 ++-
+ 3 files changed, 26 insertions(+), 4 deletions(-)
+
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -1373,6 +1373,19 @@ static inline int tcp_full_space(const s
+       return tcp_win_from_space(sk, sk->sk_rcvbuf);
+ }
+ 
++/* We provision sk_rcvbuf around 200% of sk_rcvlowat.
++ * If 87.5 % (7/8) of the space has been consumed, we want to override
++ * SO_RCVLOWAT constraint, since we are receiving skbs with too small
++ * len/truesize ratio.
++ */
++static inline bool tcp_rmem_pressure(const struct sock *sk)
++{
++      int rcvbuf = READ_ONCE(sk->sk_rcvbuf);
++      int threshold = rcvbuf - (rcvbuf >> 3);
++
++      return atomic_read(&sk->sk_rmem_alloc) > threshold;
++}
++
+ extern void tcp_openreq_init_rwin(struct request_sock *req,
+                                 const struct sock *sk_listener,
+                                 const struct dst_entry *dst);
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -488,9 +488,17 @@ static void tcp_tx_timestamp(struct sock
+ static inline bool tcp_stream_is_readable(const struct tcp_sock *tp,
+                                         int target, struct sock *sk)
+ {
+-      return (READ_ONCE(tp->rcv_nxt) - tp->copied_seq >= target) ||
+-              (sk->sk_prot->stream_memory_read ?
+-              sk->sk_prot->stream_memory_read(sk) : false);
++      int avail = READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->copied_seq);
++
++      if (avail > 0) {
++              if (avail >= target)
++                      return true;
++              if (tcp_rmem_pressure(sk))
++                      return true;
++      }
++      if (sk->sk_prot->stream_memory_read)
++              return sk->sk_prot->stream_memory_read(sk);
++      return false;
+ }
+ 
+ /*
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -4683,7 +4683,8 @@ void tcp_data_ready(struct sock *sk)
+       const struct tcp_sock *tp = tcp_sk(sk);
+       int avail = tp->rcv_nxt - tp->copied_seq;
+ 
+-      if (avail < sk->sk_rcvlowat && !sock_flag(sk, SOCK_DONE))
++      if (avail < sk->sk_rcvlowat && !tcp_rmem_pressure(sk) &&
++          !sock_flag(sk, SOCK_DONE))
+               return;
+ 
+       sk->sk_data_ready(sk);
diff --git a/queue-4.19/virtio_net-fix-lockdep-warning-on-32-bit.patch b/queue-4.19/virtio_net-fix-lockdep-warning-on-32-bit.patch

new file mode 100644 (file)

index 0000000..63d940b
--- /dev/null
+++ b/queue-4.19/virtio_net-fix-lockdep-warning-on-32-bit.patch
@@ -0,0 +1,57 @@
+From foo@baz Sat 16 May 2020 06:06:27 PM CEST
+From: "Michael S. Tsirkin" <mst@redhat.com>
+Date: Thu, 7 May 2020 03:25:56 -0400
+Subject: virtio_net: fix lockdep warning on 32 bit
+
+From: "Michael S. Tsirkin" <mst@redhat.com>
+
+[ Upstream commit 01c3259818a11f3cc3cd767adbae6b45849c03c1 ]
+
+When we fill up a receive VQ, try_fill_recv currently tries to count
+kicks using a 64 bit stats counter. Turns out, on a 32 bit kernel that
+uses a seqcount. Sequence counts are "lock" constructs where you need to
+make sure that writers are serialized.
+
+In turn, this means that we mustn't run two try_fill_recv concurrently.
+Which of course we don't. We do run try_fill_recv sometimes from a
+softirq napi context, and sometimes from a fully preemptible context,
+but the latter always runs with napi disabled.
+
+However, when it comes to the seqcount, lockdep is trying to enforce the
+rule that the same lock isn't accessed from preemptible and softirq
+context - it doesn't know about napi being enabled/disabled. This causes
+a false-positive warning:
+
+WARNING: inconsistent lock state
+...
+inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage.
+
+As a workaround, shut down the warning by switching
+to u64_stats_update_begin_irqsave - that works by disabling
+interrupts on 32 bit only, is a NOP on 64 bit.
+
+Reported-by: Thomas Gleixner <tglx@linutronix.de>
+Suggested-by: Eric Dumazet <eric.dumazet@gmail.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/virtio_net.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -1242,9 +1242,11 @@ static bool try_fill_recv(struct virtnet
+                       break;
+       } while (rq->vq->num_free);
+       if (virtqueue_kick_prepare(rq->vq) && virtqueue_notify(rq->vq)) {
+-              u64_stats_update_begin(&rq->stats.syncp);
++              unsigned long flags;
++
++              flags = u64_stats_update_begin_irqsave(&rq->stats.syncp);
+               rq->stats.kicks++;
+-              u64_stats_update_end(&rq->stats.syncp);
++              u64_stats_update_end_irqrestore(&rq->stats.syncp, flags);
+       }
+ 
+       return !oom;
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Sat, 16 May 2020 16:10:35 +0000 (18:10 +0200)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Sat, 16 May 2020 16:10:35 +0000 (18:10 +0200)
queue-4.19/hinic-fix-a-bug-of-ndo_stop.patch	[new file with mode: 0644]	patch \| blob
queue-4.19/net-dsa-loop-add-module-soft-dependency.patch	[new file with mode: 0644]	patch \| blob
queue-4.19/net-fix-a-potential-recursive-netdev_feat_change.patch	[new file with mode: 0644]	patch \| blob
queue-4.19/net-ipv4-really-enforce-backoff-for-redirects.patch	[new file with mode: 0644]	patch \| blob
queue-4.19/net-phy-fix-aneg-restart-in-phy_ethtool_set_eee.patch	[new file with mode: 0644]	patch \| blob
queue-4.19/net-tcp-fix-rx-timestamp-behavior-for-tcp_recvmsg.patch	[new file with mode: 0644]	patch \| blob
queue-4.19/netlabel-cope-with-null-catmap.patch	[new file with mode: 0644]	patch \| blob
queue-4.19/netprio_cgroup-fix-unlimited-memory-leak-of-v2-cgroups.patch	[new file with mode: 0644]	patch \| blob
queue-4.19/pppoe-only-process-padt-targeted-at-local-interfaces.patch	[new file with mode: 0644]	patch \| blob
queue-4.19/revert-ipv6-add-mtu-lock-check-in-__ip6_rt_update_pmtu.patch	[new file with mode: 0644]	patch \| blob
queue-4.19/series		patch \| blob \| blame \| history
queue-4.19/tcp-fix-error-recovery-in-tcp_zerocopy_receive.patch	[new file with mode: 0644]	patch \| blob
queue-4.19/tcp-fix-so_rcvlowat-hangs-with-fat-skbs.patch	[new file with mode: 0644]	patch \| blob
queue-4.19/virtio_net-fix-lockdep-warning-on-32-bit.patch	[new file with mode: 0644]	patch \| blob