--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Nir Dotan <nird@mellanox.com>
+Date: Sun, 27 Jan 2019 09:26:22 +0200
+Subject: ip6mr: Fix notifiers call on mroute_clean_tables()
+
+From: Nir Dotan <nird@mellanox.com>
+
+[ Upstream commit 146820cc240f4389cf33481c058d9493aef95e25 ]
+
+When the MC route socket is closed, mroute_clean_tables() is called to
+cleanup existing routes. Mistakenly, the notifiers call was put on the
+cleanup of the unresolved MC route entries cache.
+In a case where the MC socket closes before an unresolved route expires,
+the notifier call leads to a crash, caused by the driver trying to
+increment a non initialized refcount_t object [1] and then when handling
+is done, to decrement it [2]. This was detected by a test recently added in
+commit 6d4efada3b82 ("selftests: forwarding: Add multicast routing test").
+
+Fix that by putting notifiers call on the resolved entries traversal,
+instead of on the unresolved entries traversal.
+
+[1]
+
+[ 245.748967] refcount_t: increment on 0; use-after-free.
+[ 245.754829] WARNING: CPU: 3 PID: 3223 at lib/refcount.c:153 refcount_inc_checked+0x2b/0x30
+...
+[ 245.802357] Hardware name: Mellanox Technologies Ltd. MSN2740/SA001237, BIOS 5.6.5 06/07/2016
+[ 245.811873] RIP: 0010:refcount_inc_checked+0x2b/0x30
+...
+[ 245.907487] Call Trace:
+[ 245.910231] mlxsw_sp_router_fib_event.cold.181+0x42/0x47 [mlxsw_spectrum]
+[ 245.917913] notifier_call_chain+0x45/0x7
+[ 245.922484] atomic_notifier_call_chain+0x15/0x20
+[ 245.927729] call_fib_notifiers+0x15/0x30
+[ 245.932205] mroute_clean_tables+0x372/0x3f
+[ 245.936971] ip6mr_sk_done+0xb1/0xc0
+[ 245.940960] ip6_mroute_setsockopt+0x1da/0x5f0
+...
+
+[2]
+
+[ 246.128487] refcount_t: underflow; use-after-free.
+[ 246.133859] WARNING: CPU: 0 PID: 7 at lib/refcount.c:187 refcount_sub_and_test_checked+0x4c/0x60
+[ 246.183521] Hardware name: Mellanox Technologies Ltd. MSN2740/SA001237, BIOS 5.6.5 06/07/2016
+...
+[ 246.193062] Workqueue: mlxsw_core_ordered mlxsw_sp_router_fibmr_event_work [mlxsw_spectrum]
+[ 246.202394] RIP: 0010:refcount_sub_and_test_checked+0x4c/0x60
+...
+[ 246.298889] Call Trace:
+[ 246.301617] refcount_dec_and_test_checked+0x11/0x20
+[ 246.307170] mlxsw_sp_router_fibmr_event_work.cold.196+0x47/0x78 [mlxsw_spectrum]
+[ 246.315531] process_one_work+0x1fa/0x3f0
+[ 246.320005] worker_thread+0x2f/0x3e0
+[ 246.324083] kthread+0x118/0x130
+[ 246.327683] ? wq_update_unbound_numa+0x1b0/0x1b0
+[ 246.332926] ? kthread_park+0x80/0x80
+[ 246.337013] ret_from_fork+0x1f/0x30
+
+Fixes: 088aa3eec2ce ("ip6mr: Support fib notifications")
+Signed-off-by: Nir Dotan <nird@mellanox.com>
+Reviewed-by: Ido Schimmel <idosch@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6mr.c | 7 +++----
+ 1 file changed, 3 insertions(+), 4 deletions(-)
+
+--- a/net/ipv6/ip6mr.c
++++ b/net/ipv6/ip6mr.c
+@@ -1506,6 +1506,9 @@ static void mroute_clean_tables(struct m
+ continue;
+ rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
+ list_del_rcu(&c->list);
++ call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
++ FIB_EVENT_ENTRY_DEL,
++ (struct mfc6_cache *)c, mrt->id);
+ mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
+ mr_cache_put(c);
+ }
+@@ -1514,10 +1517,6 @@ static void mroute_clean_tables(struct m
+ spin_lock_bh(&mfc_unres_lock);
+ list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
+ list_del(&c->list);
+- call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
+- FIB_EVENT_ENTRY_DEL,
+- (struct mfc6_cache *)c,
+- mrt->id);
+ mr6_netlink_event(mrt, (struct mfc6_cache *)c,
+ RTM_DELROUTE);
+ ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: David Ahern <dsahern@gmail.com>
+Date: Wed, 2 Jan 2019 18:57:09 -0800
+Subject: ipv6: Consider sk_bound_dev_if when binding a socket to an address
+
+From: David Ahern <dsahern@gmail.com>
+
+[ Upstream commit c5ee066333ebc322a24a00a743ed941a0c68617e ]
+
+IPv6 does not consider if the socket is bound to a device when binding
+to an address. The result is that a socket can be bound to eth0 and then
+bound to the address of eth1. If the device is a VRF, the result is that
+a socket can only be bound to an address in the default VRF.
+
+Resolve by considering the device if sk_bound_dev_if is set.
+
+This problem exists from the beginning of git history.
+
+Signed-off-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/af_inet6.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/net/ipv6/af_inet6.c
++++ b/net/ipv6/af_inet6.c
+@@ -361,6 +361,9 @@ static int __inet6_bind(struct sock *sk,
+ err = -EINVAL;
+ goto out_unlock;
+ }
++ }
++
++ if (sk->sk_bound_dev_if) {
+ dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
+ if (!dev) {
+ err = -ENODEV;
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Yohei Kanemaru <yohei.kanemaru@gmail.com>
+Date: Tue, 29 Jan 2019 15:52:34 +0900
+Subject: ipv6: sr: clear IP6CB(skb) on SRH ip4ip6 encapsulation
+
+From: Yohei Kanemaru <yohei.kanemaru@gmail.com>
+
+[ Upstream commit ef489749aae508e6f17886775c075f12ff919fb1 ]
+
+skb->cb may contain data from previous layers (in an observed case
+IPv4 with L3 Master Device). In the observed scenario, the data in
+IPCB(skb)->frags was misinterpreted as IP6CB(skb)->frag_max_size,
+eventually caused an unexpected IPv6 fragmentation in ip6_fragment()
+through ip6_finish_output().
+
+This patch clears IP6CB(skb), which potentially contains garbage data,
+on the SRH ip4ip6 encapsulation.
+
+Fixes: 32d99d0b6702 ("ipv6: sr: add support for ip4ip6 encapsulation")
+Signed-off-by: Yohei Kanemaru <yohei.kanemaru@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/seg6_iptunnel.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/ipv6/seg6_iptunnel.c
++++ b/net/ipv6/seg6_iptunnel.c
+@@ -146,6 +146,8 @@ int seg6_do_srh_encap(struct sk_buff *sk
+ } else {
+ ip6_flow_hdr(hdr, 0, flowlabel);
+ hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb));
++
++ memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+ }
+
+ hdr->nexthdr = NEXTHDR_ROUTING;
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Wed, 30 Jan 2019 12:49:48 +0100
+Subject: ipvlan, l3mdev: fix broken l3s mode wrt local routes
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+[ Upstream commit d5256083f62e2720f75bb3c5a928a0afe47d6bc3 ]
+
+While implementing ipvlan l3 and l3s mode for kubernetes CNI plugin,
+I ran into the issue that while l3 mode is working fine, l3s mode
+does not have any connectivity to kube-apiserver and hence all pods
+end up in Error state as well. The ipvlan master device sits on
+top of a bond device and hostns traffic to kube-apiserver (also running
+in hostns) is DNATed from 10.152.183.1:443 to 139.178.29.207:37573
+where the latter is the address of the bond0. While in l3 mode, a
+curl to https://10.152.183.1:443 or to https://139.178.29.207:37573
+works fine from hostns, neither of them do in case of l3s. In the
+latter only a curl to https://127.0.0.1:37573 appeared to work where
+for local addresses of bond0 I saw kernel suddenly starting to emit
+ARP requests to query HW address of bond0 which remained unanswered
+and neighbor entries in INCOMPLETE state. These ARP requests only
+happen while in l3s.
+
+Debugging this further, I found the issue is that l3s mode is piggy-
+backing on l3 master device, and in this case local routes are using
+l3mdev_master_dev_rcu(dev) instead of net->loopback_dev as per commit
+f5a0aab84b74 ("net: ipv4: dst for local input routes should use l3mdev
+if relevant") and 5f02ce24c269 ("net: l3mdev: Allow the l3mdev to be
+a loopback"). I found that reverting them back into using the
+net->loopback_dev fixed ipvlan l3s connectivity and got everything
+working for the CNI.
+
+Now judging from 4fbae7d83c98 ("ipvlan: Introduce l3s mode") and the
+l3mdev paper in [0] the only sole reason why ipvlan l3s is relying
+on l3 master device is to get the l3mdev_ip_rcv() receive hook for
+setting the dst entry of the input route without adding its own
+ipvlan specific hacks into the receive path, however, any l3 domain
+semantics beyond just that are breaking l3s operation. Note that
+ipvlan also has the ability to dynamically switch its internal
+operation from l3 to l3s for all ports via ipvlan_set_port_mode()
+at runtime. In any case, l3 vs l3s solely distinguishes itself by
+'de-confusing' netfilter through switching skb->dev to ipvlan slave
+device late in NF_INET_LOCAL_IN before handing the skb to L4.
+
+Minimal fix taken here is to add a IFF_L3MDEV_RX_HANDLER flag which,
+if set from ipvlan setup, gets us only the wanted l3mdev_l3_rcv() hook
+without any additional l3mdev semantics on top. This should also have
+minimal impact since dev->priv_flags is already hot in cache. With
+this set, l3s mode is working fine and I also get things like
+masquerading pod traffic on the ipvlan master properly working.
+
+ [0] https://netdevconf.org/1.2/papers/ahern-what-is-l3mdev-paper.pdf
+
+Fixes: f5a0aab84b74 ("net: ipv4: dst for local input routes should use l3mdev if relevant")
+Fixes: 5f02ce24c269 ("net: l3mdev: Allow the l3mdev to be a loopback")
+Fixes: 4fbae7d83c98 ("ipvlan: Introduce l3s mode")
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Cc: Mahesh Bandewar <maheshb@google.com>
+Cc: David Ahern <dsa@cumulusnetworks.com>
+Cc: Florian Westphal <fw@strlen.de>
+Cc: Martynas Pumputis <m@lambda.lt>
+Acked-by: David Ahern <dsa@cumulusnetworks.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ipvlan/ipvlan_main.c | 6 +++---
+ include/linux/netdevice.h | 8 ++++++++
+ include/net/l3mdev.h | 3 ++-
+ 3 files changed, 13 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/ipvlan/ipvlan_main.c
++++ b/drivers/net/ipvlan/ipvlan_main.c
+@@ -97,12 +97,12 @@ static int ipvlan_set_port_mode(struct i
+ err = ipvlan_register_nf_hook(read_pnet(&port->pnet));
+ if (!err) {
+ mdev->l3mdev_ops = &ipvl_l3mdev_ops;
+- mdev->priv_flags |= IFF_L3MDEV_MASTER;
++ mdev->priv_flags |= IFF_L3MDEV_RX_HANDLER;
+ } else
+ goto fail;
+ } else if (port->mode == IPVLAN_MODE_L3S) {
+ /* Old mode was L3S */
+- mdev->priv_flags &= ~IFF_L3MDEV_MASTER;
++ mdev->priv_flags &= ~IFF_L3MDEV_RX_HANDLER;
+ ipvlan_unregister_nf_hook(read_pnet(&port->pnet));
+ mdev->l3mdev_ops = NULL;
+ }
+@@ -162,7 +162,7 @@ static void ipvlan_port_destroy(struct n
+ struct sk_buff *skb;
+
+ if (port->mode == IPVLAN_MODE_L3S) {
+- dev->priv_flags &= ~IFF_L3MDEV_MASTER;
++ dev->priv_flags &= ~IFF_L3MDEV_RX_HANDLER;
+ ipvlan_unregister_nf_hook(dev_net(dev));
+ dev->l3mdev_ops = NULL;
+ }
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -1455,6 +1455,7 @@ struct net_device_ops {
+ * @IFF_NO_RX_HANDLER: device doesn't support the rx_handler hook
+ * @IFF_FAILOVER: device is a failover master device
+ * @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device
++ * @IFF_L3MDEV_RX_HANDLER: only invoke the rx handler of L3 master device
+ */
+ enum netdev_priv_flags {
+ IFF_802_1Q_VLAN = 1<<0,
+@@ -1486,6 +1487,7 @@ enum netdev_priv_flags {
+ IFF_NO_RX_HANDLER = 1<<26,
+ IFF_FAILOVER = 1<<27,
+ IFF_FAILOVER_SLAVE = 1<<28,
++ IFF_L3MDEV_RX_HANDLER = 1<<29,
+ };
+
+ #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN
+@@ -1516,6 +1518,7 @@ enum netdev_priv_flags {
+ #define IFF_NO_RX_HANDLER IFF_NO_RX_HANDLER
+ #define IFF_FAILOVER IFF_FAILOVER
+ #define IFF_FAILOVER_SLAVE IFF_FAILOVER_SLAVE
++#define IFF_L3MDEV_RX_HANDLER IFF_L3MDEV_RX_HANDLER
+
+ /**
+ * struct net_device - The DEVICE structure.
+@@ -4464,6 +4467,11 @@ static inline bool netif_supports_nofcs(
+ return dev->priv_flags & IFF_SUPP_NOFCS;
+ }
+
++static inline bool netif_has_l3_rx_handler(const struct net_device *dev)
++{
++ return dev->priv_flags & IFF_L3MDEV_RX_HANDLER;
++}
++
+ static inline bool netif_is_l3_master(const struct net_device *dev)
+ {
+ return dev->priv_flags & IFF_L3MDEV_MASTER;
+--- a/include/net/l3mdev.h
++++ b/include/net/l3mdev.h
+@@ -142,7 +142,8 @@ struct sk_buff *l3mdev_l3_rcv(struct sk_
+
+ if (netif_is_l3_slave(skb->dev))
+ master = netdev_master_upper_dev_get_rcu(skb->dev);
+- else if (netif_is_l3_master(skb->dev))
++ else if (netif_is_l3_master(skb->dev) ||
++ netif_has_l3_rx_handler(skb->dev))
+ master = skb->dev;
+
+ if (master && master->l3mdev_ops->l3mdev_l3_rcv)
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Jacob Wen <jian.w.wen@oracle.com>
+Date: Thu, 31 Jan 2019 15:18:56 +0800
+Subject: l2tp: copy 4 more bytes to linear part if necessary
+
+From: Jacob Wen <jian.w.wen@oracle.com>
+
+[ Upstream commit 91c524708de6207f59dd3512518d8a1c7b434ee3 ]
+
+The size of L2TPv2 header with all optional fields is 14 bytes.
+l2tp_udp_recv_core only moves 10 bytes to the linear part of a
+skb. This may lead to l2tp_recv_common read data outside of a skb.
+
+This patch makes sure that there are at least 14 bytes in the linear
+part of a skb to meet the maximum need of l2tp_udp_recv_core and
+l2tp_recv_common. The minimum size of both PPP HDLC-like frame and
+Ethernet frame is larger than 14 bytes, so we are safe to do so.
+
+Also remove L2TP_HDR_SIZE_NOSEQ, it is unused now.
+
+Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts")
+Suggested-by: Guillaume Nault <gnault@redhat.com>
+Signed-off-by: Jacob Wen <jian.w.wen@oracle.com>
+Acked-by: Guillaume Nault <gnault@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/l2tp/l2tp_core.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- a/net/l2tp/l2tp_core.c
++++ b/net/l2tp/l2tp_core.c
+@@ -83,8 +83,7 @@
+ #define L2TP_SLFLAG_S 0x40000000
+ #define L2TP_SL_SEQ_MASK 0x00ffffff
+
+-#define L2TP_HDR_SIZE_SEQ 10
+-#define L2TP_HDR_SIZE_NOSEQ 6
++#define L2TP_HDR_SIZE_MAX 14
+
+ /* Default trace flags */
+ #define L2TP_DEFAULT_DEBUG_FLAGS 0
+@@ -808,7 +807,7 @@ static int l2tp_udp_recv_core(struct l2t
+ __skb_pull(skb, sizeof(struct udphdr));
+
+ /* Short packet? */
+- if (!pskb_may_pull(skb, L2TP_HDR_SIZE_SEQ)) {
++ if (!pskb_may_pull(skb, L2TP_HDR_SIZE_MAX)) {
+ l2tp_info(tunnel, L2TP_MSG_DATA,
+ "%s: recv short packet (len=%d)\n",
+ tunnel->name, skb->len);
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Jacob Wen <jian.w.wen@oracle.com>
+Date: Wed, 30 Jan 2019 14:55:14 +0800
+Subject: l2tp: fix reading optional fields of L2TPv3
+
+From: Jacob Wen <jian.w.wen@oracle.com>
+
+[ Upstream commit 4522a70db7aa5e77526a4079628578599821b193 ]
+
+Use pskb_may_pull() to make sure the optional fields are in skb linear
+parts, so we can safely read them later.
+
+It's easy to reproduce the issue with a net driver that supports paged
+skb data. Just create a L2TPv3 over IP tunnel and then generates some
+network traffic.
+Once reproduced, rx err in /sys/kernel/debug/l2tp/tunnels will increase.
+
+Changes in v4:
+1. s/l2tp_v3_pull_opt/l2tp_v3_ensure_opt_in_linear/
+2. s/tunnel->version != L2TP_HDR_VER_2/tunnel->version == L2TP_HDR_VER_3/
+3. Add 'Fixes' in commit messages.
+
+Changes in v3:
+1. To keep consistency, move the code out of l2tp_recv_common.
+2. Use "net" instead of "net-next", since this is a bug fix.
+
+Changes in v2:
+1. Only fix L2TPv3 to make code simple.
+ To fix both L2TPv3 and L2TPv2, we'd better refactor l2tp_recv_common.
+ It's complicated to do so.
+2. Reloading pointers after pskb_may_pull
+
+Fixes: f7faffa3ff8e ("l2tp: Add L2TPv3 protocol support")
+Fixes: 0d76751fad77 ("l2tp: Add L2TPv3 IP encapsulation (no UDP) support")
+Fixes: a32e0eec7042 ("l2tp: introduce L2TPv3 IP encapsulation support for IPv6")
+Signed-off-by: Jacob Wen <jian.w.wen@oracle.com>
+Acked-by: Guillaume Nault <gnault@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/l2tp/l2tp_core.c | 4 ++++
+ net/l2tp/l2tp_core.h | 20 ++++++++++++++++++++
+ net/l2tp/l2tp_ip.c | 3 +++
+ net/l2tp/l2tp_ip6.c | 3 +++
+ 4 files changed, 30 insertions(+)
+
+--- a/net/l2tp/l2tp_core.c
++++ b/net/l2tp/l2tp_core.c
+@@ -883,6 +883,10 @@ static int l2tp_udp_recv_core(struct l2t
+ goto error;
+ }
+
++ if (tunnel->version == L2TP_HDR_VER_3 &&
++ l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr))
++ goto error;
++
+ l2tp_recv_common(session, skb, ptr, optr, hdrflags, length);
+ l2tp_session_dec_refcount(session);
+
+--- a/net/l2tp/l2tp_core.h
++++ b/net/l2tp/l2tp_core.h
+@@ -301,6 +301,26 @@ static inline bool l2tp_tunnel_uses_xfrm
+ }
+ #endif
+
++static inline int l2tp_v3_ensure_opt_in_linear(struct l2tp_session *session, struct sk_buff *skb,
++ unsigned char **ptr, unsigned char **optr)
++{
++ int opt_len = session->peer_cookie_len + l2tp_get_l2specific_len(session);
++
++ if (opt_len > 0) {
++ int off = *ptr - *optr;
++
++ if (!pskb_may_pull(skb, off + opt_len))
++ return -1;
++
++ if (skb->data != *optr) {
++ *optr = skb->data;
++ *ptr = skb->data + off;
++ }
++ }
++
++ return 0;
++}
++
+ #define l2tp_printk(ptr, type, func, fmt, ...) \
+ do { \
+ if (((ptr)->debug) & (type)) \
+--- a/net/l2tp/l2tp_ip.c
++++ b/net/l2tp/l2tp_ip.c
+@@ -165,6 +165,9 @@ static int l2tp_ip_recv(struct sk_buff *
+ print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length);
+ }
+
++ if (l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr))
++ goto discard_sess;
++
+ l2tp_recv_common(session, skb, ptr, optr, 0, skb->len);
+ l2tp_session_dec_refcount(session);
+
+--- a/net/l2tp/l2tp_ip6.c
++++ b/net/l2tp/l2tp_ip6.c
+@@ -178,6 +178,9 @@ static int l2tp_ip6_recv(struct sk_buff
+ print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length);
+ }
+
++ if (l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr))
++ goto discard_sess;
++
+ l2tp_recv_common(session, skb, ptr, optr, 0, skb->len);
+ l2tp_session_dec_refcount(session);
+
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
+Date: Mon, 28 Jan 2019 22:23:49 +0100
+Subject: net: ip6_gre: always reports o_key to userspace
+
+From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
+
+[ Upstream commit c706863bc8902d0c2d1a5a27ac8e1ead5d06b79d ]
+
+As Erspan_v4, Erspan_v6 protocol relies on o_key to configure
+session id header field. However TUNNEL_KEY bit is cleared in
+ip6erspan_tunnel_xmit since ERSPAN protocol does not set the key field
+of the external GRE header and so the configured o_key is not reported
+to userspace. The issue can be triggered with the following reproducer:
+
+$ip link add ip6erspan1 type ip6erspan local 2000::1 remote 2000::2 \
+ key 1 seq erspan_ver 1
+$ip link set ip6erspan1 up
+ip -d link sh ip6erspan1
+
+ip6erspan1@NONE: <BROADCAST,MULTICAST> mtu 1422 qdisc noop state DOWN mode DEFAULT
+ link/ether ba:ff:09:24:c3:0e brd ff:ff:ff:ff:ff:ff promiscuity 0 minmtu 68 maxmtu 1500
+ ip6erspan remote 2000::2 local 2000::1 encaplimit 4 flowlabel 0x00000 ikey 0.0.0.1 iseq oseq
+
+Fix the issue adding TUNNEL_KEY bit to the o_flags parameter in
+ip6gre_fill_info
+
+Fixes: 5a963eb61b7c ("ip6_gre: Add ERSPAN native tunnel support")
+Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_gre.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/net/ipv6/ip6_gre.c
++++ b/net/ipv6/ip6_gre.c
+@@ -2120,12 +2120,17 @@ static int ip6gre_fill_info(struct sk_bu
+ {
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct __ip6_tnl_parm *p = &t->parms;
++ __be16 o_flags = p->o_flags;
++
++ if ((p->erspan_ver == 1 || p->erspan_ver == 2) &&
++ !p->collect_md)
++ o_flags |= TUNNEL_KEY;
+
+ if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
+ nla_put_be16(skb, IFLA_GRE_IFLAGS,
+ gre_tnl_flags_to_gre_flags(p->i_flags)) ||
+ nla_put_be16(skb, IFLA_GRE_OFLAGS,
+- gre_tnl_flags_to_gre_flags(p->o_flags)) ||
++ gre_tnl_flags_to_gre_flags(o_flags)) ||
+ nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
+ nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
+ nla_put_in6_addr(skb, IFLA_GRE_LOCAL, &p->laddr) ||
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
+Date: Mon, 28 Jan 2019 22:23:48 +0100
+Subject: net: ip_gre: always reports o_key to userspace
+
+From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
+
+[ Upstream commit feaf5c796b3f0240f10d0d6d0b686715fd58a05b ]
+
+Erspan protocol (version 1 and 2) relies on o_key to configure
+session id header field. However TUNNEL_KEY bit is cleared in
+erspan_xmit since ERSPAN protocol does not set the key field
+of the external GRE header and so the configured o_key is not reported
+to userspace. The issue can be triggered with the following reproducer:
+
+$ip link add erspan1 type erspan local 192.168.0.1 remote 192.168.0.2 \
+ key 1 seq erspan_ver 1
+$ip link set erspan1 up
+$ip -d link sh erspan1
+
+erspan1@NONE: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1450 qdisc pfifo_fast state UNKNOWN mode DEFAULT
+ link/ether 52:aa:99:95:9a:b5 brd ff:ff:ff:ff:ff:ff promiscuity 0 minmtu 68 maxmtu 1500
+ erspan remote 192.168.0.2 local 192.168.0.1 ttl inherit ikey 0.0.0.1 iseq oseq erspan_index 0
+
+Fix the issue adding TUNNEL_KEY bit to the o_flags parameter in
+ipgre_fill_info
+
+Fixes: 84e54fe0a5ea ("gre: introduce native tunnel support for ERSPAN")
+Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ip_gre.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/net/ipv4/ip_gre.c
++++ b/net/ipv4/ip_gre.c
+@@ -1471,12 +1471,17 @@ static int ipgre_fill_info(struct sk_buf
+ {
+ struct ip_tunnel *t = netdev_priv(dev);
+ struct ip_tunnel_parm *p = &t->parms;
++ __be16 o_flags = p->o_flags;
++
++ if ((t->erspan_ver == 1 || t->erspan_ver == 2) &&
++ !t->collect_md)
++ o_flags |= TUNNEL_KEY;
+
+ if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
+ nla_put_be16(skb, IFLA_GRE_IFLAGS,
+ gre_tnl_flags_to_gre_flags(p->i_flags)) ||
+ nla_put_be16(skb, IFLA_GRE_OFLAGS,
+- gre_tnl_flags_to_gre_flags(p->o_flags)) ||
++ gre_tnl_flags_to_gre_flags(o_flags)) ||
+ nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
+ nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
+ nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
+Date: Fri, 18 Jan 2019 12:05:39 +0100
+Subject: net: ip_gre: use erspan key field for tunnel lookup
+
+From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
+
+[ Upstream commit cb73ee40b1b381eaf3749e6dbeed567bb38e5258 ]
+
+Use ERSPAN key header field as tunnel key in gre_parse_header routine
+since ERSPAN protocol sets the key field of the external GRE header to
+0 resulting in a tunnel lookup fail in ip6gre_err.
+In addition remove key field parsing and pskb_may_pull check in
+erspan_rcv and ip6erspan_rcv
+
+Fixes: 5a963eb61b7c ("ip6_gre: Add ERSPAN native tunnel support")
+Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/gre_demux.c | 17 +++++++++++++++++
+ net/ipv4/ip_gre.c | 9 ---------
+ net/ipv6/ip6_gre.c | 4 ----
+ 3 files changed, 17 insertions(+), 13 deletions(-)
+
+--- a/net/ipv4/gre_demux.c
++++ b/net/ipv4/gre_demux.c
+@@ -25,6 +25,7 @@
+ #include <linux/spinlock.h>
+ #include <net/protocol.h>
+ #include <net/gre.h>
++#include <net/erspan.h>
+
+ #include <net/icmp.h>
+ #include <net/route.h>
+@@ -118,6 +119,22 @@ int gre_parse_header(struct sk_buff *skb
+ hdr_len += 4;
+ }
+ tpi->hdr_len = hdr_len;
++
++ /* ERSPAN ver 1 and 2 protocol sets GRE key field
++ * to 0 and sets the configured key in the
++ * inner erspan header field
++ */
++ if (greh->protocol == htons(ETH_P_ERSPAN) ||
++ greh->protocol == htons(ETH_P_ERSPAN2)) {
++ struct erspan_base_hdr *ershdr;
++
++ if (!pskb_may_pull(skb, nhs + hdr_len + sizeof(*ershdr)))
++ return -EINVAL;
++
++ ershdr = (struct erspan_base_hdr *)options;
++ tpi->key = cpu_to_be32(get_session_id(ershdr));
++ }
++
+ return hdr_len;
+ }
+ EXPORT_SYMBOL(gre_parse_header);
+--- a/net/ipv4/ip_gre.c
++++ b/net/ipv4/ip_gre.c
+@@ -269,20 +269,11 @@ static int erspan_rcv(struct sk_buff *sk
+ int len;
+
+ itn = net_generic(net, erspan_net_id);
+- len = gre_hdr_len + sizeof(*ershdr);
+-
+- /* Check based hdr len */
+- if (unlikely(!pskb_may_pull(skb, len)))
+- return PACKET_REJECT;
+
+ iph = ip_hdr(skb);
+ ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
+ ver = ershdr->ver;
+
+- /* The original GRE header does not have key field,
+- * Use ERSPAN 10-bit session ID as key.
+- */
+- tpi->key = cpu_to_be32(get_session_id(ershdr));
+ tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
+ tpi->flags | TUNNEL_KEY,
+ iph->saddr, iph->daddr, tpi->key);
+--- a/net/ipv6/ip6_gre.c
++++ b/net/ipv6/ip6_gre.c
+@@ -550,13 +550,9 @@ static int ip6erspan_rcv(struct sk_buff
+ struct ip6_tnl *tunnel;
+ u8 ver;
+
+- if (unlikely(!pskb_may_pull(skb, sizeof(*ershdr))))
+- return PACKET_REJECT;
+-
+ ipv6h = ipv6_hdr(skb);
+ ershdr = (struct erspan_base_hdr *)skb->data;
+ ver = ershdr->ver;
+- tpi->key = cpu_to_be32(get_session_id(ershdr));
+
+ tunnel = ip6gre_tunnel_lookup(skb->dev,
+ &ipv6h->saddr, &ipv6h->daddr, tpi->key,
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Aya Levin <ayal@mellanox.com>
+Date: Tue, 22 Jan 2019 15:19:44 +0200
+Subject: net/mlx4_core: Add masking for a few queries on HCA caps
+
+From: Aya Levin <ayal@mellanox.com>
+
+[ Upstream commit a40ded6043658444ee4dd6ee374119e4e98b33fc ]
+
+Driver reads the query HCA capabilities without the corresponding masks.
+Without the correct masks, the base addresses of the queues are
+unaligned. In addition some reserved bits were wrongly read. Using the
+correct masks, ensures alignment of the base addresses and allows future
+firmware versions safe use of the reserved bits.
+
+Fixes: ab9c17a009ee ("mlx4_core: Modify driver initialization flow to accommodate SRIOV for Ethernet")
+Fixes: 0ff1fb654bec ("{NET, IB}/mlx4: Add device managed flow steering firmware API")
+Signed-off-by: Aya Levin <ayal@mellanox.com>
+Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx4/fw.c | 75 +++++++++++++++++++-------------
+ 1 file changed, 46 insertions(+), 29 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
++++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
+@@ -2064,9 +2064,11 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev,
+ {
+ struct mlx4_cmd_mailbox *mailbox;
+ __be32 *outbox;
++ u64 qword_field;
+ u32 dword_field;
+- int err;
++ u16 word_field;
+ u8 byte_field;
++ int err;
+ static const u8 a0_dmfs_query_hw_steering[] = {
+ [0] = MLX4_STEERING_DMFS_A0_DEFAULT,
+ [1] = MLX4_STEERING_DMFS_A0_DYNAMIC,
+@@ -2094,19 +2096,32 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev,
+
+ /* QPC/EEC/CQC/EQC/RDMARC attributes */
+
+- MLX4_GET(param->qpc_base, outbox, INIT_HCA_QPC_BASE_OFFSET);
+- MLX4_GET(param->log_num_qps, outbox, INIT_HCA_LOG_QP_OFFSET);
+- MLX4_GET(param->srqc_base, outbox, INIT_HCA_SRQC_BASE_OFFSET);
+- MLX4_GET(param->log_num_srqs, outbox, INIT_HCA_LOG_SRQ_OFFSET);
+- MLX4_GET(param->cqc_base, outbox, INIT_HCA_CQC_BASE_OFFSET);
+- MLX4_GET(param->log_num_cqs, outbox, INIT_HCA_LOG_CQ_OFFSET);
+- MLX4_GET(param->altc_base, outbox, INIT_HCA_ALTC_BASE_OFFSET);
+- MLX4_GET(param->auxc_base, outbox, INIT_HCA_AUXC_BASE_OFFSET);
+- MLX4_GET(param->eqc_base, outbox, INIT_HCA_EQC_BASE_OFFSET);
+- MLX4_GET(param->log_num_eqs, outbox, INIT_HCA_LOG_EQ_OFFSET);
+- MLX4_GET(param->num_sys_eqs, outbox, INIT_HCA_NUM_SYS_EQS_OFFSET);
+- MLX4_GET(param->rdmarc_base, outbox, INIT_HCA_RDMARC_BASE_OFFSET);
+- MLX4_GET(param->log_rd_per_qp, outbox, INIT_HCA_LOG_RD_OFFSET);
++ MLX4_GET(qword_field, outbox, INIT_HCA_QPC_BASE_OFFSET);
++ param->qpc_base = qword_field & ~((u64)0x1f);
++ MLX4_GET(byte_field, outbox, INIT_HCA_LOG_QP_OFFSET);
++ param->log_num_qps = byte_field & 0x1f;
++ MLX4_GET(qword_field, outbox, INIT_HCA_SRQC_BASE_OFFSET);
++ param->srqc_base = qword_field & ~((u64)0x1f);
++ MLX4_GET(byte_field, outbox, INIT_HCA_LOG_SRQ_OFFSET);
++ param->log_num_srqs = byte_field & 0x1f;
++ MLX4_GET(qword_field, outbox, INIT_HCA_CQC_BASE_OFFSET);
++ param->cqc_base = qword_field & ~((u64)0x1f);
++ MLX4_GET(byte_field, outbox, INIT_HCA_LOG_CQ_OFFSET);
++ param->log_num_cqs = byte_field & 0x1f;
++ MLX4_GET(qword_field, outbox, INIT_HCA_ALTC_BASE_OFFSET);
++ param->altc_base = qword_field;
++ MLX4_GET(qword_field, outbox, INIT_HCA_AUXC_BASE_OFFSET);
++ param->auxc_base = qword_field;
++ MLX4_GET(qword_field, outbox, INIT_HCA_EQC_BASE_OFFSET);
++ param->eqc_base = qword_field & ~((u64)0x1f);
++ MLX4_GET(byte_field, outbox, INIT_HCA_LOG_EQ_OFFSET);
++ param->log_num_eqs = byte_field & 0x1f;
++ MLX4_GET(word_field, outbox, INIT_HCA_NUM_SYS_EQS_OFFSET);
++ param->num_sys_eqs = word_field & 0xfff;
++ MLX4_GET(qword_field, outbox, INIT_HCA_RDMARC_BASE_OFFSET);
++ param->rdmarc_base = qword_field & ~((u64)0x1f);
++ MLX4_GET(byte_field, outbox, INIT_HCA_LOG_RD_OFFSET);
++ param->log_rd_per_qp = byte_field & 0x7;
+
+ MLX4_GET(dword_field, outbox, INIT_HCA_FLAGS_OFFSET);
+ if (dword_field & (1 << INIT_HCA_DEVICE_MANAGED_FLOW_STEERING_EN)) {
+@@ -2125,22 +2140,21 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev,
+ /* steering attributes */
+ if (param->steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) {
+ MLX4_GET(param->mc_base, outbox, INIT_HCA_FS_BASE_OFFSET);
+- MLX4_GET(param->log_mc_entry_sz, outbox,
+- INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET);
+- MLX4_GET(param->log_mc_table_sz, outbox,
+- INIT_HCA_FS_LOG_TABLE_SZ_OFFSET);
+- MLX4_GET(byte_field, outbox,
+- INIT_HCA_FS_A0_OFFSET);
++ MLX4_GET(byte_field, outbox, INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET);
++ param->log_mc_entry_sz = byte_field & 0x1f;
++ MLX4_GET(byte_field, outbox, INIT_HCA_FS_LOG_TABLE_SZ_OFFSET);
++ param->log_mc_table_sz = byte_field & 0x1f;
++ MLX4_GET(byte_field, outbox, INIT_HCA_FS_A0_OFFSET);
+ param->dmfs_high_steer_mode =
+ a0_dmfs_query_hw_steering[(byte_field >> 6) & 3];
+ } else {
+ MLX4_GET(param->mc_base, outbox, INIT_HCA_MC_BASE_OFFSET);
+- MLX4_GET(param->log_mc_entry_sz, outbox,
+- INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET);
+- MLX4_GET(param->log_mc_hash_sz, outbox,
+- INIT_HCA_LOG_MC_HASH_SZ_OFFSET);
+- MLX4_GET(param->log_mc_table_sz, outbox,
+- INIT_HCA_LOG_MC_TABLE_SZ_OFFSET);
++ MLX4_GET(byte_field, outbox, INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET);
++ param->log_mc_entry_sz = byte_field & 0x1f;
++ MLX4_GET(byte_field, outbox, INIT_HCA_LOG_MC_HASH_SZ_OFFSET);
++ param->log_mc_hash_sz = byte_field & 0x1f;
++ MLX4_GET(byte_field, outbox, INIT_HCA_LOG_MC_TABLE_SZ_OFFSET);
++ param->log_mc_table_sz = byte_field & 0x1f;
+ }
+
+ /* CX3 is capable of extending CQEs/EQEs from 32 to 64 bytes */
+@@ -2164,15 +2178,18 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev,
+ /* TPT attributes */
+
+ MLX4_GET(param->dmpt_base, outbox, INIT_HCA_DMPT_BASE_OFFSET);
+- MLX4_GET(param->mw_enabled, outbox, INIT_HCA_TPT_MW_OFFSET);
+- MLX4_GET(param->log_mpt_sz, outbox, INIT_HCA_LOG_MPT_SZ_OFFSET);
++ MLX4_GET(byte_field, outbox, INIT_HCA_TPT_MW_OFFSET);
++ param->mw_enabled = byte_field >> 7;
++ MLX4_GET(byte_field, outbox, INIT_HCA_LOG_MPT_SZ_OFFSET);
++ param->log_mpt_sz = byte_field & 0x3f;
+ MLX4_GET(param->mtt_base, outbox, INIT_HCA_MTT_BASE_OFFSET);
+ MLX4_GET(param->cmpt_base, outbox, INIT_HCA_CMPT_BASE_OFFSET);
+
+ /* UAR attributes */
+
+ MLX4_GET(param->uar_page_sz, outbox, INIT_HCA_UAR_PAGE_SZ_OFFSET);
+- MLX4_GET(param->log_uar_sz, outbox, INIT_HCA_LOG_UAR_SZ_OFFSET);
++ MLX4_GET(byte_field, outbox, INIT_HCA_LOG_UAR_SZ_OFFSET);
++ param->log_uar_sz = byte_field & 0xf;
+
+ /* phv_check enable */
+ MLX4_GET(byte_field, outbox, INIT_HCA_CACHELINE_SZ_OFFSET);
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Aya Levin <ayal@mellanox.com>
+Date: Mon, 24 Dec 2018 09:48:42 +0200
+Subject: net/mlx5e: Allow MAC invalidation while spoofchk is ON
+
+From: Aya Levin <ayal@mellanox.com>
+
+[ Upstream commit 9d2cbdc5d334967c35b5f58c7bf3208e17325647 ]
+
+Prior to this patch the driver prohibited spoof checking on invalid MAC.
+Now the user can set this configuration if it wishes to.
+
+This is required since libvirt might invalidate the VF Mac by setting it
+to zero, while spoofcheck is ON.
+
+Fixes: 1ab2068a4c66 ("net/mlx5: Implement vports admin state backup/restore")
+Signed-off-by: Aya Levin <ayal@mellanox.com>
+Reviewed-by: Eran Ben Elisha <eranbe@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 18 ++++++------------
+ 1 file changed, 6 insertions(+), 12 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+@@ -1133,13 +1133,6 @@ static int esw_vport_ingress_config(stru
+ int err = 0;
+ u8 *smac_v;
+
+- if (vport->info.spoofchk && !is_valid_ether_addr(vport->info.mac)) {
+- mlx5_core_warn(esw->dev,
+- "vport[%d] configure ingress rules failed, illegal mac with spoofchk\n",
+- vport->vport);
+- return -EPERM;
+- }
+-
+ esw_vport_cleanup_ingress_rules(esw, vport);
+
+ if (!vport->info.vlan && !vport->info.qos && !vport->info.spoofchk) {
+@@ -1812,13 +1805,10 @@ int mlx5_eswitch_set_vport_mac(struct ml
+ mutex_lock(&esw->state_lock);
+ evport = &esw->vports[vport];
+
+- if (evport->info.spoofchk && !is_valid_ether_addr(mac)) {
++ if (evport->info.spoofchk && !is_valid_ether_addr(mac))
+ mlx5_core_warn(esw->dev,
+- "MAC invalidation is not allowed when spoofchk is on, vport(%d)\n",
++ "Set invalid MAC while spoofchk is on, vport(%d)\n",
+ vport);
+- err = -EPERM;
+- goto unlock;
+- }
+
+ err = mlx5_modify_nic_vport_mac_address(esw->dev, vport, mac);
+ if (err) {
+@@ -1964,6 +1954,10 @@ int mlx5_eswitch_set_vport_spoofchk(stru
+ evport = &esw->vports[vport];
+ pschk = evport->info.spoofchk;
+ evport->info.spoofchk = spoofchk;
++ if (pschk && !is_valid_ether_addr(evport->info.mac))
++ mlx5_core_warn(esw->dev,
++ "Spoofchk in set while MAC is invalid, vport(%d)\n",
++ evport->vport);
+ if (evport->enabled && esw->mode == SRIOV_LEGACY)
+ err = esw_vport_ingress_config(esw, evport);
+ if (err)
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Bernard Pidoux <f6bvp@free.fr>
+Date: Fri, 25 Jan 2019 11:46:40 +0100
+Subject: net/rose: fix NULL ax25_cb kernel panic
+
+From: Bernard Pidoux <f6bvp@free.fr>
+
+[ Upstream commit b0cf029234f9b18e10703ba5147f0389c382bccc ]
+
+When an internally generated frame is handled by rose_xmit(),
+rose_route_frame() is called:
+
+ if (!rose_route_frame(skb, NULL)) {
+ dev_kfree_skb(skb);
+ stats->tx_errors++;
+ return NETDEV_TX_OK;
+ }
+
+We have the same code sequence in Net/Rom where an internally generated
+frame is handled by nr_xmit() calling nr_route_frame(skb, NULL).
+However, in this function the NULL argument is tested, while it is not in
+rose_route_frame().
+Then kernel panic occurs later on when calling ax25cmp() with a NULL
+ax25_cb argument as reported many times and recently with syzbot.
+
+We need to test if ax25 is NULL before using it.
+
+Testing:
+Built kernel with CONFIG_ROSE=y.
+
+Signed-off-by: Bernard Pidoux <f6bvp@free.fr>
+Acked-by: Dmitry Vyukov <dvyukov@google.com>
+Reported-by: syzbot+1a2c456a1ea08fa5b5f7@syzkaller.appspotmail.com
+Cc: "David S. Miller" <davem@davemloft.net>
+Cc: Ralf Baechle <ralf@linux-mips.org>
+Cc: Bernard Pidoux <f6bvp@free.fr>
+Cc: linux-hams@vger.kernel.org
+Cc: netdev@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/rose/rose_route.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/net/rose/rose_route.c
++++ b/net/rose/rose_route.c
+@@ -850,6 +850,7 @@ void rose_link_device_down(struct net_de
+
+ /*
+ * Route a frame to an appropriate AX.25 connection.
++ * A NULL ax25_cb indicates an internally generated frame.
+ */
+ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25)
+ {
+@@ -867,6 +868,10 @@ int rose_route_frame(struct sk_buff *skb
+
+ if (skb->len < ROSE_MIN_LEN)
+ return res;
++
++ if (!ax25)
++ return rose_loopback_queue(skb, NULL);
++
+ frametype = skb->data[2];
+ lci = ((skb->data[0] << 8) & 0xF00) + ((skb->data[1] << 0) & 0x0FF);
+ if (frametype == ROSE_CALL_REQUEST &&
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Josh Elsasser <jelsasser@appneta.com>
+Date: Sat, 26 Jan 2019 14:38:33 -0800
+Subject: net: set default network namespace in init_dummy_netdev()
+
+From: Josh Elsasser <jelsasser@appneta.com>
+
+[ Upstream commit 35edfdc77f683c8fd27d7732af06cf6489af60a5 ]
+
+Assign a default net namespace to netdevs created by init_dummy_netdev().
+Fixes a NULL pointer dereference caused by busy-polling a socket bound to
+an iwlwifi wireless device, which bumps the per-net BUSYPOLLRXPACKETS stat
+if napi_poll() received packets:
+
+ BUG: unable to handle kernel NULL pointer dereference at 0000000000000190
+ IP: napi_busy_loop+0xd6/0x200
+ Call Trace:
+ sock_poll+0x5e/0x80
+ do_sys_poll+0x324/0x5a0
+ SyS_poll+0x6c/0xf0
+ do_syscall_64+0x6b/0x1f0
+ entry_SYSCALL_64_after_hwframe+0x3d/0xa2
+
+Fixes: 7db6b048da3b ("net: Commonize busy polling code to focus on napi_id instead of socket")
+Signed-off-by: Josh Elsasser <jelsasser@appneta.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -8599,6 +8599,9 @@ int init_dummy_netdev(struct net_device
+ set_bit(__LINK_STATE_PRESENT, &dev->state);
+ set_bit(__LINK_STATE_START, &dev->state);
+
++ /* napi_busy_loop stats accounting wants this */
++ dev_net_set(dev, &init_net);
++
+ /* Note : We dont allocate pcpu_refcnt for dummy devices,
+ * because users of this 'device' dont need to change
+ * its refcount.
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Cong Wang <xiyou.wangcong@gmail.com>
+Date: Thu, 24 Jan 2019 14:18:18 -0800
+Subject: netrom: switch to sock timer API
+
+From: Cong Wang <xiyou.wangcong@gmail.com>
+
+[ Upstream commit 63346650c1a94a92be61a57416ac88c0a47c4327 ]
+
+sk_reset_timer() and sk_stop_timer() properly handle
+sock refcnt for timer function. Switching to them
+could fix a refcounting bug reported by syzbot.
+
+Reported-and-tested-by: syzbot+defa700d16f1bd1b9a05@syzkaller.appspotmail.com
+Cc: Ralf Baechle <ralf@linux-mips.org>
+Cc: linux-hams@vger.kernel.org
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netrom/nr_timer.c | 20 ++++++++++----------
+ 1 file changed, 10 insertions(+), 10 deletions(-)
+
+--- a/net/netrom/nr_timer.c
++++ b/net/netrom/nr_timer.c
+@@ -52,21 +52,21 @@ void nr_start_t1timer(struct sock *sk)
+ {
+ struct nr_sock *nr = nr_sk(sk);
+
+- mod_timer(&nr->t1timer, jiffies + nr->t1);
++ sk_reset_timer(sk, &nr->t1timer, jiffies + nr->t1);
+ }
+
+ void nr_start_t2timer(struct sock *sk)
+ {
+ struct nr_sock *nr = nr_sk(sk);
+
+- mod_timer(&nr->t2timer, jiffies + nr->t2);
++ sk_reset_timer(sk, &nr->t2timer, jiffies + nr->t2);
+ }
+
+ void nr_start_t4timer(struct sock *sk)
+ {
+ struct nr_sock *nr = nr_sk(sk);
+
+- mod_timer(&nr->t4timer, jiffies + nr->t4);
++ sk_reset_timer(sk, &nr->t4timer, jiffies + nr->t4);
+ }
+
+ void nr_start_idletimer(struct sock *sk)
+@@ -74,37 +74,37 @@ void nr_start_idletimer(struct sock *sk)
+ struct nr_sock *nr = nr_sk(sk);
+
+ if (nr->idle > 0)
+- mod_timer(&nr->idletimer, jiffies + nr->idle);
++ sk_reset_timer(sk, &nr->idletimer, jiffies + nr->idle);
+ }
+
+ void nr_start_heartbeat(struct sock *sk)
+ {
+- mod_timer(&sk->sk_timer, jiffies + 5 * HZ);
++ sk_reset_timer(sk, &sk->sk_timer, jiffies + 5 * HZ);
+ }
+
+ void nr_stop_t1timer(struct sock *sk)
+ {
+- del_timer(&nr_sk(sk)->t1timer);
++ sk_stop_timer(sk, &nr_sk(sk)->t1timer);
+ }
+
+ void nr_stop_t2timer(struct sock *sk)
+ {
+- del_timer(&nr_sk(sk)->t2timer);
++ sk_stop_timer(sk, &nr_sk(sk)->t2timer);
+ }
+
+ void nr_stop_t4timer(struct sock *sk)
+ {
+- del_timer(&nr_sk(sk)->t4timer);
++ sk_stop_timer(sk, &nr_sk(sk)->t4timer);
+ }
+
+ void nr_stop_idletimer(struct sock *sk)
+ {
+- del_timer(&nr_sk(sk)->idletimer);
++ sk_stop_timer(sk, &nr_sk(sk)->idletimer);
+ }
+
+ void nr_stop_heartbeat(struct sock *sk)
+ {
+- del_timer(&sk->sk_timer);
++ sk_stop_timer(sk, &sk->sk_timer);
+ }
+
+ int nr_t1timer_running(struct sock *sk)
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Simon Horman <horms+renesas@verge.net.au>
+Date: Wed, 23 Jan 2019 12:14:52 +0100
+Subject: ravb: expand rx descriptor data to accommodate hw checksum
+
+From: Simon Horman <horms+renesas@verge.net.au>
+
+[ Upstream commit 12da64300fbc76b875900445f4146c3dc617d43e ]
+
+EtherAVB may provide a checksum of packet data appended to packet data. In
+order to allow this checksum to be received by the host descriptor data
+needs to be enlarged by 2 bytes to accommodate the checksum.
+
+In the case of MTU-sized packets without a VLAN tag the
+checksum were already accommodated by virtue of the space reserved for the
+VLAN tag. However, a packet of MTU-size with a VLAN tag consumed all
+packet data space provided by a descriptor leaving no space for the
+trailing checksum.
+
+This was not detected by the driver, which incorrectly used the last two
+bytes of packet data as the checksum and truncated the packet by two bytes.
+This resulted in all such packets being dropped.
+
+A work around is to disable RX checksum offload
+ # ethtool -K eth0 rx off
+
+This patch resolves this problem by increasing the size available for
+packet data in RX descriptors by two bytes.
+
+Tested on R-Car E3 (r8a77990) ES1.0 based Ebisu-4D board
+
+v2
+* Use sizeof(__sum16) directly rather than adding a driver-local
+ #define for the size of the checksum provided by the hw (2 bytes).
+
+Fixes: 4d86d3818627 ("ravb: RX checksum offload")
+Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
+Reviewed-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/renesas/ravb_main.c | 12 +++++++-----
+ 1 file changed, 7 insertions(+), 5 deletions(-)
+
+--- a/drivers/net/ethernet/renesas/ravb_main.c
++++ b/drivers/net/ethernet/renesas/ravb_main.c
+@@ -344,7 +344,7 @@ static int ravb_ring_init(struct net_dev
+ int i;
+
+ priv->rx_buf_sz = (ndev->mtu <= 1492 ? PKT_BUF_SZ : ndev->mtu) +
+- ETH_HLEN + VLAN_HLEN;
++ ETH_HLEN + VLAN_HLEN + sizeof(__sum16);
+
+ /* Allocate RX and TX skb rings */
+ priv->rx_skb[q] = kcalloc(priv->num_rx_ring[q],
+@@ -525,13 +525,15 @@ static void ravb_rx_csum(struct sk_buff
+ {
+ u8 *hw_csum;
+
+- /* The hardware checksum is 2 bytes appended to packet data */
+- if (unlikely(skb->len < 2))
++ /* The hardware checksum is contained in sizeof(__sum16) (2) bytes
++ * appended to packet data
++ */
++ if (unlikely(skb->len < sizeof(__sum16)))
+ return;
+- hw_csum = skb_tail_pointer(skb) - 2;
++ hw_csum = skb_tail_pointer(skb) - sizeof(__sum16);
+ skb->csum = csum_unfold((__force __sum16)get_unaligned_le16(hw_csum));
+ skb->ip_summed = CHECKSUM_COMPLETE;
+- skb_trim(skb, skb->len - 2);
++ skb_trim(skb, skb->len - sizeof(__sum16));
+ }
+
+ /* Packet receive function for Ethernet AVB */
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Bodong Wang <bodong@mellanox.com>
+Date: Sun, 13 Jan 2019 22:47:26 -0600
+Subject: Revert "net/mlx5e: E-Switch, Initialize eswitch only if eswitch manager"
+
+From: Bodong Wang <bodong@mellanox.com>
+
+[ Upstream commit 4e046de0f50e04acd48eb373d6a9061ddf014e0c ]
+
+This reverts commit 5f5991f36dce1e69dd8bd7495763eec2e28f08e7.
+
+With the original commit, eswitch instance will not be initialized for
+a function which is vport group manager but not eswitch manager such as
+host PF on SmartNIC (BlueField) card. This will result in a kernel crash
+when such a vport group manager is trying to access vports in its group.
+E.g, PF vport manager (not eswitch manager) tries to configure the MAC
+of its VF vport, a kernel trace similar to the one below will happen:
+
+ BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
+ ...
+ RIP: 0010:mlx5_eswitch_get_vport_config+0xc/0x180 [mlx5_core]
+ ...
+
+Fixes: 5f5991f36dce ("net/mlx5e: E-Switch, Initialize eswitch only if eswitch manager")
+Signed-off-by: Bodong Wang <bodong@mellanox.com>
+Reported-by: Yuval Avnery <yuvalav@mellanox.com>
+Reviewed-by: Daniel Jurgens <danielj@mellanox.com>
+Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+@@ -1689,7 +1689,7 @@ int mlx5_eswitch_init(struct mlx5_core_d
+ int vport_num;
+ int err;
+
+- if (!MLX5_ESWITCH_MANAGER(dev))
++ if (!MLX5_VPORT_MANAGER(dev))
+ return 0;
+
+ esw_info(dev,
+@@ -1758,7 +1758,7 @@ abort:
+
+ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
+ {
+- if (!esw || !MLX5_ESWITCH_MANAGER(esw->dev))
++ if (!esw || !MLX5_VPORT_MANAGER(esw->dev))
+ return;
+
+ esw_info(esw->dev, "cleanup\n");
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Xin Long <lucien.xin@gmail.com>
+Date: Tue, 22 Jan 2019 02:40:12 +0800
+Subject: sctp: improve the events for sctp stream adding
+
+From: Xin Long <lucien.xin@gmail.com>
+
+[ Upstream commit 8220c870cb0f4eaa4e335c9645dbd9a1c461c1dd ]
+
+This patch is to improve sctp stream adding events in 2 places:
+
+ 1. In sctp_process_strreset_addstrm_out(), move up SCTP_MAX_STREAM
+ and in stream allocation failure checks, as the adding has to
+ succeed after reconf_timer stops for the in stream adding
+ request retransmission.
+
+ 2. In sctp_process_strreset_addstrm_in(), no event should be sent,
+ as no in or out stream is added here.
+
+Fixes: 50a41591f110 ("sctp: implement receiver-side procedures for the Add Outgoing Streams Request Parameter")
+Fixes: c5c4ebb3ab87 ("sctp: implement receiver-side procedures for the Add Incoming Streams Request Parameter")
+Reported-by: Ying Xu <yinxu@redhat.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/stream.c | 19 ++++++++-----------
+ 1 file changed, 8 insertions(+), 11 deletions(-)
+
+--- a/net/sctp/stream.c
++++ b/net/sctp/stream.c
+@@ -866,6 +866,14 @@ struct sctp_chunk *sctp_process_strreset
+ if (!(asoc->strreset_enable & SCTP_ENABLE_CHANGE_ASSOC_REQ))
+ goto out;
+
++ in = ntohs(addstrm->number_of_streams);
++ incnt = stream->incnt + in;
++ if (!in || incnt > SCTP_MAX_STREAM)
++ goto out;
++
++ if (sctp_stream_alloc_in(stream, incnt, GFP_ATOMIC))
++ goto out;
++
+ if (asoc->strreset_chunk) {
+ if (!sctp_chunk_lookup_strreset_param(
+ asoc, 0, SCTP_PARAM_RESET_ADD_IN_STREAMS)) {
+@@ -889,14 +897,6 @@ struct sctp_chunk *sctp_process_strreset
+ }
+ }
+
+- in = ntohs(addstrm->number_of_streams);
+- incnt = stream->incnt + in;
+- if (!in || incnt > SCTP_MAX_STREAM)
+- goto out;
+-
+- if (sctp_stream_alloc_in(stream, incnt, GFP_ATOMIC))
+- goto out;
+-
+ stream->incnt = incnt;
+
+ result = SCTP_STRRESET_PERFORMED;
+@@ -966,9 +966,6 @@ struct sctp_chunk *sctp_process_strreset
+
+ result = SCTP_STRRESET_PERFORMED;
+
+- *evp = sctp_ulpevent_make_stream_change_event(asoc,
+- 0, 0, ntohs(addstrm->number_of_streams), GFP_ATOMIC);
+-
+ out:
+ sctp_update_strreset_result(asoc, result);
+ err:
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Xin Long <lucien.xin@gmail.com>
+Date: Tue, 22 Jan 2019 02:39:34 +0800
+Subject: sctp: improve the events for sctp stream reset
+
+From: Xin Long <lucien.xin@gmail.com>
+
+[ Upstream commit 2e6dc4d95110becfe0ff4c3d4749c33ea166e9e7 ]
+
+This patch is to improve sctp stream reset events in 4 places:
+
+ 1. In sctp_process_strreset_outreq(), the flag should always be set with
+ SCTP_STREAM_RESET_INCOMING_SSN instead of OUTGOING, as receiver's in
+ stream is reset here.
+ 2. In sctp_process_strreset_outreq(), move up SCTP_STRRESET_ERR_WRONG_SSN
+ check, as the reset has to succeed after reconf_timer stops for the
+ in stream reset request retransmission.
+ 3. In sctp_process_strreset_inreq(), no event should be sent, as no in
+ or out stream is reset here.
+ 4. In sctp_process_strreset_resp(), SCTP_STREAM_RESET_INCOMING_SSN or
+ OUTGOING event should always be sent for stream reset requests, no
+ matter it fails or succeeds to process the request.
+
+Fixes: 810544764536 ("sctp: implement receiver-side procedures for the Outgoing SSN Reset Request Parameter")
+Fixes: 16e1a91965b0 ("sctp: implement receiver-side procedures for the Incoming SSN Reset Request Parameter")
+Fixes: 11ae76e67a17 ("sctp: implement receiver-side procedures for the Reconf Response Parameter")
+Reported-by: Ying Xu <yinxu@redhat.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/stream.c | 39 +++++++++++++++++----------------------
+ 1 file changed, 17 insertions(+), 22 deletions(-)
+
+--- a/net/sctp/stream.c
++++ b/net/sctp/stream.c
+@@ -585,9 +585,9 @@ struct sctp_chunk *sctp_process_strreset
+ struct sctp_strreset_outreq *outreq = param.v;
+ struct sctp_stream *stream = &asoc->stream;
+ __u32 result = SCTP_STRRESET_DENIED;
+- __u16 i, nums, flags = 0;
+ __be16 *str_p = NULL;
+ __u32 request_seq;
++ __u16 i, nums;
+
+ request_seq = ntohl(outreq->request_seq);
+
+@@ -615,6 +615,15 @@ struct sctp_chunk *sctp_process_strreset
+ if (!(asoc->strreset_enable & SCTP_ENABLE_RESET_STREAM_REQ))
+ goto out;
+
++ nums = (ntohs(param.p->length) - sizeof(*outreq)) / sizeof(__u16);
++ str_p = outreq->list_of_streams;
++ for (i = 0; i < nums; i++) {
++ if (ntohs(str_p[i]) >= stream->incnt) {
++ result = SCTP_STRRESET_ERR_WRONG_SSN;
++ goto out;
++ }
++ }
++
+ if (asoc->strreset_chunk) {
+ if (!sctp_chunk_lookup_strreset_param(
+ asoc, outreq->response_seq,
+@@ -637,32 +646,19 @@ struct sctp_chunk *sctp_process_strreset
+ sctp_chunk_put(asoc->strreset_chunk);
+ asoc->strreset_chunk = NULL;
+ }
+-
+- flags = SCTP_STREAM_RESET_INCOMING_SSN;
+ }
+
+- nums = (ntohs(param.p->length) - sizeof(*outreq)) / sizeof(__u16);
+- if (nums) {
+- str_p = outreq->list_of_streams;
+- for (i = 0; i < nums; i++) {
+- if (ntohs(str_p[i]) >= stream->incnt) {
+- result = SCTP_STRRESET_ERR_WRONG_SSN;
+- goto out;
+- }
+- }
+-
++ if (nums)
+ for (i = 0; i < nums; i++)
+ SCTP_SI(stream, ntohs(str_p[i]))->mid = 0;
+- } else {
++ else
+ for (i = 0; i < stream->incnt; i++)
+ SCTP_SI(stream, i)->mid = 0;
+- }
+
+ result = SCTP_STRRESET_PERFORMED;
+
+ *evp = sctp_ulpevent_make_stream_reset_event(asoc,
+- flags | SCTP_STREAM_RESET_OUTGOING_SSN, nums, str_p,
+- GFP_ATOMIC);
++ SCTP_STREAM_RESET_INCOMING_SSN, nums, str_p, GFP_ATOMIC);
+
+ out:
+ sctp_update_strreset_result(asoc, result);
+@@ -738,9 +734,6 @@ struct sctp_chunk *sctp_process_strreset
+
+ result = SCTP_STRRESET_PERFORMED;
+
+- *evp = sctp_ulpevent_make_stream_reset_event(asoc,
+- SCTP_STREAM_RESET_INCOMING_SSN, nums, str_p, GFP_ATOMIC);
+-
+ out:
+ sctp_update_strreset_result(asoc, result);
+ err:
+@@ -1036,10 +1029,10 @@ struct sctp_chunk *sctp_process_strreset
+ sout->mid_uo = 0;
+ }
+ }
+-
+- flags = SCTP_STREAM_RESET_OUTGOING_SSN;
+ }
+
++ flags |= SCTP_STREAM_RESET_OUTGOING_SSN;
++
+ for (i = 0; i < stream->outcnt; i++)
+ SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN;
+
+@@ -1058,6 +1051,8 @@ struct sctp_chunk *sctp_process_strreset
+ nums = (ntohs(inreq->param_hdr.length) - sizeof(*inreq)) /
+ sizeof(__u16);
+
++ flags |= SCTP_STREAM_RESET_INCOMING_SSN;
++
+ *evp = sctp_ulpevent_make_stream_reset_event(asoc, flags,
+ nums, str_p, GFP_ATOMIC);
+ } else if (req->type == SCTP_PARAM_RESET_TSN_REQUEST) {
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Xin Long <lucien.xin@gmail.com>
+Date: Tue, 22 Jan 2019 02:42:09 +0800
+Subject: sctp: set chunk transport correctly when it's a new asoc
+
+From: Xin Long <lucien.xin@gmail.com>
+
+[ Upstream commit 4ff40b86262b73553ee47cc3784ce8ba0f220bd8 ]
+
+In the paths:
+
+ sctp_sf_do_unexpected_init() ->
+ sctp_make_init_ack()
+ sctp_sf_do_dupcook_a/b()() ->
+ sctp_sf_do_5_1D_ce()
+
+The new chunk 'retval' transport is set from the incoming chunk 'chunk'
+transport. However, 'retval' transport belongs to the new asoc, which
+is a different one from 'chunk' transport's asoc.
+
+It will cause that the 'retval' chunk gets set with a wrong transport.
+Later when sending it and because of Commit b9fd683982c9 ("sctp: add
+sctp_packet_singleton"), sctp_packet_singleton() will set some fields,
+like vtag to 'retval' chunk from that wrong transport's asoc.
+
+This patch is to fix it by setting 'retval' transport correctly which
+belongs to the right asoc in sctp_make_init_ack() and
+sctp_sf_do_5_1D_ce().
+
+Fixes: b9fd683982c9 ("sctp: add sctp_packet_singleton")
+Reported-by: Ying Xu <yinxu@redhat.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/sm_make_chunk.c | 11 ++++++++---
+ 1 file changed, 8 insertions(+), 3 deletions(-)
+
+--- a/net/sctp/sm_make_chunk.c
++++ b/net/sctp/sm_make_chunk.c
+@@ -495,7 +495,10 @@ struct sctp_chunk *sctp_make_init_ack(co
+ *
+ * [INIT ACK back to where the INIT came from.]
+ */
+- retval->transport = chunk->transport;
++ if (chunk->transport)
++ retval->transport =
++ sctp_assoc_lookup_paddr(asoc,
++ &chunk->transport->ipaddr);
+
+ retval->subh.init_hdr =
+ sctp_addto_chunk(retval, sizeof(initack), &initack);
+@@ -642,8 +645,10 @@ struct sctp_chunk *sctp_make_cookie_ack(
+ *
+ * [COOKIE ACK back to where the COOKIE ECHO came from.]
+ */
+- if (retval && chunk)
+- retval->transport = chunk->transport;
++ if (retval && chunk && chunk->transport)
++ retval->transport =
++ sctp_assoc_lookup_paddr(asoc,
++ &chunk->transport->ipaddr);
+
+ return retval;
+ }
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Xin Long <lucien.xin@gmail.com>
+Date: Tue, 22 Jan 2019 02:42:41 +0800
+Subject: sctp: set flow sport from saddr only when it's 0
+
+From: Xin Long <lucien.xin@gmail.com>
+
+[ Upstream commit ecf938fe7d0088077ee1280419a2b3c5429b47c8 ]
+
+Now sctp_transport_pmtu() passes transport->saddr into .get_dst() to set
+flow sport from 'saddr'. However, transport->saddr is set only when
+transport->dst exists in sctp_transport_route().
+
+If sctp_transport_pmtu() is called without transport->saddr set, like
+when transport->dst doesn't exists, the flow sport will be set to 0
+from transport->saddr, which will cause a wrong route to be got.
+
+Commit 6e91b578bf3f ("sctp: re-use sctp_transport_pmtu in
+sctp_transport_route") made the issue be triggered more easily
+since sctp_transport_pmtu() would be called in sctp_transport_route()
+after that.
+
+In general, fl4->fl4_sport should always be set to
+htons(asoc->base.bind_addr.port), unless transport->asoc doesn't exist
+in sctp_v4/6_get_dst(), which is the case:
+
+ sctp_ootb_pkt_new() ->
+ sctp_transport_route()
+
+For that, we can simply handle it by setting flow sport from saddr only
+when it's 0 in sctp_v4/6_get_dst().
+
+Fixes: 6e91b578bf3f ("sctp: re-use sctp_transport_pmtu in sctp_transport_route")
+Reported-by: Ying Xu <yinxu@redhat.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/ipv6.c | 3 ++-
+ net/sctp/protocol.c | 3 ++-
+ 2 files changed, 4 insertions(+), 2 deletions(-)
+
+--- a/net/sctp/ipv6.c
++++ b/net/sctp/ipv6.c
+@@ -277,7 +277,8 @@ static void sctp_v6_get_dst(struct sctp_
+
+ if (saddr) {
+ fl6->saddr = saddr->v6.sin6_addr;
+- fl6->fl6_sport = saddr->v6.sin6_port;
++ if (!fl6->fl6_sport)
++ fl6->fl6_sport = saddr->v6.sin6_port;
+
+ pr_debug("src=%pI6 - ", &fl6->saddr);
+ }
+--- a/net/sctp/protocol.c
++++ b/net/sctp/protocol.c
+@@ -440,7 +440,8 @@ static void sctp_v4_get_dst(struct sctp_
+ }
+ if (saddr) {
+ fl4->saddr = saddr->v4.sin_addr.s_addr;
+- fl4->fl4_sport = saddr->v4.sin_port;
++ if (!fl4->fl4_sport)
++ fl4->fl4_sport = saddr->v4.sin_port;
+ }
+
+ pr_debug("%s: dst:%pI4, src:%pI4 - ", __func__, &fl4->daddr,
fix-net-ipv4-do-not-handle-duplicate-fragments-as-overlapping.patch
drm-msm-gpu-fix-building-without-debugfs.patch
+ipv6-consider-sk_bound_dev_if-when-binding-a-socket-to-an-address.patch
+ipv6-sr-clear-ip6cb-skb-on-srh-ip4ip6-encapsulation.patch
+ipvlan-l3mdev-fix-broken-l3s-mode-wrt-local-routes.patch
+l2tp-copy-4-more-bytes-to-linear-part-if-necessary.patch
+l2tp-fix-reading-optional-fields-of-l2tpv3.patch
+net-ip_gre-always-reports-o_key-to-userspace.patch
+net-ip_gre-use-erspan-key-field-for-tunnel-lookup.patch
+net-mlx4_core-add-masking-for-a-few-queries-on-hca-caps.patch
+netrom-switch-to-sock-timer-api.patch
+net-rose-fix-null-ax25_cb-kernel-panic.patch
+net-set-default-network-namespace-in-init_dummy_netdev.patch
+ravb-expand-rx-descriptor-data-to-accommodate-hw-checksum.patch
+sctp-improve-the-events-for-sctp-stream-reset.patch
+tun-move-the-call-to-tun_set_real_num_queues.patch
+ucc_geth-reset-bql-queue-when-stopping-device.patch
+vhost-fix-oob-in-get_rx_bufs.patch
+net-ip6_gre-always-reports-o_key-to-userspace.patch
+sctp-improve-the-events-for-sctp-stream-adding.patch
+net-mlx5e-allow-mac-invalidation-while-spoofchk-is-on.patch
+ip6mr-fix-notifiers-call-on-mroute_clean_tables.patch
+revert-net-mlx5e-e-switch-initialize-eswitch-only-if-eswitch-manager.patch
+sctp-set-chunk-transport-correctly-when-it-s-a-new-asoc.patch
+sctp-set-flow-sport-from-saddr-only-when-it-s-0.patch
+virtio_net-don-t-enable-napi-when-interface-is-down.patch
+virtio_net-don-t-call-free_old_xmit_skbs-for-xdp_frames.patch
+virtio_net-fix-not-restoring-real_num_rx_queues.patch
+virtio_net-fix-out-of-bounds-access-of-sq.patch
+virtio_net-don-t-process-redirected-xdp-frames-when-xdp-is-disabled.patch
+virtio_net-use-xdp_return_frame-to-free-xdp_frames-on-destroying-vqs.patch
+virtio_net-differentiate-sk_buff-and-xdp_frame-on-freeing.patch
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: George Amanakis <gamanakis@gmail.com>
+Date: Tue, 29 Jan 2019 22:50:13 -0500
+Subject: tun: move the call to tun_set_real_num_queues
+
+From: George Amanakis <gamanakis@gmail.com>
+
+[ Upstream commit 3a03cb8456cc1d61c467a5375e0a10e5207b948c ]
+
+Call tun_set_real_num_queues() after the increment of tun->numqueues
+since the former depends on it. Otherwise, the number of queues is not
+correctly accounted for, which results in warnings similar to:
+"vnet0 selects TX queue 11, but real number of TX queues is 11".
+
+Fixes: 0b7959b62573 ("tun: publish tfile after it's fully initialized")
+Reported-and-tested-by: George Amanakis <gamanakis@gmail.com>
+Signed-off-by: George Amanakis <gamanakis@gmail.com>
+Signed-off-by: Stanislav Fomichev <sdf@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/tun.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/drivers/net/tun.c
++++ b/drivers/net/tun.c
+@@ -866,8 +866,6 @@ static int tun_attach(struct tun_struct
+ tun_napi_init(tun, tfile, napi, napi_frags);
+ }
+
+- tun_set_real_num_queues(tun);
+-
+ /* device is allowed to go away first, so no need to hold extra
+ * refcnt.
+ */
+@@ -879,6 +877,7 @@ static int tun_attach(struct tun_struct
+ rcu_assign_pointer(tfile->tun, tun);
+ rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile);
+ tun->numqueues++;
++ tun_set_real_num_queues(tun);
+ out:
+ return err;
+ }
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Mathias Thore <mathias.thore@infinera.com>
+Date: Mon, 28 Jan 2019 10:07:47 +0100
+Subject: ucc_geth: Reset BQL queue when stopping device
+
+From: Mathias Thore <mathias.thore@infinera.com>
+
+[ Upstream commit e15aa3b2b1388c399c1a2ce08550d2cc4f7e3e14 ]
+
+After a timeout event caused by for example a broadcast storm, when
+the MAC and PHY are reset, the BQL TX queue needs to be reset as
+well. Otherwise, the device will exhibit severe performance issues
+even after the storm has ended.
+
+Co-authored-by: David Gounaris <david.gounaris@infinera.com>
+Signed-off-by: Mathias Thore <mathias.thore@infinera.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/freescale/ucc_geth.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/net/ethernet/freescale/ucc_geth.c
++++ b/drivers/net/ethernet/freescale/ucc_geth.c
+@@ -1888,6 +1888,8 @@ static void ucc_geth_free_tx(struct ucc_
+ u16 i, j;
+ u8 __iomem *bd;
+
++ netdev_reset_queue(ugeth->ndev);
++
+ ug_info = ugeth->ug_info;
+ uf_info = &ug_info->uf_info;
+
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Jason Wang <jasowang@redhat.com>
+Date: Mon, 28 Jan 2019 15:05:05 +0800
+Subject: vhost: fix OOB in get_rx_bufs()
+
+From: Jason Wang <jasowang@redhat.com>
+
+[ Upstream commit b46a0bf78ad7b150ef5910da83859f7f5a514ffd ]
+
+After batched used ring updating was introduced in commit e2b3b35eb989
+("vhost_net: batch used ring update in rx"). We tend to batch heads in
+vq->heads for more than one packet. But the quota passed to
+get_rx_bufs() was not correctly limited, which can result in an OOB write
+in vq->heads.
+
+ headcount = get_rx_bufs(vq, vq->heads + nvq->done_idx,
+ vhost_len, &in, vq_log, &log,
+ likely(mergeable) ? UIO_MAXIOV : 1);
+
+UIO_MAXIOV was still used which is wrong since we could have batched
+used in vq->heads, this will cause OOB if the next buffer needs more
+than 960 (1024 (UIO_MAXIOV) - 64 (VHOST_NET_BATCH)) heads after we've
+batched 64 (VHOST_NET_BATCH) heads:
+Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
+
+=============================================================================
+BUG kmalloc-8k (Tainted: G B ): Redzone overwritten
+-----------------------------------------------------------------------------
+
+INFO: 0x00000000fd93b7a2-0x00000000f0713384. First byte 0xa9 instead of 0xcc
+INFO: Allocated in alloc_pd+0x22/0x60 age=3933677 cpu=2 pid=2674
+ kmem_cache_alloc_trace+0xbb/0x140
+ alloc_pd+0x22/0x60
+ gen8_ppgtt_create+0x11d/0x5f0
+ i915_ppgtt_create+0x16/0x80
+ i915_gem_create_context+0x248/0x390
+ i915_gem_context_create_ioctl+0x4b/0xe0
+ drm_ioctl_kernel+0xa5/0xf0
+ drm_ioctl+0x2ed/0x3a0
+ do_vfs_ioctl+0x9f/0x620
+ ksys_ioctl+0x6b/0x80
+ __x64_sys_ioctl+0x11/0x20
+ do_syscall_64+0x43/0xf0
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+INFO: Slab 0x00000000d13e87af objects=3 used=3 fp=0x (null) flags=0x200000000010201
+INFO: Object 0x0000000003278802 @offset=17064 fp=0x00000000e2e6652b
+
+Fixing this by allocating UIO_MAXIOV + VHOST_NET_BATCH iovs for
+vhost-net. This is done through set the limitation through
+vhost_dev_init(), then set_owner can allocate the number of iov in a
+per device manner.
+
+This fixes CVE-2018-16880.
+
+Fixes: e2b3b35eb989 ("vhost_net: batch used ring update in rx")
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/vhost/net.c | 3 ++-
+ drivers/vhost/scsi.c | 2 +-
+ drivers/vhost/vhost.c | 7 ++++---
+ drivers/vhost/vhost.h | 4 +++-
+ drivers/vhost/vsock.c | 2 +-
+ 5 files changed, 11 insertions(+), 7 deletions(-)
+
+--- a/drivers/vhost/net.c
++++ b/drivers/vhost/net.c
+@@ -1114,7 +1114,8 @@ static int vhost_net_open(struct inode *
+ n->vqs[i].rx_ring = NULL;
+ vhost_net_buf_init(&n->vqs[i].rxq);
+ }
+- vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX);
++ vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX,
++ UIO_MAXIOV + VHOST_NET_BATCH);
+
+ vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, EPOLLOUT, dev);
+ vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, EPOLLIN, dev);
+--- a/drivers/vhost/scsi.c
++++ b/drivers/vhost/scsi.c
+@@ -1398,7 +1398,7 @@ static int vhost_scsi_open(struct inode
+ vqs[i] = &vs->vqs[i].vq;
+ vs->vqs[i].vq.handle_kick = vhost_scsi_handle_kick;
+ }
+- vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ);
++ vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ, UIO_MAXIOV);
+
+ vhost_scsi_init_inflight(vs, NULL);
+
+--- a/drivers/vhost/vhost.c
++++ b/drivers/vhost/vhost.c
+@@ -390,9 +390,9 @@ static long vhost_dev_alloc_iovecs(struc
+ vq->indirect = kmalloc_array(UIO_MAXIOV,
+ sizeof(*vq->indirect),
+ GFP_KERNEL);
+- vq->log = kmalloc_array(UIO_MAXIOV, sizeof(*vq->log),
++ vq->log = kmalloc_array(dev->iov_limit, sizeof(*vq->log),
+ GFP_KERNEL);
+- vq->heads = kmalloc_array(UIO_MAXIOV, sizeof(*vq->heads),
++ vq->heads = kmalloc_array(dev->iov_limit, sizeof(*vq->heads),
+ GFP_KERNEL);
+ if (!vq->indirect || !vq->log || !vq->heads)
+ goto err_nomem;
+@@ -414,7 +414,7 @@ static void vhost_dev_free_iovecs(struct
+ }
+
+ void vhost_dev_init(struct vhost_dev *dev,
+- struct vhost_virtqueue **vqs, int nvqs)
++ struct vhost_virtqueue **vqs, int nvqs, int iov_limit)
+ {
+ struct vhost_virtqueue *vq;
+ int i;
+@@ -427,6 +427,7 @@ void vhost_dev_init(struct vhost_dev *de
+ dev->iotlb = NULL;
+ dev->mm = NULL;
+ dev->worker = NULL;
++ dev->iov_limit = iov_limit;
+ init_llist_head(&dev->work_list);
+ init_waitqueue_head(&dev->wait);
+ INIT_LIST_HEAD(&dev->read_list);
+--- a/drivers/vhost/vhost.h
++++ b/drivers/vhost/vhost.h
+@@ -170,9 +170,11 @@ struct vhost_dev {
+ struct list_head read_list;
+ struct list_head pending_list;
+ wait_queue_head_t wait;
++ int iov_limit;
+ };
+
+-void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs, int nvqs);
++void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs,
++ int nvqs, int iov_limit);
+ long vhost_dev_set_owner(struct vhost_dev *dev);
+ bool vhost_dev_has_owner(struct vhost_dev *dev);
+ long vhost_dev_check_owner(struct vhost_dev *);
+--- a/drivers/vhost/vsock.c
++++ b/drivers/vhost/vsock.c
+@@ -531,7 +531,7 @@ static int vhost_vsock_dev_open(struct i
+ vsock->vqs[VSOCK_VQ_TX].handle_kick = vhost_vsock_handle_tx_kick;
+ vsock->vqs[VSOCK_VQ_RX].handle_kick = vhost_vsock_handle_rx_kick;
+
+- vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs));
++ vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs), UIO_MAXIOV);
+
+ file->private_data = vsock;
+ spin_lock_init(&vsock->send_pkt_list_lock);
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+Date: Tue, 29 Jan 2019 09:45:59 +0900
+Subject: virtio_net: Differentiate sk_buff and xdp_frame on freeing
+
+From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+
+[ Upstream commit 5050471d35d1316ba32dfcbb409978337eb9e75e
+
+ I had to fold commit df133f3f9625 ("virtio_net: bulk free tx skbs")
+ into this to make it work. ]
+
+We do not reset or free up unused buffers when enabling/disabling XDP,
+so it can happen that xdp_frames are freed after disabling XDP or
+sk_buffs are freed after enabling XDP on xdp tx queues.
+Thus we need to handle both forms (xdp_frames and sk_buffs) regardless
+of XDP setting.
+One way to trigger this problem is to disable XDP when napi_tx is
+enabled. In that case, virtnet_xdp_set() calls virtnet_napi_enable()
+which kicks NAPI. The NAPI handler will call virtnet_poll_cleantx()
+which invokes free_old_xmit_skbs() for queues which have been used by
+XDP.
+
+Note that even with this change we need to keep skipping
+free_old_xmit_skbs() from NAPI handlers when XDP is enabled, because XDP
+tx queues do not acquire queue locks.
+
+- v2: Use napi_consume_skb() instead of dev_consume_skb_any()
+
+Fixes: 4941d472bf95 ("virtio-net: do not reset during XDP set")
+Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/virtio_net.c | 64 ++++++++++++++++++++++++++++++++++-------------
+ 1 file changed, 47 insertions(+), 17 deletions(-)
+
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -57,6 +57,8 @@ module_param(napi_tx, bool, 0644);
+ #define VIRTIO_XDP_TX BIT(0)
+ #define VIRTIO_XDP_REDIR BIT(1)
+
++#define VIRTIO_XDP_FLAG BIT(0)
++
+ /* RX packet size EWMA. The average packet size is used to determine the packet
+ * buffer size when refilling RX rings. As the entire RX ring may be refilled
+ * at once, the weight is chosen so that the EWMA will be insensitive to short-
+@@ -251,6 +253,21 @@ struct padded_vnet_hdr {
+ char padding[4];
+ };
+
++static bool is_xdp_frame(void *ptr)
++{
++ return (unsigned long)ptr & VIRTIO_XDP_FLAG;
++}
++
++static void *xdp_to_ptr(struct xdp_frame *ptr)
++{
++ return (void *)((unsigned long)ptr | VIRTIO_XDP_FLAG);
++}
++
++static struct xdp_frame *ptr_to_xdp(void *ptr)
++{
++ return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG);
++}
++
+ /* Converting between virtqueue no. and kernel tx/rx queue no.
+ * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq
+ */
+@@ -461,7 +478,8 @@ static int __virtnet_xdp_xmit_one(struct
+
+ sg_init_one(sq->sg, xdpf->data, xdpf->len);
+
+- err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdpf, GFP_ATOMIC);
++ err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdp_to_ptr(xdpf),
++ GFP_ATOMIC);
+ if (unlikely(err))
+ return -ENOSPC; /* Caller handle free/refcnt */
+
+@@ -481,13 +499,13 @@ static int virtnet_xdp_xmit(struct net_d
+ {
+ struct virtnet_info *vi = netdev_priv(dev);
+ struct receive_queue *rq = vi->rq;
+- struct xdp_frame *xdpf_sent;
+ struct bpf_prog *xdp_prog;
+ struct send_queue *sq;
+ unsigned int len;
+ int drops = 0;
+ int kicks = 0;
+ int ret, err;
++ void *ptr;
+ int i;
+
+ /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
+@@ -506,8 +524,12 @@ static int virtnet_xdp_xmit(struct net_d
+ }
+
+ /* Free up any pending old buffers before queueing new ones. */
+- while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
+- xdp_return_frame(xdpf_sent);
++ while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
++ if (likely(is_xdp_frame(ptr)))
++ xdp_return_frame(ptr_to_xdp(ptr));
++ else
++ napi_consume_skb(ptr, false);
++ }
+
+ for (i = 0; i < n; i++) {
+ struct xdp_frame *xdpf = frames[i];
+@@ -1326,20 +1348,28 @@ static int virtnet_receive(struct receiv
+ return stats.packets;
+ }
+
+-static void free_old_xmit_skbs(struct send_queue *sq)
++static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi)
+ {
+- struct sk_buff *skb;
+ unsigned int len;
+ unsigned int packets = 0;
+ unsigned int bytes = 0;
++ void *ptr;
+
+- while ((skb = virtqueue_get_buf(sq->vq, &len)) != NULL) {
+- pr_debug("Sent skb %p\n", skb);
++ while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
++ if (likely(!is_xdp_frame(ptr))) {
++ struct sk_buff *skb = ptr;
+
+- bytes += skb->len;
+- packets++;
++ pr_debug("Sent skb %p\n", skb);
+
+- dev_consume_skb_any(skb);
++ bytes += skb->len;
++ napi_consume_skb(skb, in_napi);
++ } else {
++ struct xdp_frame *frame = ptr_to_xdp(ptr);
++
++ bytes += frame->len;
++ xdp_return_frame(frame);
++ }
++ packets++;
+ }
+
+ /* Avoid overhead when no packets have been processed
+@@ -1375,7 +1405,7 @@ static void virtnet_poll_cleantx(struct
+ return;
+
+ if (__netif_tx_trylock(txq)) {
+- free_old_xmit_skbs(sq);
++ free_old_xmit_skbs(sq, true);
+ __netif_tx_unlock(txq);
+ }
+
+@@ -1459,7 +1489,7 @@ static int virtnet_poll_tx(struct napi_s
+
+ txq = netdev_get_tx_queue(vi->dev, index);
+ __netif_tx_lock(txq, raw_smp_processor_id());
+- free_old_xmit_skbs(sq);
++ free_old_xmit_skbs(sq, true);
+ __netif_tx_unlock(txq);
+
+ virtqueue_napi_complete(napi, sq->vq, 0);
+@@ -1528,7 +1558,7 @@ static netdev_tx_t start_xmit(struct sk_
+ bool use_napi = sq->napi.weight;
+
+ /* Free up any pending old buffers before queueing new ones. */
+- free_old_xmit_skbs(sq);
++ free_old_xmit_skbs(sq, false);
+
+ if (use_napi && kick)
+ virtqueue_enable_cb_delayed(sq->vq);
+@@ -1571,7 +1601,7 @@ static netdev_tx_t start_xmit(struct sk_
+ if (!use_napi &&
+ unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
+ /* More just got used, free them then recheck. */
+- free_old_xmit_skbs(sq);
++ free_old_xmit_skbs(sq, false);
+ if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
+ netif_start_subqueue(dev, qnum);
+ virtqueue_disable_cb(sq->vq);
+@@ -2590,10 +2620,10 @@ static void free_unused_bufs(struct virt
+ for (i = 0; i < vi->max_queue_pairs; i++) {
+ struct virtqueue *vq = vi->sq[i].vq;
+ while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
+- if (!is_xdp_raw_buffer_queue(vi, i))
++ if (!is_xdp_frame(buf))
+ dev_kfree_skb(buf);
+ else
+- xdp_return_frame(buf);
++ xdp_return_frame(ptr_to_xdp(buf));
+ }
+ }
+
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+Date: Tue, 29 Jan 2019 09:45:54 +0900
+Subject: virtio_net: Don't call free_old_xmit_skbs for xdp_frames
+
+From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+
+[ Upstream commit 534da5e856334fb54cb0272a9fb3afec28ea3aed ]
+
+When napi_tx is enabled, virtnet_poll_cleantx() called
+free_old_xmit_skbs() even for xdp send queue.
+This is bogus since the queue has xdp_frames, not sk_buffs, thus mangled
+device tx bytes counters because skb->len is meaningless value, and even
+triggered oops due to general protection fault on freeing them.
+
+Since xdp send queues do not acquire locks, old xdp_frames should be
+freed only in virtnet_xdp_xmit(), so just skip free_old_xmit_skbs() for
+xdp send queues.
+
+Similarly virtnet_poll_tx() called free_old_xmit_skbs(). This NAPI
+handler is called even without calling start_xmit() because cb for tx is
+by default enabled. Once the handler is called, it enabled the cb again,
+and then the handler would be called again. We don't need this handler
+for XDP, so don't enable cb as well as not calling free_old_xmit_skbs().
+
+Also, we need to disable tx NAPI when disabling XDP, so
+virtnet_poll_tx() can safely access curr_queue_pairs and
+xdp_queue_pairs, which are not atomically updated while disabling XDP.
+
+Fixes: b92f1e6751a6 ("virtio-net: transmit napi")
+Fixes: 7b0411ef4aa6 ("virtio-net: clean tx descriptors from rx napi")
+Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/virtio_net.c | 49 +++++++++++++++++++++++++++++++----------------
+ 1 file changed, 33 insertions(+), 16 deletions(-)
+
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -1357,6 +1357,16 @@ static void free_old_xmit_skbs(struct se
+ u64_stats_update_end(&sq->stats.syncp);
+ }
+
++static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q)
++{
++ if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
++ return false;
++ else if (q < vi->curr_queue_pairs)
++ return true;
++ else
++ return false;
++}
++
+ static void virtnet_poll_cleantx(struct receive_queue *rq)
+ {
+ struct virtnet_info *vi = rq->vq->vdev->priv;
+@@ -1364,7 +1374,7 @@ static void virtnet_poll_cleantx(struct
+ struct send_queue *sq = &vi->sq[index];
+ struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index);
+
+- if (!sq->napi.weight)
++ if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index))
+ return;
+
+ if (__netif_tx_trylock(txq)) {
+@@ -1441,8 +1451,16 @@ static int virtnet_poll_tx(struct napi_s
+ {
+ struct send_queue *sq = container_of(napi, struct send_queue, napi);
+ struct virtnet_info *vi = sq->vq->vdev->priv;
+- struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, vq2txq(sq->vq));
++ unsigned int index = vq2txq(sq->vq);
++ struct netdev_queue *txq;
+
++ if (unlikely(is_xdp_raw_buffer_queue(vi, index))) {
++ /* We don't need to enable cb for XDP */
++ napi_complete_done(napi, 0);
++ return 0;
++ }
++
++ txq = netdev_get_tx_queue(vi->dev, index);
+ __netif_tx_lock(txq, raw_smp_processor_id());
+ free_old_xmit_skbs(sq);
+ __netif_tx_unlock(txq);
+@@ -2352,9 +2370,12 @@ static int virtnet_xdp_set(struct net_de
+ }
+
+ /* Make sure NAPI is not using any XDP TX queues for RX. */
+- if (netif_running(dev))
+- for (i = 0; i < vi->max_queue_pairs; i++)
++ if (netif_running(dev)) {
++ for (i = 0; i < vi->max_queue_pairs; i++) {
+ napi_disable(&vi->rq[i].napi);
++ virtnet_napi_tx_disable(&vi->sq[i].napi);
++ }
++ }
+
+ netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
+ err = _virtnet_set_queues(vi, curr_qp + xdp_qp);
+@@ -2373,16 +2394,22 @@ static int virtnet_xdp_set(struct net_de
+ }
+ if (old_prog)
+ bpf_prog_put(old_prog);
+- if (netif_running(dev))
++ if (netif_running(dev)) {
+ virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
++ virtnet_napi_tx_enable(vi, vi->sq[i].vq,
++ &vi->sq[i].napi);
++ }
+ }
+
+ return 0;
+
+ err:
+ if (netif_running(dev)) {
+- for (i = 0; i < vi->max_queue_pairs; i++)
++ for (i = 0; i < vi->max_queue_pairs; i++) {
+ virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
++ virtnet_napi_tx_enable(vi, vi->sq[i].vq,
++ &vi->sq[i].napi);
++ }
+ }
+ if (prog)
+ bpf_prog_sub(prog, vi->max_queue_pairs - 1);
+@@ -2539,16 +2566,6 @@ static void free_receive_page_frags(stru
+ put_page(vi->rq[i].alloc_frag.page);
+ }
+
+-static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q)
+-{
+- if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
+- return false;
+- else if (q < vi->curr_queue_pairs)
+- return true;
+- else
+- return false;
+-}
+-
+ static void free_unused_bufs(struct virtnet_info *vi)
+ {
+ void *buf;
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+Date: Tue, 29 Jan 2019 09:45:53 +0900
+Subject: virtio_net: Don't enable NAPI when interface is down
+
+From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+
+[ Upstream commit 8be4d9a492f88b96d4d3a06c6cbedbc40ca14c83 ]
+
+Commit 4e09ff536284 ("virtio-net: disable NAPI only when enabled during
+XDP set") tried to fix inappropriate NAPI enabling/disabling when
+!netif_running(), but was not complete.
+
+On error path virtio_net could enable NAPI even when !netif_running().
+This can cause enabling NAPI twice on virtnet_open(), which would
+trigger BUG_ON() in napi_enable().
+
+Fixes: 4941d472bf95b ("virtio-net: do not reset during XDP set")
+Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/virtio_net.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -2380,8 +2380,10 @@ static int virtnet_xdp_set(struct net_de
+ return 0;
+
+ err:
+- for (i = 0; i < vi->max_queue_pairs; i++)
+- virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
++ if (netif_running(dev)) {
++ for (i = 0; i < vi->max_queue_pairs; i++)
++ virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
++ }
+ if (prog)
+ bpf_prog_sub(prog, vi->max_queue_pairs - 1);
+ return err;
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+Date: Tue, 29 Jan 2019 09:45:57 +0900
+Subject: virtio_net: Don't process redirected XDP frames when XDP is disabled
+
+From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+
+[ Upstream commit 03aa6d34868c07b2b1b8b2db080602d7ec528173 ]
+
+Commit 8dcc5b0ab0ec ("virtio_net: fix ndo_xdp_xmit crash towards dev not
+ready for XDP") tried to avoid access to unexpected sq while XDP is
+disabled, but was not complete.
+
+There was a small window which causes out of bounds sq access in
+virtnet_xdp_xmit() while disabling XDP.
+
+An example case of
+ - curr_queue_pairs = 6 (2 for SKB and 4 for XDP)
+ - online_cpu_num = xdp_queue_pairs = 4
+when XDP is enabled:
+
+CPU 0 CPU 1
+(Disabling XDP) (Processing redirected XDP frames)
+
+ virtnet_xdp_xmit()
+virtnet_xdp_set()
+ _virtnet_set_queues()
+ set curr_queue_pairs (2)
+ check if rq->xdp_prog is not NULL
+ virtnet_xdp_sq(vi)
+ qp = curr_queue_pairs -
+ xdp_queue_pairs +
+ smp_processor_id()
+ = 2 - 4 + 1 = -1
+ sq = &vi->sq[qp] // out of bounds access
+ set xdp_queue_pairs (0)
+ rq->xdp_prog = NULL
+
+Basically we should not change curr_queue_pairs and xdp_queue_pairs
+while someone can read the values. Thus, when disabling XDP, assign NULL
+to rq->xdp_prog first, and wait for RCU grace period, then change
+xxx_queue_pairs.
+Note that we need to keep the current order when enabling XDP though.
+
+- v2: Make rcu_assign_pointer/synchronize_net conditional instead of
+ _virtnet_set_queues.
+
+Fixes: 186b3c998c50 ("virtio-net: support XDP_REDIRECT")
+Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/virtio_net.c | 33 ++++++++++++++++++++++++++-------
+ 1 file changed, 26 insertions(+), 7 deletions(-)
+
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -2360,6 +2360,10 @@ static int virtnet_xdp_set(struct net_de
+ return -ENOMEM;
+ }
+
++ old_prog = rtnl_dereference(vi->rq[0].xdp_prog);
++ if (!prog && !old_prog)
++ return 0;
++
+ if (prog) {
+ prog = bpf_prog_add(prog, vi->max_queue_pairs - 1);
+ if (IS_ERR(prog))
+@@ -2374,21 +2378,30 @@ static int virtnet_xdp_set(struct net_de
+ }
+ }
+
++ if (!prog) {
++ for (i = 0; i < vi->max_queue_pairs; i++) {
++ rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
++ if (i == 0)
++ virtnet_restore_guest_offloads(vi);
++ }
++ synchronize_net();
++ }
++
+ err = _virtnet_set_queues(vi, curr_qp + xdp_qp);
+ if (err)
+ goto err;
+ netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
+ vi->xdp_queue_pairs = xdp_qp;
+
+- for (i = 0; i < vi->max_queue_pairs; i++) {
+- old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
+- rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
+- if (i == 0) {
+- if (!old_prog)
++ if (prog) {
++ for (i = 0; i < vi->max_queue_pairs; i++) {
++ rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
++ if (i == 0 && !old_prog)
+ virtnet_clear_guest_offloads(vi);
+- if (!prog)
+- virtnet_restore_guest_offloads(vi);
+ }
++ }
++
++ for (i = 0; i < vi->max_queue_pairs; i++) {
+ if (old_prog)
+ bpf_prog_put(old_prog);
+ if (netif_running(dev)) {
+@@ -2401,6 +2414,12 @@ static int virtnet_xdp_set(struct net_de
+ return 0;
+
+ err:
++ if (!prog) {
++ virtnet_clear_guest_offloads(vi);
++ for (i = 0; i < vi->max_queue_pairs; i++)
++ rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog);
++ }
++
+ if (netif_running(dev)) {
+ for (i = 0; i < vi->max_queue_pairs; i++) {
+ virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+Date: Tue, 29 Jan 2019 09:45:55 +0900
+Subject: virtio_net: Fix not restoring real_num_rx_queues
+
+From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+
+[ Upstream commit 188313c137c4f76afd0862f50dbc185b198b9e2a ]
+
+When _virtnet_set_queues() failed we did not restore real_num_rx_queues.
+Fix this by placing the change of real_num_rx_queues after
+_virtnet_set_queues().
+This order is also in line with virtnet_set_channels().
+
+Fixes: 4941d472bf95 ("virtio-net: do not reset during XDP set")
+Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/virtio_net.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -2377,10 +2377,10 @@ static int virtnet_xdp_set(struct net_de
+ }
+ }
+
+- netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
+ err = _virtnet_set_queues(vi, curr_qp + xdp_qp);
+ if (err)
+ goto err;
++ netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
+ vi->xdp_queue_pairs = xdp_qp;
+
+ for (i = 0; i < vi->max_queue_pairs; i++) {
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+Date: Tue, 29 Jan 2019 09:45:56 +0900
+Subject: virtio_net: Fix out of bounds access of sq
+
+From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+
+[ Upstream commit 1667c08a9d31c7cdf09f4890816bfbf20b685495 ]
+
+When XDP is disabled, curr_queue_pairs + smp_processor_id() can be
+larger than max_queue_pairs.
+There is no guarantee that we have enough XDP send queues dedicated for
+each cpu when XDP is disabled, so do not count drops on sq in that case.
+
+Fixes: 5b8f3c8d30a6 ("virtio_net: Add XDP related stats")
+Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/virtio_net.c | 17 +++++++----------
+ 1 file changed, 7 insertions(+), 10 deletions(-)
+
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -490,20 +490,17 @@ static int virtnet_xdp_xmit(struct net_d
+ int ret, err;
+ int i;
+
+- sq = virtnet_xdp_sq(vi);
+-
+- if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
+- ret = -EINVAL;
+- drops = n;
+- goto out;
+- }
+-
+ /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
+ * indicate XDP resources have been successfully allocated.
+ */
+ xdp_prog = rcu_dereference(rq->xdp_prog);
+- if (!xdp_prog) {
+- ret = -ENXIO;
++ if (!xdp_prog)
++ return -ENXIO;
++
++ sq = virtnet_xdp_sq(vi);
++
++ if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
++ ret = -EINVAL;
+ drops = n;
+ goto out;
+ }
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+Date: Tue, 29 Jan 2019 09:45:58 +0900
+Subject: virtio_net: Use xdp_return_frame to free xdp_frames on destroying vqs
+
+From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+
+[ Upstream commit 07b344f494ddda9f061b396407c96df8c46c82b5 ]
+
+put_page() can work as a fallback for freeing xdp_frames, but the
+appropriate way is to use xdp_return_frame().
+
+Fixes: cac320c850ef ("virtio_net: convert to use generic xdp_frame and xdp_return_frame API")
+Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/virtio_net.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -2593,7 +2593,7 @@ static void free_unused_bufs(struct virt
+ if (!is_xdp_raw_buffer_queue(vi, i))
+ dev_kfree_skb(buf);
+ else
+- put_page(virt_to_head_page(buf));
++ xdp_return_frame(buf);
+ }
+ }
+