--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Nir Dotan <nird@mellanox.com>
+Date: Sun, 27 Jan 2019 09:26:22 +0200
+Subject: ip6mr: Fix notifiers call on mroute_clean_tables()
+
+From: Nir Dotan <nird@mellanox.com>
+
+[ Upstream commit 146820cc240f4389cf33481c058d9493aef95e25 ]
+
+When the MC route socket is closed, mroute_clean_tables() is called to
+cleanup existing routes. Mistakenly, the notifiers call was put on the
+cleanup of the unresolved MC route entries cache.
+In a case where the MC socket closes before an unresolved route expires,
+the notifier call leads to a crash, caused by the driver trying to
+increment a non initialized refcount_t object [1] and then when handling
+is done, to decrement it [2]. This was detected by a test recently added in
+commit 6d4efada3b82 ("selftests: forwarding: Add multicast routing test").
+
+Fix that by putting notifiers call on the resolved entries traversal,
+instead of on the unresolved entries traversal.
+
+[1]
+
+[ 245.748967] refcount_t: increment on 0; use-after-free.
+[ 245.754829] WARNING: CPU: 3 PID: 3223 at lib/refcount.c:153 refcount_inc_checked+0x2b/0x30
+...
+[ 245.802357] Hardware name: Mellanox Technologies Ltd. MSN2740/SA001237, BIOS 5.6.5 06/07/2016
+[ 245.811873] RIP: 0010:refcount_inc_checked+0x2b/0x30
+...
+[ 245.907487] Call Trace:
+[ 245.910231] mlxsw_sp_router_fib_event.cold.181+0x42/0x47 [mlxsw_spectrum]
+[ 245.917913] notifier_call_chain+0x45/0x7
+[ 245.922484] atomic_notifier_call_chain+0x15/0x20
+[ 245.927729] call_fib_notifiers+0x15/0x30
+[ 245.932205] mroute_clean_tables+0x372/0x3f
+[ 245.936971] ip6mr_sk_done+0xb1/0xc0
+[ 245.940960] ip6_mroute_setsockopt+0x1da/0x5f0
+...
+
+[2]
+
+[ 246.128487] refcount_t: underflow; use-after-free.
+[ 246.133859] WARNING: CPU: 0 PID: 7 at lib/refcount.c:187 refcount_sub_and_test_checked+0x4c/0x60
+[ 246.183521] Hardware name: Mellanox Technologies Ltd. MSN2740/SA001237, BIOS 5.6.5 06/07/2016
+...
+[ 246.193062] Workqueue: mlxsw_core_ordered mlxsw_sp_router_fibmr_event_work [mlxsw_spectrum]
+[ 246.202394] RIP: 0010:refcount_sub_and_test_checked+0x4c/0x60
+...
+[ 246.298889] Call Trace:
+[ 246.301617] refcount_dec_and_test_checked+0x11/0x20
+[ 246.307170] mlxsw_sp_router_fibmr_event_work.cold.196+0x47/0x78 [mlxsw_spectrum]
+[ 246.315531] process_one_work+0x1fa/0x3f0
+[ 246.320005] worker_thread+0x2f/0x3e0
+[ 246.324083] kthread+0x118/0x130
+[ 246.327683] ? wq_update_unbound_numa+0x1b0/0x1b0
+[ 246.332926] ? kthread_park+0x80/0x80
+[ 246.337013] ret_from_fork+0x1f/0x30
+
+Fixes: 088aa3eec2ce ("ip6mr: Support fib notifications")
+Signed-off-by: Nir Dotan <nird@mellanox.com>
+Reviewed-by: Ido Schimmel <idosch@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6mr.c | 7 +++----
+ 1 file changed, 3 insertions(+), 4 deletions(-)
+
+--- a/net/ipv6/ip6mr.c
++++ b/net/ipv6/ip6mr.c
+@@ -1506,6 +1506,9 @@ static void mroute_clean_tables(struct m
+ continue;
+ rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
+ list_del_rcu(&c->list);
++ call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
++ FIB_EVENT_ENTRY_DEL,
++ (struct mfc6_cache *)c, mrt->id);
+ mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
+ mr_cache_put(c);
+ }
+@@ -1514,10 +1517,6 @@ static void mroute_clean_tables(struct m
+ spin_lock_bh(&mfc_unres_lock);
+ list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
+ list_del(&c->list);
+- call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
+- FIB_EVENT_ENTRY_DEL,
+- (struct mfc6_cache *)c,
+- mrt->id);
+ mr6_netlink_event(mrt, (struct mfc6_cache *)c,
+ RTM_DELROUTE);
+ ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: David Ahern <dsahern@gmail.com>
+Date: Wed, 2 Jan 2019 18:57:09 -0800
+Subject: ipv6: Consider sk_bound_dev_if when binding a socket to an address
+
+From: David Ahern <dsahern@gmail.com>
+
+[ Upstream commit c5ee066333ebc322a24a00a743ed941a0c68617e ]
+
+IPv6 does not consider if the socket is bound to a device when binding
+to an address. The result is that a socket can be bound to eth0 and then
+bound to the address of eth1. If the device is a VRF, the result is that
+a socket can only be bound to an address in the default VRF.
+
+Resolve by considering the device if sk_bound_dev_if is set.
+
+This problem exists from the beginning of git history.
+
+Signed-off-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/af_inet6.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/net/ipv6/af_inet6.c
++++ b/net/ipv6/af_inet6.c
+@@ -361,6 +361,9 @@ static int __inet6_bind(struct sock *sk,
+ err = -EINVAL;
+ goto out_unlock;
+ }
++ }
++
++ if (sk->sk_bound_dev_if) {
+ dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
+ if (!dev) {
+ err = -ENODEV;
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Yohei Kanemaru <yohei.kanemaru@gmail.com>
+Date: Tue, 29 Jan 2019 15:52:34 +0900
+Subject: ipv6: sr: clear IP6CB(skb) on SRH ip4ip6 encapsulation
+
+From: Yohei Kanemaru <yohei.kanemaru@gmail.com>
+
+[ Upstream commit ef489749aae508e6f17886775c075f12ff919fb1 ]
+
+skb->cb may contain data from previous layers (in an observed case
+IPv4 with L3 Master Device). In the observed scenario, the data in
+IPCB(skb)->frags was misinterpreted as IP6CB(skb)->frag_max_size,
+eventually caused an unexpected IPv6 fragmentation in ip6_fragment()
+through ip6_finish_output().
+
+This patch clears IP6CB(skb), which potentially contains garbage data,
+on the SRH ip4ip6 encapsulation.
+
+Fixes: 32d99d0b6702 ("ipv6: sr: add support for ip4ip6 encapsulation")
+Signed-off-by: Yohei Kanemaru <yohei.kanemaru@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/seg6_iptunnel.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/ipv6/seg6_iptunnel.c
++++ b/net/ipv6/seg6_iptunnel.c
+@@ -146,6 +146,8 @@ int seg6_do_srh_encap(struct sk_buff *sk
+ } else {
+ ip6_flow_hdr(hdr, 0, flowlabel);
+ hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb));
++
++ memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+ }
+
+ hdr->nexthdr = NEXTHDR_ROUTING;
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Wed, 30 Jan 2019 12:49:48 +0100
+Subject: ipvlan, l3mdev: fix broken l3s mode wrt local routes
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+[ Upstream commit d5256083f62e2720f75bb3c5a928a0afe47d6bc3 ]
+
+While implementing ipvlan l3 and l3s mode for kubernetes CNI plugin,
+I ran into the issue that while l3 mode is working fine, l3s mode
+does not have any connectivity to kube-apiserver and hence all pods
+end up in Error state as well. The ipvlan master device sits on
+top of a bond device and hostns traffic to kube-apiserver (also running
+in hostns) is DNATed from 10.152.183.1:443 to 139.178.29.207:37573
+where the latter is the address of the bond0. While in l3 mode, a
+curl to https://10.152.183.1:443 or to https://139.178.29.207:37573
+works fine from hostns, neither of them do in case of l3s. In the
+latter only a curl to https://127.0.0.1:37573 appeared to work where
+for local addresses of bond0 I saw kernel suddenly starting to emit
+ARP requests to query HW address of bond0 which remained unanswered
+and neighbor entries in INCOMPLETE state. These ARP requests only
+happen while in l3s.
+
+Debugging this further, I found the issue is that l3s mode is piggy-
+backing on l3 master device, and in this case local routes are using
+l3mdev_master_dev_rcu(dev) instead of net->loopback_dev as per commit
+f5a0aab84b74 ("net: ipv4: dst for local input routes should use l3mdev
+if relevant") and 5f02ce24c269 ("net: l3mdev: Allow the l3mdev to be
+a loopback"). I found that reverting them back into using the
+net->loopback_dev fixed ipvlan l3s connectivity and got everything
+working for the CNI.
+
+Now judging from 4fbae7d83c98 ("ipvlan: Introduce l3s mode") and the
+l3mdev paper in [0] the only sole reason why ipvlan l3s is relying
+on l3 master device is to get the l3mdev_ip_rcv() receive hook for
+setting the dst entry of the input route without adding its own
+ipvlan specific hacks into the receive path, however, any l3 domain
+semantics beyond just that are breaking l3s operation. Note that
+ipvlan also has the ability to dynamically switch its internal
+operation from l3 to l3s for all ports via ipvlan_set_port_mode()
+at runtime. In any case, l3 vs l3s solely distinguishes itself by
+'de-confusing' netfilter through switching skb->dev to ipvlan slave
+device late in NF_INET_LOCAL_IN before handing the skb to L4.
+
+Minimal fix taken here is to add a IFF_L3MDEV_RX_HANDLER flag which,
+if set from ipvlan setup, gets us only the wanted l3mdev_l3_rcv() hook
+without any additional l3mdev semantics on top. This should also have
+minimal impact since dev->priv_flags is already hot in cache. With
+this set, l3s mode is working fine and I also get things like
+masquerading pod traffic on the ipvlan master properly working.
+
+ [0] https://netdevconf.org/1.2/papers/ahern-what-is-l3mdev-paper.pdf
+
+Fixes: f5a0aab84b74 ("net: ipv4: dst for local input routes should use l3mdev if relevant")
+Fixes: 5f02ce24c269 ("net: l3mdev: Allow the l3mdev to be a loopback")
+Fixes: 4fbae7d83c98 ("ipvlan: Introduce l3s mode")
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Cc: Mahesh Bandewar <maheshb@google.com>
+Cc: David Ahern <dsa@cumulusnetworks.com>
+Cc: Florian Westphal <fw@strlen.de>
+Cc: Martynas Pumputis <m@lambda.lt>
+Acked-by: David Ahern <dsa@cumulusnetworks.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ipvlan/ipvlan_main.c | 6 +++---
+ include/linux/netdevice.h | 8 ++++++++
+ include/net/l3mdev.h | 3 ++-
+ 3 files changed, 13 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/ipvlan/ipvlan_main.c
++++ b/drivers/net/ipvlan/ipvlan_main.c
+@@ -97,12 +97,12 @@ static int ipvlan_set_port_mode(struct i
+ err = ipvlan_register_nf_hook(read_pnet(&port->pnet));
+ if (!err) {
+ mdev->l3mdev_ops = &ipvl_l3mdev_ops;
+- mdev->priv_flags |= IFF_L3MDEV_MASTER;
++ mdev->priv_flags |= IFF_L3MDEV_RX_HANDLER;
+ } else
+ goto fail;
+ } else if (port->mode == IPVLAN_MODE_L3S) {
+ /* Old mode was L3S */
+- mdev->priv_flags &= ~IFF_L3MDEV_MASTER;
++ mdev->priv_flags &= ~IFF_L3MDEV_RX_HANDLER;
+ ipvlan_unregister_nf_hook(read_pnet(&port->pnet));
+ mdev->l3mdev_ops = NULL;
+ }
+@@ -162,7 +162,7 @@ static void ipvlan_port_destroy(struct n
+ struct sk_buff *skb;
+
+ if (port->mode == IPVLAN_MODE_L3S) {
+- dev->priv_flags &= ~IFF_L3MDEV_MASTER;
++ dev->priv_flags &= ~IFF_L3MDEV_RX_HANDLER;
+ ipvlan_unregister_nf_hook(dev_net(dev));
+ dev->l3mdev_ops = NULL;
+ }
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -1455,6 +1455,7 @@ struct net_device_ops {
+ * @IFF_NO_RX_HANDLER: device doesn't support the rx_handler hook
+ * @IFF_FAILOVER: device is a failover master device
+ * @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device
++ * @IFF_L3MDEV_RX_HANDLER: only invoke the rx handler of L3 master device
+ */
+ enum netdev_priv_flags {
+ IFF_802_1Q_VLAN = 1<<0,
+@@ -1486,6 +1487,7 @@ enum netdev_priv_flags {
+ IFF_NO_RX_HANDLER = 1<<26,
+ IFF_FAILOVER = 1<<27,
+ IFF_FAILOVER_SLAVE = 1<<28,
++ IFF_L3MDEV_RX_HANDLER = 1<<29,
+ };
+
+ #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN
+@@ -1516,6 +1518,7 @@ enum netdev_priv_flags {
+ #define IFF_NO_RX_HANDLER IFF_NO_RX_HANDLER
+ #define IFF_FAILOVER IFF_FAILOVER
+ #define IFF_FAILOVER_SLAVE IFF_FAILOVER_SLAVE
++#define IFF_L3MDEV_RX_HANDLER IFF_L3MDEV_RX_HANDLER
+
+ /**
+ * struct net_device - The DEVICE structure.
+@@ -4464,6 +4467,11 @@ static inline bool netif_supports_nofcs(
+ return dev->priv_flags & IFF_SUPP_NOFCS;
+ }
+
++static inline bool netif_has_l3_rx_handler(const struct net_device *dev)
++{
++ return dev->priv_flags & IFF_L3MDEV_RX_HANDLER;
++}
++
+ static inline bool netif_is_l3_master(const struct net_device *dev)
+ {
+ return dev->priv_flags & IFF_L3MDEV_MASTER;
+--- a/include/net/l3mdev.h
++++ b/include/net/l3mdev.h
+@@ -142,7 +142,8 @@ struct sk_buff *l3mdev_l3_rcv(struct sk_
+
+ if (netif_is_l3_slave(skb->dev))
+ master = netdev_master_upper_dev_get_rcu(skb->dev);
+- else if (netif_is_l3_master(skb->dev))
++ else if (netif_is_l3_master(skb->dev) ||
++ netif_has_l3_rx_handler(skb->dev))
+ master = skb->dev;
+
+ if (master && master->l3mdev_ops->l3mdev_l3_rcv)
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Jacob Wen <jian.w.wen@oracle.com>
+Date: Thu, 31 Jan 2019 15:18:56 +0800
+Subject: l2tp: copy 4 more bytes to linear part if necessary
+
+From: Jacob Wen <jian.w.wen@oracle.com>
+
+[ Upstream commit 91c524708de6207f59dd3512518d8a1c7b434ee3 ]
+
+The size of L2TPv2 header with all optional fields is 14 bytes.
+l2tp_udp_recv_core only moves 10 bytes to the linear part of a
+skb. This may lead to l2tp_recv_common read data outside of a skb.
+
+This patch makes sure that there are at least 14 bytes in the linear
+part of a skb to meet the maximum need of l2tp_udp_recv_core and
+l2tp_recv_common. The minimum size of both PPP HDLC-like frame and
+Ethernet frame is larger than 14 bytes, so we are safe to do so.
+
+Also remove L2TP_HDR_SIZE_NOSEQ, it is unused now.
+
+Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts")
+Suggested-by: Guillaume Nault <gnault@redhat.com>
+Signed-off-by: Jacob Wen <jian.w.wen@oracle.com>
+Acked-by: Guillaume Nault <gnault@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/l2tp/l2tp_core.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- a/net/l2tp/l2tp_core.c
++++ b/net/l2tp/l2tp_core.c
+@@ -83,8 +83,7 @@
+ #define L2TP_SLFLAG_S 0x40000000
+ #define L2TP_SL_SEQ_MASK 0x00ffffff
+
+-#define L2TP_HDR_SIZE_SEQ 10
+-#define L2TP_HDR_SIZE_NOSEQ 6
++#define L2TP_HDR_SIZE_MAX 14
+
+ /* Default trace flags */
+ #define L2TP_DEFAULT_DEBUG_FLAGS 0
+@@ -808,7 +807,7 @@ static int l2tp_udp_recv_core(struct l2t
+ __skb_pull(skb, sizeof(struct udphdr));
+
+ /* Short packet? */
+- if (!pskb_may_pull(skb, L2TP_HDR_SIZE_SEQ)) {
++ if (!pskb_may_pull(skb, L2TP_HDR_SIZE_MAX)) {
+ l2tp_info(tunnel, L2TP_MSG_DATA,
+ "%s: recv short packet (len=%d)\n",
+ tunnel->name, skb->len);
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Jacob Wen <jian.w.wen@oracle.com>
+Date: Wed, 30 Jan 2019 14:55:14 +0800
+Subject: l2tp: fix reading optional fields of L2TPv3
+
+From: Jacob Wen <jian.w.wen@oracle.com>
+
+[ Upstream commit 4522a70db7aa5e77526a4079628578599821b193 ]
+
+Use pskb_may_pull() to make sure the optional fields are in skb linear
+parts, so we can safely read them later.
+
+It's easy to reproduce the issue with a net driver that supports paged
+skb data. Just create a L2TPv3 over IP tunnel and then generates some
+network traffic.
+Once reproduced, rx err in /sys/kernel/debug/l2tp/tunnels will increase.
+
+Changes in v4:
+1. s/l2tp_v3_pull_opt/l2tp_v3_ensure_opt_in_linear/
+2. s/tunnel->version != L2TP_HDR_VER_2/tunnel->version == L2TP_HDR_VER_3/
+3. Add 'Fixes' in commit messages.
+
+Changes in v3:
+1. To keep consistency, move the code out of l2tp_recv_common.
+2. Use "net" instead of "net-next", since this is a bug fix.
+
+Changes in v2:
+1. Only fix L2TPv3 to make code simple.
+ To fix both L2TPv3 and L2TPv2, we'd better refactor l2tp_recv_common.
+ It's complicated to do so.
+2. Reloading pointers after pskb_may_pull
+
+Fixes: f7faffa3ff8e ("l2tp: Add L2TPv3 protocol support")
+Fixes: 0d76751fad77 ("l2tp: Add L2TPv3 IP encapsulation (no UDP) support")
+Fixes: a32e0eec7042 ("l2tp: introduce L2TPv3 IP encapsulation support for IPv6")
+Signed-off-by: Jacob Wen <jian.w.wen@oracle.com>
+Acked-by: Guillaume Nault <gnault@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/l2tp/l2tp_core.c | 4 ++++
+ net/l2tp/l2tp_core.h | 20 ++++++++++++++++++++
+ net/l2tp/l2tp_ip.c | 3 +++
+ net/l2tp/l2tp_ip6.c | 3 +++
+ 4 files changed, 30 insertions(+)
+
+--- a/net/l2tp/l2tp_core.c
++++ b/net/l2tp/l2tp_core.c
+@@ -883,6 +883,10 @@ static int l2tp_udp_recv_core(struct l2t
+ goto error;
+ }
+
++ if (tunnel->version == L2TP_HDR_VER_3 &&
++ l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr))
++ goto error;
++
+ l2tp_recv_common(session, skb, ptr, optr, hdrflags, length);
+ l2tp_session_dec_refcount(session);
+
+--- a/net/l2tp/l2tp_core.h
++++ b/net/l2tp/l2tp_core.h
+@@ -301,6 +301,26 @@ static inline bool l2tp_tunnel_uses_xfrm
+ }
+ #endif
+
++static inline int l2tp_v3_ensure_opt_in_linear(struct l2tp_session *session, struct sk_buff *skb,
++ unsigned char **ptr, unsigned char **optr)
++{
++ int opt_len = session->peer_cookie_len + l2tp_get_l2specific_len(session);
++
++ if (opt_len > 0) {
++ int off = *ptr - *optr;
++
++ if (!pskb_may_pull(skb, off + opt_len))
++ return -1;
++
++ if (skb->data != *optr) {
++ *optr = skb->data;
++ *ptr = skb->data + off;
++ }
++ }
++
++ return 0;
++}
++
+ #define l2tp_printk(ptr, type, func, fmt, ...) \
+ do { \
+ if (((ptr)->debug) & (type)) \
+--- a/net/l2tp/l2tp_ip.c
++++ b/net/l2tp/l2tp_ip.c
+@@ -165,6 +165,9 @@ static int l2tp_ip_recv(struct sk_buff *
+ print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length);
+ }
+
++ if (l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr))
++ goto discard_sess;
++
+ l2tp_recv_common(session, skb, ptr, optr, 0, skb->len);
+ l2tp_session_dec_refcount(session);
+
+--- a/net/l2tp/l2tp_ip6.c
++++ b/net/l2tp/l2tp_ip6.c
+@@ -178,6 +178,9 @@ static int l2tp_ip6_recv(struct sk_buff
+ print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length);
+ }
+
++ if (l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr))
++ goto discard_sess;
++
+ l2tp_recv_common(session, skb, ptr, optr, 0, skb->len);
+ l2tp_session_dec_refcount(session);
+
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
+Date: Mon, 28 Jan 2019 22:23:49 +0100
+Subject: net: ip6_gre: always reports o_key to userspace
+
+From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
+
+[ Upstream commit c706863bc8902d0c2d1a5a27ac8e1ead5d06b79d ]
+
+As Erspan_v4, Erspan_v6 protocol relies on o_key to configure
+session id header field. However TUNNEL_KEY bit is cleared in
+ip6erspan_tunnel_xmit since ERSPAN protocol does not set the key field
+of the external GRE header and so the configured o_key is not reported
+to userspace. The issue can be triggered with the following reproducer:
+
+$ip link add ip6erspan1 type ip6erspan local 2000::1 remote 2000::2 \
+ key 1 seq erspan_ver 1
+$ip link set ip6erspan1 up
+ip -d link sh ip6erspan1
+
+ip6erspan1@NONE: <BROADCAST,MULTICAST> mtu 1422 qdisc noop state DOWN mode DEFAULT
+ link/ether ba:ff:09:24:c3:0e brd ff:ff:ff:ff:ff:ff promiscuity 0 minmtu 68 maxmtu 1500
+ ip6erspan remote 2000::2 local 2000::1 encaplimit 4 flowlabel 0x00000 ikey 0.0.0.1 iseq oseq
+
+Fix the issue adding TUNNEL_KEY bit to the o_flags parameter in
+ip6gre_fill_info
+
+Fixes: 5a963eb61b7c ("ip6_gre: Add ERSPAN native tunnel support")
+Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_gre.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/net/ipv6/ip6_gre.c
++++ b/net/ipv6/ip6_gre.c
+@@ -2120,12 +2120,17 @@ static int ip6gre_fill_info(struct sk_bu
+ {
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct __ip6_tnl_parm *p = &t->parms;
++ __be16 o_flags = p->o_flags;
++
++ if ((p->erspan_ver == 1 || p->erspan_ver == 2) &&
++ !p->collect_md)
++ o_flags |= TUNNEL_KEY;
+
+ if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
+ nla_put_be16(skb, IFLA_GRE_IFLAGS,
+ gre_tnl_flags_to_gre_flags(p->i_flags)) ||
+ nla_put_be16(skb, IFLA_GRE_OFLAGS,
+- gre_tnl_flags_to_gre_flags(p->o_flags)) ||
++ gre_tnl_flags_to_gre_flags(o_flags)) ||
+ nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
+ nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
+ nla_put_in6_addr(skb, IFLA_GRE_LOCAL, &p->laddr) ||
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
+Date: Mon, 28 Jan 2019 22:23:48 +0100
+Subject: net: ip_gre: always reports o_key to userspace
+
+From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
+
+[ Upstream commit feaf5c796b3f0240f10d0d6d0b686715fd58a05b ]
+
+Erspan protocol (version 1 and 2) relies on o_key to configure
+session id header field. However TUNNEL_KEY bit is cleared in
+erspan_xmit since ERSPAN protocol does not set the key field
+of the external GRE header and so the configured o_key is not reported
+to userspace. The issue can be triggered with the following reproducer:
+
+$ip link add erspan1 type erspan local 192.168.0.1 remote 192.168.0.2 \
+ key 1 seq erspan_ver 1
+$ip link set erspan1 up
+$ip -d link sh erspan1
+
+erspan1@NONE: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1450 qdisc pfifo_fast state UNKNOWN mode DEFAULT
+ link/ether 52:aa:99:95:9a:b5 brd ff:ff:ff:ff:ff:ff promiscuity 0 minmtu 68 maxmtu 1500
+ erspan remote 192.168.0.2 local 192.168.0.1 ttl inherit ikey 0.0.0.1 iseq oseq erspan_index 0
+
+Fix the issue adding TUNNEL_KEY bit to the o_flags parameter in
+ipgre_fill_info
+
+Fixes: 84e54fe0a5ea ("gre: introduce native tunnel support for ERSPAN")
+Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ip_gre.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/net/ipv4/ip_gre.c
++++ b/net/ipv4/ip_gre.c
+@@ -1471,12 +1471,17 @@ static int ipgre_fill_info(struct sk_buf
+ {
+ struct ip_tunnel *t = netdev_priv(dev);
+ struct ip_tunnel_parm *p = &t->parms;
++ __be16 o_flags = p->o_flags;
++
++ if ((t->erspan_ver == 1 || t->erspan_ver == 2) &&
++ !t->collect_md)
++ o_flags |= TUNNEL_KEY;
+
+ if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
+ nla_put_be16(skb, IFLA_GRE_IFLAGS,
+ gre_tnl_flags_to_gre_flags(p->i_flags)) ||
+ nla_put_be16(skb, IFLA_GRE_OFLAGS,
+- gre_tnl_flags_to_gre_flags(p->o_flags)) ||
++ gre_tnl_flags_to_gre_flags(o_flags)) ||
+ nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
+ nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
+ nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
+Date: Fri, 18 Jan 2019 12:05:39 +0100
+Subject: net: ip_gre: use erspan key field for tunnel lookup
+
+From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
+
+[ Upstream commit cb73ee40b1b381eaf3749e6dbeed567bb38e5258 ]
+
+Use ERSPAN key header field as tunnel key in gre_parse_header routine
+since ERSPAN protocol sets the key field of the external GRE header to
+0 resulting in a tunnel lookup fail in ip6gre_err.
+In addition remove key field parsing and pskb_may_pull check in
+erspan_rcv and ip6erspan_rcv
+
+Fixes: 5a963eb61b7c ("ip6_gre: Add ERSPAN native tunnel support")
+Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/gre_demux.c | 17 +++++++++++++++++
+ net/ipv4/ip_gre.c | 9 ---------
+ net/ipv6/ip6_gre.c | 4 ----
+ 3 files changed, 17 insertions(+), 13 deletions(-)
+
+--- a/net/ipv4/gre_demux.c
++++ b/net/ipv4/gre_demux.c
+@@ -25,6 +25,7 @@
+ #include <linux/spinlock.h>
+ #include <net/protocol.h>
+ #include <net/gre.h>
++#include <net/erspan.h>
+
+ #include <net/icmp.h>
+ #include <net/route.h>
+@@ -118,6 +119,22 @@ int gre_parse_header(struct sk_buff *skb
+ hdr_len += 4;
+ }
+ tpi->hdr_len = hdr_len;
++
++ /* ERSPAN ver 1 and 2 protocol sets GRE key field
++ * to 0 and sets the configured key in the
++ * inner erspan header field
++ */
++ if (greh->protocol == htons(ETH_P_ERSPAN) ||
++ greh->protocol == htons(ETH_P_ERSPAN2)) {
++ struct erspan_base_hdr *ershdr;
++
++ if (!pskb_may_pull(skb, nhs + hdr_len + sizeof(*ershdr)))
++ return -EINVAL;
++
++ ershdr = (struct erspan_base_hdr *)options;
++ tpi->key = cpu_to_be32(get_session_id(ershdr));
++ }
++
+ return hdr_len;
+ }
+ EXPORT_SYMBOL(gre_parse_header);
+--- a/net/ipv4/ip_gre.c
++++ b/net/ipv4/ip_gre.c
+@@ -269,20 +269,11 @@ static int erspan_rcv(struct sk_buff *sk
+ int len;
+
+ itn = net_generic(net, erspan_net_id);
+- len = gre_hdr_len + sizeof(*ershdr);
+-
+- /* Check based hdr len */
+- if (unlikely(!pskb_may_pull(skb, len)))
+- return PACKET_REJECT;
+
+ iph = ip_hdr(skb);
+ ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
+ ver = ershdr->ver;
+
+- /* The original GRE header does not have key field,
+- * Use ERSPAN 10-bit session ID as key.
+- */
+- tpi->key = cpu_to_be32(get_session_id(ershdr));
+ tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
+ tpi->flags | TUNNEL_KEY,
+ iph->saddr, iph->daddr, tpi->key);
+--- a/net/ipv6/ip6_gre.c
++++ b/net/ipv6/ip6_gre.c
+@@ -550,13 +550,9 @@ static int ip6erspan_rcv(struct sk_buff
+ struct ip6_tnl *tunnel;
+ u8 ver;
+
+- if (unlikely(!pskb_may_pull(skb, sizeof(*ershdr))))
+- return PACKET_REJECT;
+-
+ ipv6h = ipv6_hdr(skb);
+ ershdr = (struct erspan_base_hdr *)skb->data;
+ ver = ershdr->ver;
+- tpi->key = cpu_to_be32(get_session_id(ershdr));
+
+ tunnel = ip6gre_tunnel_lookup(skb->dev,
+ &ipv6h->saddr, &ipv6h->daddr, tpi->key,
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Aya Levin <ayal@mellanox.com>
+Date: Tue, 22 Jan 2019 15:19:44 +0200
+Subject: net/mlx4_core: Add masking for a few queries on HCA caps
+
+From: Aya Levin <ayal@mellanox.com>
+
+[ Upstream commit a40ded6043658444ee4dd6ee374119e4e98b33fc ]
+
+Driver reads the query HCA capabilities without the corresponding masks.
+Without the correct masks, the base addresses of the queues are
+unaligned. In addition some reserved bits were wrongly read. Using the
+correct masks, ensures alignment of the base addresses and allows future
+firmware versions safe use of the reserved bits.
+
+Fixes: ab9c17a009ee ("mlx4_core: Modify driver initialization flow to accommodate SRIOV for Ethernet")
+Fixes: 0ff1fb654bec ("{NET, IB}/mlx4: Add device managed flow steering firmware API")
+Signed-off-by: Aya Levin <ayal@mellanox.com>
+Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx4/fw.c | 75 +++++++++++++++++++-------------
+ 1 file changed, 46 insertions(+), 29 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
++++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
+@@ -2064,9 +2064,11 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev,
+ {
+ struct mlx4_cmd_mailbox *mailbox;
+ __be32 *outbox;
++ u64 qword_field;
+ u32 dword_field;
+- int err;
++ u16 word_field;
+ u8 byte_field;
++ int err;
+ static const u8 a0_dmfs_query_hw_steering[] = {
+ [0] = MLX4_STEERING_DMFS_A0_DEFAULT,
+ [1] = MLX4_STEERING_DMFS_A0_DYNAMIC,
+@@ -2094,19 +2096,32 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev,
+
+ /* QPC/EEC/CQC/EQC/RDMARC attributes */
+
+- MLX4_GET(param->qpc_base, outbox, INIT_HCA_QPC_BASE_OFFSET);
+- MLX4_GET(param->log_num_qps, outbox, INIT_HCA_LOG_QP_OFFSET);
+- MLX4_GET(param->srqc_base, outbox, INIT_HCA_SRQC_BASE_OFFSET);
+- MLX4_GET(param->log_num_srqs, outbox, INIT_HCA_LOG_SRQ_OFFSET);
+- MLX4_GET(param->cqc_base, outbox, INIT_HCA_CQC_BASE_OFFSET);
+- MLX4_GET(param->log_num_cqs, outbox, INIT_HCA_LOG_CQ_OFFSET);
+- MLX4_GET(param->altc_base, outbox, INIT_HCA_ALTC_BASE_OFFSET);
+- MLX4_GET(param->auxc_base, outbox, INIT_HCA_AUXC_BASE_OFFSET);
+- MLX4_GET(param->eqc_base, outbox, INIT_HCA_EQC_BASE_OFFSET);
+- MLX4_GET(param->log_num_eqs, outbox, INIT_HCA_LOG_EQ_OFFSET);
+- MLX4_GET(param->num_sys_eqs, outbox, INIT_HCA_NUM_SYS_EQS_OFFSET);
+- MLX4_GET(param->rdmarc_base, outbox, INIT_HCA_RDMARC_BASE_OFFSET);
+- MLX4_GET(param->log_rd_per_qp, outbox, INIT_HCA_LOG_RD_OFFSET);
++ MLX4_GET(qword_field, outbox, INIT_HCA_QPC_BASE_OFFSET);
++ param->qpc_base = qword_field & ~((u64)0x1f);
++ MLX4_GET(byte_field, outbox, INIT_HCA_LOG_QP_OFFSET);
++ param->log_num_qps = byte_field & 0x1f;
++ MLX4_GET(qword_field, outbox, INIT_HCA_SRQC_BASE_OFFSET);
++ param->srqc_base = qword_field & ~((u64)0x1f);
++ MLX4_GET(byte_field, outbox, INIT_HCA_LOG_SRQ_OFFSET);
++ param->log_num_srqs = byte_field & 0x1f;
++ MLX4_GET(qword_field, outbox, INIT_HCA_CQC_BASE_OFFSET);
++ param->cqc_base = qword_field & ~((u64)0x1f);
++ MLX4_GET(byte_field, outbox, INIT_HCA_LOG_CQ_OFFSET);
++ param->log_num_cqs = byte_field & 0x1f;
++ MLX4_GET(qword_field, outbox, INIT_HCA_ALTC_BASE_OFFSET);
++ param->altc_base = qword_field;
++ MLX4_GET(qword_field, outbox, INIT_HCA_AUXC_BASE_OFFSET);
++ param->auxc_base = qword_field;
++ MLX4_GET(qword_field, outbox, INIT_HCA_EQC_BASE_OFFSET);
++ param->eqc_base = qword_field & ~((u64)0x1f);
++ MLX4_GET(byte_field, outbox, INIT_HCA_LOG_EQ_OFFSET);
++ param->log_num_eqs = byte_field & 0x1f;
++ MLX4_GET(word_field, outbox, INIT_HCA_NUM_SYS_EQS_OFFSET);
++ param->num_sys_eqs = word_field & 0xfff;
++ MLX4_GET(qword_field, outbox, INIT_HCA_RDMARC_BASE_OFFSET);
++ param->rdmarc_base = qword_field & ~((u64)0x1f);
++ MLX4_GET(byte_field, outbox, INIT_HCA_LOG_RD_OFFSET);
++ param->log_rd_per_qp = byte_field & 0x7;
+
+ MLX4_GET(dword_field, outbox, INIT_HCA_FLAGS_OFFSET);
+ if (dword_field & (1 << INIT_HCA_DEVICE_MANAGED_FLOW_STEERING_EN)) {
+@@ -2125,22 +2140,21 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev,
+ /* steering attributes */
+ if (param->steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) {
+ MLX4_GET(param->mc_base, outbox, INIT_HCA_FS_BASE_OFFSET);
+- MLX4_GET(param->log_mc_entry_sz, outbox,
+- INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET);
+- MLX4_GET(param->log_mc_table_sz, outbox,
+- INIT_HCA_FS_LOG_TABLE_SZ_OFFSET);
+- MLX4_GET(byte_field, outbox,
+- INIT_HCA_FS_A0_OFFSET);
++ MLX4_GET(byte_field, outbox, INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET);
++ param->log_mc_entry_sz = byte_field & 0x1f;
++ MLX4_GET(byte_field, outbox, INIT_HCA_FS_LOG_TABLE_SZ_OFFSET);
++ param->log_mc_table_sz = byte_field & 0x1f;
++ MLX4_GET(byte_field, outbox, INIT_HCA_FS_A0_OFFSET);
+ param->dmfs_high_steer_mode =
+ a0_dmfs_query_hw_steering[(byte_field >> 6) & 3];
+ } else {
+ MLX4_GET(param->mc_base, outbox, INIT_HCA_MC_BASE_OFFSET);
+- MLX4_GET(param->log_mc_entry_sz, outbox,
+- INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET);
+- MLX4_GET(param->log_mc_hash_sz, outbox,
+- INIT_HCA_LOG_MC_HASH_SZ_OFFSET);
+- MLX4_GET(param->log_mc_table_sz, outbox,
+- INIT_HCA_LOG_MC_TABLE_SZ_OFFSET);
++ MLX4_GET(byte_field, outbox, INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET);
++ param->log_mc_entry_sz = byte_field & 0x1f;
++ MLX4_GET(byte_field, outbox, INIT_HCA_LOG_MC_HASH_SZ_OFFSET);
++ param->log_mc_hash_sz = byte_field & 0x1f;
++ MLX4_GET(byte_field, outbox, INIT_HCA_LOG_MC_TABLE_SZ_OFFSET);
++ param->log_mc_table_sz = byte_field & 0x1f;
+ }
+
+ /* CX3 is capable of extending CQEs/EQEs from 32 to 64 bytes */
+@@ -2164,15 +2178,18 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev,
+ /* TPT attributes */
+
+ MLX4_GET(param->dmpt_base, outbox, INIT_HCA_DMPT_BASE_OFFSET);
+- MLX4_GET(param->mw_enabled, outbox, INIT_HCA_TPT_MW_OFFSET);
+- MLX4_GET(param->log_mpt_sz, outbox, INIT_HCA_LOG_MPT_SZ_OFFSET);
++ MLX4_GET(byte_field, outbox, INIT_HCA_TPT_MW_OFFSET);
++ param->mw_enabled = byte_field >> 7;
++ MLX4_GET(byte_field, outbox, INIT_HCA_LOG_MPT_SZ_OFFSET);
++ param->log_mpt_sz = byte_field & 0x3f;
+ MLX4_GET(param->mtt_base, outbox, INIT_HCA_MTT_BASE_OFFSET);
+ MLX4_GET(param->cmpt_base, outbox, INIT_HCA_CMPT_BASE_OFFSET);
+
+ /* UAR attributes */
+
+ MLX4_GET(param->uar_page_sz, outbox, INIT_HCA_UAR_PAGE_SZ_OFFSET);
+- MLX4_GET(param->log_uar_sz, outbox, INIT_HCA_LOG_UAR_SZ_OFFSET);
++ MLX4_GET(byte_field, outbox, INIT_HCA_LOG_UAR_SZ_OFFSET);
++ param->log_uar_sz = byte_field & 0xf;
+
+ /* phv_check enable */
+ MLX4_GET(byte_field, outbox, INIT_HCA_CACHELINE_SZ_OFFSET);
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Aya Levin <ayal@mellanox.com>
+Date: Mon, 24 Dec 2018 09:48:42 +0200
+Subject: net/mlx5e: Allow MAC invalidation while spoofchk is ON
+
+From: Aya Levin <ayal@mellanox.com>
+
+[ Upstream commit 9d2cbdc5d334967c35b5f58c7bf3208e17325647 ]
+
+Prior to this patch the driver prohibited spoof checking on invalid MAC.
+Now the user can set this configuration if it wishes to.
+
+This is required since libvirt might invalidate the VF Mac by setting it
+to zero, while spoofcheck is ON.
+
+Fixes: 1ab2068a4c66 ("net/mlx5: Implement vports admin state backup/restore")
+Signed-off-by: Aya Levin <ayal@mellanox.com>
+Reviewed-by: Eran Ben Elisha <eranbe@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 18 ++++++------------
+ 1 file changed, 6 insertions(+), 12 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+@@ -1133,13 +1133,6 @@ static int esw_vport_ingress_config(stru
+ int err = 0;
+ u8 *smac_v;
+
+- if (vport->info.spoofchk && !is_valid_ether_addr(vport->info.mac)) {
+- mlx5_core_warn(esw->dev,
+- "vport[%d] configure ingress rules failed, illegal mac with spoofchk\n",
+- vport->vport);
+- return -EPERM;
+- }
+-
+ esw_vport_cleanup_ingress_rules(esw, vport);
+
+ if (!vport->info.vlan && !vport->info.qos && !vport->info.spoofchk) {
+@@ -1812,13 +1805,10 @@ int mlx5_eswitch_set_vport_mac(struct ml
+ mutex_lock(&esw->state_lock);
+ evport = &esw->vports[vport];
+
+- if (evport->info.spoofchk && !is_valid_ether_addr(mac)) {
++ if (evport->info.spoofchk && !is_valid_ether_addr(mac))
+ mlx5_core_warn(esw->dev,
+- "MAC invalidation is not allowed when spoofchk is on, vport(%d)\n",
++ "Set invalid MAC while spoofchk is on, vport(%d)\n",
+ vport);
+- err = -EPERM;
+- goto unlock;
+- }
+
+ err = mlx5_modify_nic_vport_mac_address(esw->dev, vport, mac);
+ if (err) {
+@@ -1964,6 +1954,10 @@ int mlx5_eswitch_set_vport_spoofchk(stru
+ evport = &esw->vports[vport];
+ pschk = evport->info.spoofchk;
+ evport->info.spoofchk = spoofchk;
++ if (pschk && !is_valid_ether_addr(evport->info.mac))
++ mlx5_core_warn(esw->dev,
++ "Spoofchk in set while MAC is invalid, vport(%d)\n",
++ evport->vport);
+ if (evport->enabled && esw->mode == SRIOV_LEGACY)
+ err = esw_vport_ingress_config(esw, evport);
+ if (err)
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Bernard Pidoux <f6bvp@free.fr>
+Date: Fri, 25 Jan 2019 11:46:40 +0100
+Subject: net/rose: fix NULL ax25_cb kernel panic
+
+From: Bernard Pidoux <f6bvp@free.fr>
+
+[ Upstream commit b0cf029234f9b18e10703ba5147f0389c382bccc ]
+
+When an internally generated frame is handled by rose_xmit(),
+rose_route_frame() is called:
+
+ if (!rose_route_frame(skb, NULL)) {
+ dev_kfree_skb(skb);
+ stats->tx_errors++;
+ return NETDEV_TX_OK;
+ }
+
+We have the same code sequence in Net/Rom where an internally generated
+frame is handled by nr_xmit() calling nr_route_frame(skb, NULL).
+However, in this function the NULL argument is tested, while it is not in
+rose_route_frame().
+Then kernel panic occurs later on when calling ax25cmp() with a NULL
+ax25_cb argument as reported many times and recently with syzbot.
+
+We need to test if ax25 is NULL before using it.
+
+Testing:
+Built kernel with CONFIG_ROSE=y.
+
+Signed-off-by: Bernard Pidoux <f6bvp@free.fr>
+Acked-by: Dmitry Vyukov <dvyukov@google.com>
+Reported-by: syzbot+1a2c456a1ea08fa5b5f7@syzkaller.appspotmail.com
+Cc: "David S. Miller" <davem@davemloft.net>
+Cc: Ralf Baechle <ralf@linux-mips.org>
+Cc: Bernard Pidoux <f6bvp@free.fr>
+Cc: linux-hams@vger.kernel.org
+Cc: netdev@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/rose/rose_route.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/net/rose/rose_route.c
++++ b/net/rose/rose_route.c
+@@ -850,6 +850,7 @@ void rose_link_device_down(struct net_de
+
+ /*
+ * Route a frame to an appropriate AX.25 connection.
++ * A NULL ax25_cb indicates an internally generated frame.
+ */
+ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25)
+ {
+@@ -867,6 +868,10 @@ int rose_route_frame(struct sk_buff *skb
+
+ if (skb->len < ROSE_MIN_LEN)
+ return res;
++
++ if (!ax25)
++ return rose_loopback_queue(skb, NULL);
++
+ frametype = skb->data[2];
+ lci = ((skb->data[0] << 8) & 0xF00) + ((skb->data[1] << 0) & 0x0FF);
+ if (frametype == ROSE_CALL_REQUEST &&
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Josh Elsasser <jelsasser@appneta.com>
+Date: Sat, 26 Jan 2019 14:38:33 -0800
+Subject: net: set default network namespace in init_dummy_netdev()
+
+From: Josh Elsasser <jelsasser@appneta.com>
+
+[ Upstream commit 35edfdc77f683c8fd27d7732af06cf6489af60a5 ]
+
+Assign a default net namespace to netdevs created by init_dummy_netdev().
+Fixes a NULL pointer dereference caused by busy-polling a socket bound to
+an iwlwifi wireless device, which bumps the per-net BUSYPOLLRXPACKETS stat
+if napi_poll() received packets:
+
+ BUG: unable to handle kernel NULL pointer dereference at 0000000000000190
+ IP: napi_busy_loop+0xd6/0x200
+ Call Trace:
+ sock_poll+0x5e/0x80
+ do_sys_poll+0x324/0x5a0
+ SyS_poll+0x6c/0xf0
+ do_syscall_64+0x6b/0x1f0
+ entry_SYSCALL_64_after_hwframe+0x3d/0xa2
+
+Fixes: 7db6b048da3b ("net: Commonize busy polling code to focus on napi_id instead of socket")
+Signed-off-by: Josh Elsasser <jelsasser@appneta.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -8599,6 +8599,9 @@ int init_dummy_netdev(struct net_device
+ set_bit(__LINK_STATE_PRESENT, &dev->state);
+ set_bit(__LINK_STATE_START, &dev->state);
+
++ /* napi_busy_loop stats accounting wants this */
++ dev_net_set(dev, &init_net);
++
+ /* Note : We dont allocate pcpu_refcnt for dummy devices,
+ * because users of this 'device' dont need to change
+ * its refcount.
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Cong Wang <xiyou.wangcong@gmail.com>
+Date: Thu, 24 Jan 2019 14:18:18 -0800
+Subject: netrom: switch to sock timer API
+
+From: Cong Wang <xiyou.wangcong@gmail.com>
+
+[ Upstream commit 63346650c1a94a92be61a57416ac88c0a47c4327 ]
+
+sk_reset_timer() and sk_stop_timer() properly handle
+sock refcnt for timer function. Switching to them
+could fix a refcounting bug reported by syzbot.
+
+Reported-and-tested-by: syzbot+defa700d16f1bd1b9a05@syzkaller.appspotmail.com
+Cc: Ralf Baechle <ralf@linux-mips.org>
+Cc: linux-hams@vger.kernel.org
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netrom/nr_timer.c | 20 ++++++++++----------
+ 1 file changed, 10 insertions(+), 10 deletions(-)
+
+--- a/net/netrom/nr_timer.c
++++ b/net/netrom/nr_timer.c
+@@ -52,21 +52,21 @@ void nr_start_t1timer(struct sock *sk)
+ {
+ struct nr_sock *nr = nr_sk(sk);
+
+- mod_timer(&nr->t1timer, jiffies + nr->t1);
++ sk_reset_timer(sk, &nr->t1timer, jiffies + nr->t1);
+ }
+
+ void nr_start_t2timer(struct sock *sk)
+ {
+ struct nr_sock *nr = nr_sk(sk);
+
+- mod_timer(&nr->t2timer, jiffies + nr->t2);
++ sk_reset_timer(sk, &nr->t2timer, jiffies + nr->t2);
+ }
+
+ void nr_start_t4timer(struct sock *sk)
+ {
+ struct nr_sock *nr = nr_sk(sk);
+
+- mod_timer(&nr->t4timer, jiffies + nr->t4);
++ sk_reset_timer(sk, &nr->t4timer, jiffies + nr->t4);
+ }
+
+ void nr_start_idletimer(struct sock *sk)
+@@ -74,37 +74,37 @@ void nr_start_idletimer(struct sock *sk)
+ struct nr_sock *nr = nr_sk(sk);
+
+ if (nr->idle > 0)
+- mod_timer(&nr->idletimer, jiffies + nr->idle);
++ sk_reset_timer(sk, &nr->idletimer, jiffies + nr->idle);
+ }
+
+ void nr_start_heartbeat(struct sock *sk)
+ {
+- mod_timer(&sk->sk_timer, jiffies + 5 * HZ);
++ sk_reset_timer(sk, &sk->sk_timer, jiffies + 5 * HZ);
+ }
+
+ void nr_stop_t1timer(struct sock *sk)
+ {
+- del_timer(&nr_sk(sk)->t1timer);
++ sk_stop_timer(sk, &nr_sk(sk)->t1timer);
+ }
+
+ void nr_stop_t2timer(struct sock *sk)
+ {
+- del_timer(&nr_sk(sk)->t2timer);
++ sk_stop_timer(sk, &nr_sk(sk)->t2timer);
+ }
+
+ void nr_stop_t4timer(struct sock *sk)
+ {
+- del_timer(&nr_sk(sk)->t4timer);
++ sk_stop_timer(sk, &nr_sk(sk)->t4timer);
+ }
+
+ void nr_stop_idletimer(struct sock *sk)
+ {
+- del_timer(&nr_sk(sk)->idletimer);
++ sk_stop_timer(sk, &nr_sk(sk)->idletimer);
+ }
+
+ void nr_stop_heartbeat(struct sock *sk)
+ {
+- del_timer(&sk->sk_timer);
++ sk_stop_timer(sk, &sk->sk_timer);
+ }
+
+ int nr_t1timer_running(struct sock *sk)
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Simon Horman <horms+renesas@verge.net.au>
+Date: Wed, 23 Jan 2019 12:14:52 +0100
+Subject: ravb: expand rx descriptor data to accommodate hw checksum
+
+From: Simon Horman <horms+renesas@verge.net.au>
+
+[ Upstream commit 12da64300fbc76b875900445f4146c3dc617d43e ]
+
+EtherAVB may provide a checksum of packet data appended to packet data. In
+order to allow this checksum to be received by the host descriptor data
+needs to be enlarged by 2 bytes to accommodate the checksum.
+
+In the case of MTU-sized packets without a VLAN tag the
+checksum were already accommodated by virtue of the space reserved for the
+VLAN tag. However, a packet of MTU-size with a VLAN tag consumed all
+packet data space provided by a descriptor leaving no space for the
+trailing checksum.
+
+This was not detected by the driver, which incorrectly used the last two
+bytes of packet data as the checksum and truncated the packet by two bytes.
+This resulted in all such packets being dropped.
+
+A work around is to disable RX checksum offload
+ # ethtool -K eth0 rx off
+
+This patch resolves this problem by increasing the size available for
+packet data in RX descriptors by two bytes.
+
+Tested on R-Car E3 (r8a77990) ES1.0 based Ebisu-4D board
+
+v2
+* Use sizeof(__sum16) directly rather than adding a driver-local
+ #define for the size of the checksum provided by the hw (2 bytes).
+
+Fixes: 4d86d3818627 ("ravb: RX checksum offload")
+Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
+Reviewed-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/renesas/ravb_main.c | 12 +++++++-----
+ 1 file changed, 7 insertions(+), 5 deletions(-)
+
+--- a/drivers/net/ethernet/renesas/ravb_main.c
++++ b/drivers/net/ethernet/renesas/ravb_main.c
+@@ -344,7 +344,7 @@ static int ravb_ring_init(struct net_dev
+ int i;
+
+ priv->rx_buf_sz = (ndev->mtu <= 1492 ? PKT_BUF_SZ : ndev->mtu) +
+- ETH_HLEN + VLAN_HLEN;
++ ETH_HLEN + VLAN_HLEN + sizeof(__sum16);
+
+ /* Allocate RX and TX skb rings */
+ priv->rx_skb[q] = kcalloc(priv->num_rx_ring[q],
+@@ -525,13 +525,15 @@ static void ravb_rx_csum(struct sk_buff
+ {
+ u8 *hw_csum;
+
+- /* The hardware checksum is 2 bytes appended to packet data */
+- if (unlikely(skb->len < 2))
++ /* The hardware checksum is contained in sizeof(__sum16) (2) bytes
++ * appended to packet data
++ */
++ if (unlikely(skb->len < sizeof(__sum16)))
+ return;
+- hw_csum = skb_tail_pointer(skb) - 2;
++ hw_csum = skb_tail_pointer(skb) - sizeof(__sum16);
+ skb->csum = csum_unfold((__force __sum16)get_unaligned_le16(hw_csum));
+ skb->ip_summed = CHECKSUM_COMPLETE;
+- skb_trim(skb, skb->len - 2);
++ skb_trim(skb, skb->len - sizeof(__sum16));
+ }
+
+ /* Packet receive function for Ethernet AVB */
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Bodong Wang <bodong@mellanox.com>
+Date: Sun, 13 Jan 2019 22:47:26 -0600
+Subject: Revert "net/mlx5e: E-Switch, Initialize eswitch only if eswitch manager"
+
+From: Bodong Wang <bodong@mellanox.com>
+
+[ Upstream commit 4e046de0f50e04acd48eb373d6a9061ddf014e0c ]
+
+This reverts commit 5f5991f36dce1e69dd8bd7495763eec2e28f08e7.
+
+With the original commit, eswitch instance will not be initialized for
+a function which is vport group manager but not eswitch manager such as
+host PF on SmartNIC (BlueField) card. This will result in a kernel crash
+when such a vport group manager is trying to access vports in its group.
+E.g, PF vport manager (not eswitch manager) tries to configure the MAC
+of its VF vport, a kernel trace similar to the one below will happen:
+
+ BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
+ ...
+ RIP: 0010:mlx5_eswitch_get_vport_config+0xc/0x180 [mlx5_core]
+ ...
+
+Fixes: 5f5991f36dce ("net/mlx5e: E-Switch, Initialize eswitch only if eswitch manager")
+Signed-off-by: Bodong Wang <bodong@mellanox.com>
+Reported-by: Yuval Avnery <yuvalav@mellanox.com>
+Reviewed-by: Daniel Jurgens <danielj@mellanox.com>
+Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+@@ -1689,7 +1689,7 @@ int mlx5_eswitch_init(struct mlx5_core_d
+ int vport_num;
+ int err;
+
+- if (!MLX5_ESWITCH_MANAGER(dev))
++ if (!MLX5_VPORT_MANAGER(dev))
+ return 0;
+
+ esw_info(dev,
+@@ -1758,7 +1758,7 @@ abort:
+
+ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
+ {
+- if (!esw || !MLX5_ESWITCH_MANAGER(esw->dev))
++ if (!esw || !MLX5_VPORT_MANAGER(esw->dev))
+ return;
+
+ esw_info(esw->dev, "cleanup\n");
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Xin Long <lucien.xin@gmail.com>
+Date: Tue, 22 Jan 2019 02:40:12 +0800
+Subject: sctp: improve the events for sctp stream adding
+
+From: Xin Long <lucien.xin@gmail.com>
+
+[ Upstream commit 8220c870cb0f4eaa4e335c9645dbd9a1c461c1dd ]
+
+This patch is to improve sctp stream adding events in 2 places:
+
+ 1. In sctp_process_strreset_addstrm_out(), move up SCTP_MAX_STREAM
+ and in stream allocation failure checks, as the adding has to
+ succeed after reconf_timer stops for the in stream adding
+ request retransmission.
+
+ 2. In sctp_process_strreset_addstrm_in(), no event should be sent,
+ as no in or out stream is added here.
+
+Fixes: 50a41591f110 ("sctp: implement receiver-side procedures for the Add Outgoing Streams Request Parameter")
+Fixes: c5c4ebb3ab87 ("sctp: implement receiver-side procedures for the Add Incoming Streams Request Parameter")
+Reported-by: Ying Xu <yinxu@redhat.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/stream.c | 19 ++++++++-----------
+ 1 file changed, 8 insertions(+), 11 deletions(-)
+
+--- a/net/sctp/stream.c
++++ b/net/sctp/stream.c
+@@ -866,6 +866,14 @@ struct sctp_chunk *sctp_process_strreset
+ if (!(asoc->strreset_enable & SCTP_ENABLE_CHANGE_ASSOC_REQ))
+ goto out;
+
++ in = ntohs(addstrm->number_of_streams);
++ incnt = stream->incnt + in;
++ if (!in || incnt > SCTP_MAX_STREAM)
++ goto out;
++
++ if (sctp_stream_alloc_in(stream, incnt, GFP_ATOMIC))
++ goto out;
++
+ if (asoc->strreset_chunk) {
+ if (!sctp_chunk_lookup_strreset_param(
+ asoc, 0, SCTP_PARAM_RESET_ADD_IN_STREAMS)) {
+@@ -889,14 +897,6 @@ struct sctp_chunk *sctp_process_strreset
+ }
+ }
+
+- in = ntohs(addstrm->number_of_streams);
+- incnt = stream->incnt + in;
+- if (!in || incnt > SCTP_MAX_STREAM)
+- goto out;
+-
+- if (sctp_stream_alloc_in(stream, incnt, GFP_ATOMIC))
+- goto out;
+-
+ stream->incnt = incnt;
+
+ result = SCTP_STRRESET_PERFORMED;
+@@ -966,9 +966,6 @@ struct sctp_chunk *sctp_process_strreset
+
+ result = SCTP_STRRESET_PERFORMED;
+
+- *evp = sctp_ulpevent_make_stream_change_event(asoc,
+- 0, 0, ntohs(addstrm->number_of_streams), GFP_ATOMIC);
+-
+ out:
+ sctp_update_strreset_result(asoc, result);
+ err:
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Xin Long <lucien.xin@gmail.com>
+Date: Tue, 22 Jan 2019 02:39:34 +0800
+Subject: sctp: improve the events for sctp stream reset
+
+From: Xin Long <lucien.xin@gmail.com>
+
+[ Upstream commit 2e6dc4d95110becfe0ff4c3d4749c33ea166e9e7 ]
+
+This patch is to improve sctp stream reset events in 4 places:
+
+ 1. In sctp_process_strreset_outreq(), the flag should always be set with
+ SCTP_STREAM_RESET_INCOMING_SSN instead of OUTGOING, as receiver's in
+ stream is reset here.
+ 2. In sctp_process_strreset_outreq(), move up SCTP_STRRESET_ERR_WRONG_SSN
+ check, as the reset has to succeed after reconf_timer stops for the
+ in stream reset request retransmission.
+ 3. In sctp_process_strreset_inreq(), no event should be sent, as no in
+ or out stream is reset here.
+ 4. In sctp_process_strreset_resp(), SCTP_STREAM_RESET_INCOMING_SSN or
+ OUTGOING event should always be sent for stream reset requests, no
+ matter it fails or succeeds to process the request.
+
+Fixes: 810544764536 ("sctp: implement receiver-side procedures for the Outgoing SSN Reset Request Parameter")
+Fixes: 16e1a91965b0 ("sctp: implement receiver-side procedures for the Incoming SSN Reset Request Parameter")
+Fixes: 11ae76e67a17 ("sctp: implement receiver-side procedures for the Reconf Response Parameter")
+Reported-by: Ying Xu <yinxu@redhat.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/stream.c | 39 +++++++++++++++++----------------------
+ 1 file changed, 17 insertions(+), 22 deletions(-)
+
+--- a/net/sctp/stream.c
++++ b/net/sctp/stream.c
+@@ -585,9 +585,9 @@ struct sctp_chunk *sctp_process_strreset
+ struct sctp_strreset_outreq *outreq = param.v;
+ struct sctp_stream *stream = &asoc->stream;
+ __u32 result = SCTP_STRRESET_DENIED;
+- __u16 i, nums, flags = 0;
+ __be16 *str_p = NULL;
+ __u32 request_seq;
++ __u16 i, nums;
+
+ request_seq = ntohl(outreq->request_seq);
+
+@@ -615,6 +615,15 @@ struct sctp_chunk *sctp_process_strreset
+ if (!(asoc->strreset_enable & SCTP_ENABLE_RESET_STREAM_REQ))
+ goto out;
+
++ nums = (ntohs(param.p->length) - sizeof(*outreq)) / sizeof(__u16);
++ str_p = outreq->list_of_streams;
++ for (i = 0; i < nums; i++) {
++ if (ntohs(str_p[i]) >= stream->incnt) {
++ result = SCTP_STRRESET_ERR_WRONG_SSN;
++ goto out;
++ }
++ }
++
+ if (asoc->strreset_chunk) {
+ if (!sctp_chunk_lookup_strreset_param(
+ asoc, outreq->response_seq,
+@@ -637,32 +646,19 @@ struct sctp_chunk *sctp_process_strreset
+ sctp_chunk_put(asoc->strreset_chunk);
+ asoc->strreset_chunk = NULL;
+ }
+-
+- flags = SCTP_STREAM_RESET_INCOMING_SSN;
+ }
+
+- nums = (ntohs(param.p->length) - sizeof(*outreq)) / sizeof(__u16);
+- if (nums) {
+- str_p = outreq->list_of_streams;
+- for (i = 0; i < nums; i++) {
+- if (ntohs(str_p[i]) >= stream->incnt) {
+- result = SCTP_STRRESET_ERR_WRONG_SSN;
+- goto out;
+- }
+- }
+-
++ if (nums)
+ for (i = 0; i < nums; i++)
+ SCTP_SI(stream, ntohs(str_p[i]))->mid = 0;
+- } else {
++ else
+ for (i = 0; i < stream->incnt; i++)
+ SCTP_SI(stream, i)->mid = 0;
+- }
+
+ result = SCTP_STRRESET_PERFORMED;
+
+ *evp = sctp_ulpevent_make_stream_reset_event(asoc,
+- flags | SCTP_STREAM_RESET_OUTGOING_SSN, nums, str_p,
+- GFP_ATOMIC);
++ SCTP_STREAM_RESET_INCOMING_SSN, nums, str_p, GFP_ATOMIC);
+
+ out:
+ sctp_update_strreset_result(asoc, result);
+@@ -738,9 +734,6 @@ struct sctp_chunk *sctp_process_strreset
+
+ result = SCTP_STRRESET_PERFORMED;
+
+- *evp = sctp_ulpevent_make_stream_reset_event(asoc,
+- SCTP_STREAM_RESET_INCOMING_SSN, nums, str_p, GFP_ATOMIC);
+-
+ out:
+ sctp_update_strreset_result(asoc, result);
+ err:
+@@ -1036,10 +1029,10 @@ struct sctp_chunk *sctp_process_strreset
+ sout->mid_uo = 0;
+ }
+ }
+-
+- flags = SCTP_STREAM_RESET_OUTGOING_SSN;
+ }
+
++ flags |= SCTP_STREAM_RESET_OUTGOING_SSN;
++
+ for (i = 0; i < stream->outcnt; i++)
+ SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN;
+
+@@ -1058,6 +1051,8 @@ struct sctp_chunk *sctp_process_strreset
+ nums = (ntohs(inreq->param_hdr.length) - sizeof(*inreq)) /
+ sizeof(__u16);
+
++ flags |= SCTP_STREAM_RESET_INCOMING_SSN;
++
+ *evp = sctp_ulpevent_make_stream_reset_event(asoc, flags,
+ nums, str_p, GFP_ATOMIC);
+ } else if (req->type == SCTP_PARAM_RESET_TSN_REQUEST) {
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Xin Long <lucien.xin@gmail.com>
+Date: Tue, 22 Jan 2019 02:42:09 +0800
+Subject: sctp: set chunk transport correctly when it's a new asoc
+
+From: Xin Long <lucien.xin@gmail.com>
+
+[ Upstream commit 4ff40b86262b73553ee47cc3784ce8ba0f220bd8 ]
+
+In the paths:
+
+ sctp_sf_do_unexpected_init() ->
+ sctp_make_init_ack()
+ sctp_sf_do_dupcook_a/b()() ->
+ sctp_sf_do_5_1D_ce()
+
+The new chunk 'retval' transport is set from the incoming chunk 'chunk'
+transport. However, 'retval' transport belongs to the new asoc, which
+is a different one from 'chunk' transport's asoc.
+
+It will cause that the 'retval' chunk gets set with a wrong transport.
+Later when sending it and because of Commit b9fd683982c9 ("sctp: add
+sctp_packet_singleton"), sctp_packet_singleton() will set some fields,
+like vtag to 'retval' chunk from that wrong transport's asoc.
+
+This patch is to fix it by setting 'retval' transport correctly which
+belongs to the right asoc in sctp_make_init_ack() and
+sctp_sf_do_5_1D_ce().
+
+Fixes: b9fd683982c9 ("sctp: add sctp_packet_singleton")
+Reported-by: Ying Xu <yinxu@redhat.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/sm_make_chunk.c | 11 ++++++++---
+ 1 file changed, 8 insertions(+), 3 deletions(-)
+
+--- a/net/sctp/sm_make_chunk.c
++++ b/net/sctp/sm_make_chunk.c
+@@ -495,7 +495,10 @@ struct sctp_chunk *sctp_make_init_ack(co
+ *
+ * [INIT ACK back to where the INIT came from.]
+ */
+- retval->transport = chunk->transport;
++ if (chunk->transport)
++ retval->transport =
++ sctp_assoc_lookup_paddr(asoc,
++ &chunk->transport->ipaddr);
+
+ retval->subh.init_hdr =
+ sctp_addto_chunk(retval, sizeof(initack), &initack);
+@@ -642,8 +645,10 @@ struct sctp_chunk *sctp_make_cookie_ack(
+ *
+ * [COOKIE ACK back to where the COOKIE ECHO came from.]
+ */
+- if (retval && chunk)
+- retval->transport = chunk->transport;
++ if (retval && chunk && chunk->transport)
++ retval->transport =
++ sctp_assoc_lookup_paddr(asoc,
++ &chunk->transport->ipaddr);
+
+ return retval;
+ }
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Xin Long <lucien.xin@gmail.com>
+Date: Tue, 22 Jan 2019 02:42:41 +0800
+Subject: sctp: set flow sport from saddr only when it's 0
+
+From: Xin Long <lucien.xin@gmail.com>
+
+[ Upstream commit ecf938fe7d0088077ee1280419a2b3c5429b47c8 ]
+
+Now sctp_transport_pmtu() passes transport->saddr into .get_dst() to set
+flow sport from 'saddr'. However, transport->saddr is set only when
+transport->dst exists in sctp_transport_route().
+
+If sctp_transport_pmtu() is called without transport->saddr set, like
+when transport->dst doesn't exists, the flow sport will be set to 0
+from transport->saddr, which will cause a wrong route to be got.
+
+Commit 6e91b578bf3f ("sctp: re-use sctp_transport_pmtu in
+sctp_transport_route") made the issue be triggered more easily
+since sctp_transport_pmtu() would be called in sctp_transport_route()
+after that.
+
+In general, fl4->fl4_sport should always be set to
+htons(asoc->base.bind_addr.port), unless transport->asoc doesn't exist
+in sctp_v4/6_get_dst(), which is the case:
+
+ sctp_ootb_pkt_new() ->
+ sctp_transport_route()
+
+For that, we can simply handle it by setting flow sport from saddr only
+when it's 0 in sctp_v4/6_get_dst().
+
+Fixes: 6e91b578bf3f ("sctp: re-use sctp_transport_pmtu in sctp_transport_route")
+Reported-by: Ying Xu <yinxu@redhat.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/ipv6.c | 3 ++-
+ net/sctp/protocol.c | 3 ++-
+ 2 files changed, 4 insertions(+), 2 deletions(-)
+
+--- a/net/sctp/ipv6.c
++++ b/net/sctp/ipv6.c
+@@ -277,7 +277,8 @@ static void sctp_v6_get_dst(struct sctp_
+
+ if (saddr) {
+ fl6->saddr = saddr->v6.sin6_addr;
+- fl6->fl6_sport = saddr->v6.sin6_port;
++ if (!fl6->fl6_sport)
++ fl6->fl6_sport = saddr->v6.sin6_port;
+
+ pr_debug("src=%pI6 - ", &fl6->saddr);
+ }
+--- a/net/sctp/protocol.c
++++ b/net/sctp/protocol.c
+@@ -440,7 +440,8 @@ static void sctp_v4_get_dst(struct sctp_
+ }
+ if (saddr) {
+ fl4->saddr = saddr->v4.sin_addr.s_addr;
+- fl4->fl4_sport = saddr->v4.sin_port;
++ if (!fl4->fl4_sport)
++ fl4->fl4_sport = saddr->v4.sin_port;
+ }
+
+ pr_debug("%s: dst:%pI4, src:%pI4 - ", __func__, &fl4->daddr,
fix-net-ipv4-do-not-handle-duplicate-fragments-as-overlapping.patch
drm-msm-gpu-fix-building-without-debugfs.patch
+ipv6-consider-sk_bound_dev_if-when-binding-a-socket-to-an-address.patch
+ipv6-sr-clear-ip6cb-skb-on-srh-ip4ip6-encapsulation.patch
+ipvlan-l3mdev-fix-broken-l3s-mode-wrt-local-routes.patch
+l2tp-copy-4-more-bytes-to-linear-part-if-necessary.patch
+l2tp-fix-reading-optional-fields-of-l2tpv3.patch
+net-ip_gre-always-reports-o_key-to-userspace.patch
+net-ip_gre-use-erspan-key-field-for-tunnel-lookup.patch
+net-mlx4_core-add-masking-for-a-few-queries-on-hca-caps.patch
+netrom-switch-to-sock-timer-api.patch
+net-rose-fix-null-ax25_cb-kernel-panic.patch
+net-set-default-network-namespace-in-init_dummy_netdev.patch
+ravb-expand-rx-descriptor-data-to-accommodate-hw-checksum.patch
+sctp-improve-the-events-for-sctp-stream-reset.patch
+tun-move-the-call-to-tun_set_real_num_queues.patch
+ucc_geth-reset-bql-queue-when-stopping-device.patch
+vhost-fix-oob-in-get_rx_bufs.patch
+net-ip6_gre-always-reports-o_key-to-userspace.patch
+sctp-improve-the-events-for-sctp-stream-adding.patch
+net-mlx5e-allow-mac-invalidation-while-spoofchk-is-on.patch
+ip6mr-fix-notifiers-call-on-mroute_clean_tables.patch
+revert-net-mlx5e-e-switch-initialize-eswitch-only-if-eswitch-manager.patch
+sctp-set-chunk-transport-correctly-when-it-s-a-new-asoc.patch
+sctp-set-flow-sport-from-saddr-only-when-it-s-0.patch
+virtio_net-don-t-enable-napi-when-interface-is-down.patch
+virtio_net-don-t-call-free_old_xmit_skbs-for-xdp_frames.patch
+virtio_net-fix-not-restoring-real_num_rx_queues.patch
+virtio_net-fix-out-of-bounds-access-of-sq.patch
+virtio_net-don-t-process-redirected-xdp-frames-when-xdp-is-disabled.patch
+virtio_net-use-xdp_return_frame-to-free-xdp_frames-on-destroying-vqs.patch
+virtio_net-differentiate-sk_buff-and-xdp_frame-on-freeing.patch
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: George Amanakis <gamanakis@gmail.com>
+Date: Tue, 29 Jan 2019 22:50:13 -0500
+Subject: tun: move the call to tun_set_real_num_queues
+
+From: George Amanakis <gamanakis@gmail.com>
+
+[ Upstream commit 3a03cb8456cc1d61c467a5375e0a10e5207b948c ]
+
+Call tun_set_real_num_queues() after the increment of tun->numqueues
+since the former depends on it. Otherwise, the number of queues is not
+correctly accounted for, which results in warnings similar to:
+"vnet0 selects TX queue 11, but real number of TX queues is 11".
+
+Fixes: 0b7959b62573 ("tun: publish tfile after it's fully initialized")
+Reported-and-tested-by: George Amanakis <gamanakis@gmail.com>
+Signed-off-by: George Amanakis <gamanakis@gmail.com>
+Signed-off-by: Stanislav Fomichev <sdf@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/tun.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/drivers/net/tun.c
++++ b/drivers/net/tun.c
+@@ -866,8 +866,6 @@ static int tun_attach(struct tun_struct
+ tun_napi_init(tun, tfile, napi, napi_frags);
+ }
+
+- tun_set_real_num_queues(tun);
+-
+ /* device is allowed to go away first, so no need to hold extra
+ * refcnt.
+ */
+@@ -879,6 +877,7 @@ static int tun_attach(struct tun_struct
+ rcu_assign_pointer(tfile->tun, tun);
+ rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile);
+ tun->numqueues++;
++ tun_set_real_num_queues(tun);
+ out:
+ return err;
+ }
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Mathias Thore <mathias.thore@infinera.com>
+Date: Mon, 28 Jan 2019 10:07:47 +0100
+Subject: ucc_geth: Reset BQL queue when stopping device
+
+From: Mathias Thore <mathias.thore@infinera.com>
+
+[ Upstream commit e15aa3b2b1388c399c1a2ce08550d2cc4f7e3e14 ]
+
+After a timeout event caused by for example a broadcast storm, when
+the MAC and PHY are reset, the BQL TX queue needs to be reset as
+well. Otherwise, the device will exhibit severe performance issues
+even after the storm has ended.
+
+Co-authored-by: David Gounaris <david.gounaris@infinera.com>
+Signed-off-by: Mathias Thore <mathias.thore@infinera.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/freescale/ucc_geth.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/net/ethernet/freescale/ucc_geth.c
++++ b/drivers/net/ethernet/freescale/ucc_geth.c
+@@ -1888,6 +1888,8 @@ static void ucc_geth_free_tx(struct ucc_
+ u16 i, j;
+ u8 __iomem *bd;
+
++ netdev_reset_queue(ugeth->ndev);
++
+ ug_info = ugeth->ug_info;
+ uf_info = &ug_info->uf_info;
+
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Jason Wang <jasowang@redhat.com>
+Date: Mon, 28 Jan 2019 15:05:05 +0800
+Subject: vhost: fix OOB in get_rx_bufs()
+
+From: Jason Wang <jasowang@redhat.com>
+
+[ Upstream commit b46a0bf78ad7b150ef5910da83859f7f5a514ffd ]
+
+After batched used ring updating was introduced in commit e2b3b35eb989
+("vhost_net: batch used ring update in rx"). We tend to batch heads in
+vq->heads for more than one packet. But the quota passed to
+get_rx_bufs() was not correctly limited, which can result in an OOB write
+in vq->heads.
+
+ headcount = get_rx_bufs(vq, vq->heads + nvq->done_idx,
+ vhost_len, &in, vq_log, &log,
+ likely(mergeable) ? UIO_MAXIOV : 1);
+
+UIO_MAXIOV was still used which is wrong since we could have batched
+used in vq->heads, this will cause OOB if the next buffer needs more
+than 960 (1024 (UIO_MAXIOV) - 64 (VHOST_NET_BATCH)) heads after we've
+batched 64 (VHOST_NET_BATCH) heads:
+Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
+
+=============================================================================
+BUG kmalloc-8k (Tainted: G B ): Redzone overwritten
+-----------------------------------------------------------------------------
+
+INFO: 0x00000000fd93b7a2-0x00000000f0713384. First byte 0xa9 instead of 0xcc
+INFO: Allocated in alloc_pd+0x22/0x60 age=3933677 cpu=2 pid=2674
+ kmem_cache_alloc_trace+0xbb/0x140
+ alloc_pd+0x22/0x60
+ gen8_ppgtt_create+0x11d/0x5f0
+ i915_ppgtt_create+0x16/0x80
+ i915_gem_create_context+0x248/0x390
+ i915_gem_context_create_ioctl+0x4b/0xe0
+ drm_ioctl_kernel+0xa5/0xf0
+ drm_ioctl+0x2ed/0x3a0
+ do_vfs_ioctl+0x9f/0x620
+ ksys_ioctl+0x6b/0x80
+ __x64_sys_ioctl+0x11/0x20
+ do_syscall_64+0x43/0xf0
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+INFO: Slab 0x00000000d13e87af objects=3 used=3 fp=0x (null) flags=0x200000000010201
+INFO: Object 0x0000000003278802 @offset=17064 fp=0x00000000e2e6652b
+
+Fixing this by allocating UIO_MAXIOV + VHOST_NET_BATCH iovs for
+vhost-net. This is done through set the limitation through
+vhost_dev_init(), then set_owner can allocate the number of iov in a
+per device manner.
+
+This fixes CVE-2018-16880.
+
+Fixes: e2b3b35eb989 ("vhost_net: batch used ring update in rx")
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/vhost/net.c | 3 ++-
+ drivers/vhost/scsi.c | 2 +-
+ drivers/vhost/vhost.c | 7 ++++---
+ drivers/vhost/vhost.h | 4 +++-
+ drivers/vhost/vsock.c | 2 +-
+ 5 files changed, 11 insertions(+), 7 deletions(-)
+
+--- a/drivers/vhost/net.c
++++ b/drivers/vhost/net.c
+@@ -1114,7 +1114,8 @@ static int vhost_net_open(struct inode *
+ n->vqs[i].rx_ring = NULL;
+ vhost_net_buf_init(&n->vqs[i].rxq);
+ }
+- vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX);
++ vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX,
++ UIO_MAXIOV + VHOST_NET_BATCH);
+
+ vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, EPOLLOUT, dev);
+ vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, EPOLLIN, dev);
+--- a/drivers/vhost/scsi.c
++++ b/drivers/vhost/scsi.c
+@@ -1398,7 +1398,7 @@ static int vhost_scsi_open(struct inode
+ vqs[i] = &vs->vqs[i].vq;
+ vs->vqs[i].vq.handle_kick = vhost_scsi_handle_kick;
+ }
+- vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ);
++ vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ, UIO_MAXIOV);
+
+ vhost_scsi_init_inflight(vs, NULL);
+
+--- a/drivers/vhost/vhost.c
++++ b/drivers/vhost/vhost.c
+@@ -390,9 +390,9 @@ static long vhost_dev_alloc_iovecs(struc
+ vq->indirect = kmalloc_array(UIO_MAXIOV,
+ sizeof(*vq->indirect),
+ GFP_KERNEL);
+- vq->log = kmalloc_array(UIO_MAXIOV, sizeof(*vq->log),
++ vq->log = kmalloc_array(dev->iov_limit, sizeof(*vq->log),
+ GFP_KERNEL);
+- vq->heads = kmalloc_array(UIO_MAXIOV, sizeof(*vq->heads),
++ vq->heads = kmalloc_array(dev->iov_limit, sizeof(*vq->heads),
+ GFP_KERNEL);
+ if (!vq->indirect || !vq->log || !vq->heads)
+ goto err_nomem;
+@@ -414,7 +414,7 @@ static void vhost_dev_free_iovecs(struct
+ }
+
+ void vhost_dev_init(struct vhost_dev *dev,
+- struct vhost_virtqueue **vqs, int nvqs)
++ struct vhost_virtqueue **vqs, int nvqs, int iov_limit)
+ {
+ struct vhost_virtqueue *vq;
+ int i;
+@@ -427,6 +427,7 @@ void vhost_dev_init(struct vhost_dev *de
+ dev->iotlb = NULL;
+ dev->mm = NULL;
+ dev->worker = NULL;
++ dev->iov_limit = iov_limit;
+ init_llist_head(&dev->work_list);
+ init_waitqueue_head(&dev->wait);
+ INIT_LIST_HEAD(&dev->read_list);
+--- a/drivers/vhost/vhost.h
++++ b/drivers/vhost/vhost.h
+@@ -170,9 +170,11 @@ struct vhost_dev {
+ struct list_head read_list;
+ struct list_head pending_list;
+ wait_queue_head_t wait;
++ int iov_limit;
+ };
+
+-void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs, int nvqs);
++void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs,
++ int nvqs, int iov_limit);
+ long vhost_dev_set_owner(struct vhost_dev *dev);
+ bool vhost_dev_has_owner(struct vhost_dev *dev);
+ long vhost_dev_check_owner(struct vhost_dev *);
+--- a/drivers/vhost/vsock.c
++++ b/drivers/vhost/vsock.c
+@@ -531,7 +531,7 @@ static int vhost_vsock_dev_open(struct i
+ vsock->vqs[VSOCK_VQ_TX].handle_kick = vhost_vsock_handle_tx_kick;
+ vsock->vqs[VSOCK_VQ_RX].handle_kick = vhost_vsock_handle_rx_kick;
+
+- vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs));
++ vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs), UIO_MAXIOV);
+
+ file->private_data = vsock;
+ spin_lock_init(&vsock->send_pkt_list_lock);
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+Date: Tue, 29 Jan 2019 09:45:59 +0900
+Subject: virtio_net: Differentiate sk_buff and xdp_frame on freeing
+
+From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+
+[ Upstream commit 5050471d35d1316ba32dfcbb409978337eb9e75e
+
+ I had to fold commit df133f3f9625 ("virtio_net: bulk free tx skbs")
+ into this to make it work. ]
+
+We do not reset or free up unused buffers when enabling/disabling XDP,
+so it can happen that xdp_frames are freed after disabling XDP or
+sk_buffs are freed after enabling XDP on xdp tx queues.
+Thus we need to handle both forms (xdp_frames and sk_buffs) regardless
+of XDP setting.
+One way to trigger this problem is to disable XDP when napi_tx is
+enabled. In that case, virtnet_xdp_set() calls virtnet_napi_enable()
+which kicks NAPI. The NAPI handler will call virtnet_poll_cleantx()
+which invokes free_old_xmit_skbs() for queues which have been used by
+XDP.
+
+Note that even with this change we need to keep skipping
+free_old_xmit_skbs() from NAPI handlers when XDP is enabled, because XDP
+tx queues do not acquire queue locks.
+
+- v2: Use napi_consume_skb() instead of dev_consume_skb_any()
+
+Fixes: 4941d472bf95 ("virtio-net: do not reset during XDP set")
+Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/virtio_net.c | 64 ++++++++++++++++++++++++++++++++++-------------
+ 1 file changed, 47 insertions(+), 17 deletions(-)
+
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -57,6 +57,8 @@ module_param(napi_tx, bool, 0644);
+ #define VIRTIO_XDP_TX BIT(0)
+ #define VIRTIO_XDP_REDIR BIT(1)
+
++#define VIRTIO_XDP_FLAG BIT(0)
++
+ /* RX packet size EWMA. The average packet size is used to determine the packet
+ * buffer size when refilling RX rings. As the entire RX ring may be refilled
+ * at once, the weight is chosen so that the EWMA will be insensitive to short-
+@@ -251,6 +253,21 @@ struct padded_vnet_hdr {
+ char padding[4];
+ };
+
++static bool is_xdp_frame(void *ptr)
++{
++ return (unsigned long)ptr & VIRTIO_XDP_FLAG;
++}
++
++static void *xdp_to_ptr(struct xdp_frame *ptr)
++{
++ return (void *)((unsigned long)ptr | VIRTIO_XDP_FLAG);
++}
++
++static struct xdp_frame *ptr_to_xdp(void *ptr)
++{
++ return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG);
++}
++
+ /* Converting between virtqueue no. and kernel tx/rx queue no.
+ * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq
+ */
+@@ -461,7 +478,8 @@ static int __virtnet_xdp_xmit_one(struct
+
+ sg_init_one(sq->sg, xdpf->data, xdpf->len);
+
+- err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdpf, GFP_ATOMIC);
++ err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdp_to_ptr(xdpf),
++ GFP_ATOMIC);
+ if (unlikely(err))
+ return -ENOSPC; /* Caller handle free/refcnt */
+
+@@ -481,13 +499,13 @@ static int virtnet_xdp_xmit(struct net_d
+ {
+ struct virtnet_info *vi = netdev_priv(dev);
+ struct receive_queue *rq = vi->rq;
+- struct xdp_frame *xdpf_sent;
+ struct bpf_prog *xdp_prog;
+ struct send_queue *sq;
+ unsigned int len;
+ int drops = 0;
+ int kicks = 0;
+ int ret, err;
++ void *ptr;
+ int i;
+
+ /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
+@@ -506,8 +524,12 @@ static int virtnet_xdp_xmit(struct net_d
+ }
+
+ /* Free up any pending old buffers before queueing new ones. */
+- while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
+- xdp_return_frame(xdpf_sent);
++ while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
++ if (likely(is_xdp_frame(ptr)))
++ xdp_return_frame(ptr_to_xdp(ptr));
++ else
++ napi_consume_skb(ptr, false);
++ }
+
+ for (i = 0; i < n; i++) {
+ struct xdp_frame *xdpf = frames[i];
+@@ -1326,20 +1348,28 @@ static int virtnet_receive(struct receiv
+ return stats.packets;
+ }
+
+-static void free_old_xmit_skbs(struct send_queue *sq)
++static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi)
+ {
+- struct sk_buff *skb;
+ unsigned int len;
+ unsigned int packets = 0;
+ unsigned int bytes = 0;
++ void *ptr;
+
+- while ((skb = virtqueue_get_buf(sq->vq, &len)) != NULL) {
+- pr_debug("Sent skb %p\n", skb);
++ while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
++ if (likely(!is_xdp_frame(ptr))) {
++ struct sk_buff *skb = ptr;
+
+- bytes += skb->len;
+- packets++;
++ pr_debug("Sent skb %p\n", skb);
+
+- dev_consume_skb_any(skb);
++ bytes += skb->len;
++ napi_consume_skb(skb, in_napi);
++ } else {
++ struct xdp_frame *frame = ptr_to_xdp(ptr);
++
++ bytes += frame->len;
++ xdp_return_frame(frame);
++ }
++ packets++;
+ }
+
+ /* Avoid overhead when no packets have been processed
+@@ -1375,7 +1405,7 @@ static void virtnet_poll_cleantx(struct
+ return;
+
+ if (__netif_tx_trylock(txq)) {
+- free_old_xmit_skbs(sq);
++ free_old_xmit_skbs(sq, true);
+ __netif_tx_unlock(txq);
+ }
+
+@@ -1459,7 +1489,7 @@ static int virtnet_poll_tx(struct napi_s
+
+ txq = netdev_get_tx_queue(vi->dev, index);
+ __netif_tx_lock(txq, raw_smp_processor_id());
+- free_old_xmit_skbs(sq);
++ free_old_xmit_skbs(sq, true);
+ __netif_tx_unlock(txq);
+
+ virtqueue_napi_complete(napi, sq->vq, 0);
+@@ -1528,7 +1558,7 @@ static netdev_tx_t start_xmit(struct sk_
+ bool use_napi = sq->napi.weight;
+
+ /* Free up any pending old buffers before queueing new ones. */
+- free_old_xmit_skbs(sq);
++ free_old_xmit_skbs(sq, false);
+
+ if (use_napi && kick)
+ virtqueue_enable_cb_delayed(sq->vq);
+@@ -1571,7 +1601,7 @@ static netdev_tx_t start_xmit(struct sk_
+ if (!use_napi &&
+ unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
+ /* More just got used, free them then recheck. */
+- free_old_xmit_skbs(sq);
++ free_old_xmit_skbs(sq, false);
+ if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
+ netif_start_subqueue(dev, qnum);
+ virtqueue_disable_cb(sq->vq);
+@@ -2590,10 +2620,10 @@ static void free_unused_bufs(struct virt
+ for (i = 0; i < vi->max_queue_pairs; i++) {
+ struct virtqueue *vq = vi->sq[i].vq;
+ while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
+- if (!is_xdp_raw_buffer_queue(vi, i))
++ if (!is_xdp_frame(buf))
+ dev_kfree_skb(buf);
+ else
+- xdp_return_frame(buf);
++ xdp_return_frame(ptr_to_xdp(buf));
+ }
+ }
+
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+Date: Tue, 29 Jan 2019 09:45:54 +0900
+Subject: virtio_net: Don't call free_old_xmit_skbs for xdp_frames
+
+From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+
+[ Upstream commit 534da5e856334fb54cb0272a9fb3afec28ea3aed ]
+
+When napi_tx is enabled, virtnet_poll_cleantx() called
+free_old_xmit_skbs() even for xdp send queue.
+This is bogus since the queue has xdp_frames, not sk_buffs, thus mangled
+device tx bytes counters because skb->len is meaningless value, and even
+triggered oops due to general protection fault on freeing them.
+
+Since xdp send queues do not acquire locks, old xdp_frames should be
+freed only in virtnet_xdp_xmit(), so just skip free_old_xmit_skbs() for
+xdp send queues.
+
+Similarly virtnet_poll_tx() called free_old_xmit_skbs(). This NAPI
+handler is called even without calling start_xmit() because cb for tx is
+by default enabled. Once the handler is called, it enabled the cb again,
+and then the handler would be called again. We don't need this handler
+for XDP, so don't enable cb as well as not calling free_old_xmit_skbs().
+
+Also, we need to disable tx NAPI when disabling XDP, so
+virtnet_poll_tx() can safely access curr_queue_pairs and
+xdp_queue_pairs, which are not atomically updated while disabling XDP.
+
+Fixes: b92f1e6751a6 ("virtio-net: transmit napi")
+Fixes: 7b0411ef4aa6 ("virtio-net: clean tx descriptors from rx napi")
+Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/virtio_net.c | 49 +++++++++++++++++++++++++++++++----------------
+ 1 file changed, 33 insertions(+), 16 deletions(-)
+
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -1357,6 +1357,16 @@ static void free_old_xmit_skbs(struct se
+ u64_stats_update_end(&sq->stats.syncp);
+ }
+
++static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q)
++{
++ if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
++ return false;
++ else if (q < vi->curr_queue_pairs)
++ return true;
++ else
++ return false;
++}
++
+ static void virtnet_poll_cleantx(struct receive_queue *rq)
+ {
+ struct virtnet_info *vi = rq->vq->vdev->priv;
+@@ -1364,7 +1374,7 @@ static void virtnet_poll_cleantx(struct
+ struct send_queue *sq = &vi->sq[index];
+ struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index);
+
+- if (!sq->napi.weight)
++ if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index))
+ return;
+
+ if (__netif_tx_trylock(txq)) {
+@@ -1441,8 +1451,16 @@ static int virtnet_poll_tx(struct napi_s
+ {
+ struct send_queue *sq = container_of(napi, struct send_queue, napi);
+ struct virtnet_info *vi = sq->vq->vdev->priv;
+- struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, vq2txq(sq->vq));
++ unsigned int index = vq2txq(sq->vq);
++ struct netdev_queue *txq;
+
++ if (unlikely(is_xdp_raw_buffer_queue(vi, index))) {
++ /* We don't need to enable cb for XDP */
++ napi_complete_done(napi, 0);
++ return 0;
++ }
++
++ txq = netdev_get_tx_queue(vi->dev, index);
+ __netif_tx_lock(txq, raw_smp_processor_id());
+ free_old_xmit_skbs(sq);
+ __netif_tx_unlock(txq);
+@@ -2352,9 +2370,12 @@ static int virtnet_xdp_set(struct net_de
+ }
+
+ /* Make sure NAPI is not using any XDP TX queues for RX. */
+- if (netif_running(dev))
+- for (i = 0; i < vi->max_queue_pairs; i++)
++ if (netif_running(dev)) {
++ for (i = 0; i < vi->max_queue_pairs; i++) {
+ napi_disable(&vi->rq[i].napi);
++ virtnet_napi_tx_disable(&vi->sq[i].napi);
++ }
++ }
+
+ netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
+ err = _virtnet_set_queues(vi, curr_qp + xdp_qp);
+@@ -2373,16 +2394,22 @@ static int virtnet_xdp_set(struct net_de
+ }
+ if (old_prog)
+ bpf_prog_put(old_prog);
+- if (netif_running(dev))
++ if (netif_running(dev)) {
+ virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
++ virtnet_napi_tx_enable(vi, vi->sq[i].vq,
++ &vi->sq[i].napi);
++ }
+ }
+
+ return 0;
+
+ err:
+ if (netif_running(dev)) {
+- for (i = 0; i < vi->max_queue_pairs; i++)
++ for (i = 0; i < vi->max_queue_pairs; i++) {
+ virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
++ virtnet_napi_tx_enable(vi, vi->sq[i].vq,
++ &vi->sq[i].napi);
++ }
+ }
+ if (prog)
+ bpf_prog_sub(prog, vi->max_queue_pairs - 1);
+@@ -2539,16 +2566,6 @@ static void free_receive_page_frags(stru
+ put_page(vi->rq[i].alloc_frag.page);
+ }
+
+-static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q)
+-{
+- if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
+- return false;
+- else if (q < vi->curr_queue_pairs)
+- return true;
+- else
+- return false;
+-}
+-
+ static void free_unused_bufs(struct virtnet_info *vi)
+ {
+ void *buf;
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+Date: Tue, 29 Jan 2019 09:45:53 +0900
+Subject: virtio_net: Don't enable NAPI when interface is down
+
+From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+
+[ Upstream commit 8be4d9a492f88b96d4d3a06c6cbedbc40ca14c83 ]
+
+Commit 4e09ff536284 ("virtio-net: disable NAPI only when enabled during
+XDP set") tried to fix inappropriate NAPI enabling/disabling when
+!netif_running(), but was not complete.
+
+On error path virtio_net could enable NAPI even when !netif_running().
+This can cause enabling NAPI twice on virtnet_open(), which would
+trigger BUG_ON() in napi_enable().
+
+Fixes: 4941d472bf95b ("virtio-net: do not reset during XDP set")
+Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/virtio_net.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -2380,8 +2380,10 @@ static int virtnet_xdp_set(struct net_de
+ return 0;
+
+ err:
+- for (i = 0; i < vi->max_queue_pairs; i++)
+- virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
++ if (netif_running(dev)) {
++ for (i = 0; i < vi->max_queue_pairs; i++)
++ virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
++ }
+ if (prog)
+ bpf_prog_sub(prog, vi->max_queue_pairs - 1);
+ return err;
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+Date: Tue, 29 Jan 2019 09:45:57 +0900
+Subject: virtio_net: Don't process redirected XDP frames when XDP is disabled
+
+From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+
+[ Upstream commit 03aa6d34868c07b2b1b8b2db080602d7ec528173 ]
+
+Commit 8dcc5b0ab0ec ("virtio_net: fix ndo_xdp_xmit crash towards dev not
+ready for XDP") tried to avoid access to unexpected sq while XDP is
+disabled, but was not complete.
+
+There was a small window which causes out of bounds sq access in
+virtnet_xdp_xmit() while disabling XDP.
+
+An example case of
+ - curr_queue_pairs = 6 (2 for SKB and 4 for XDP)
+ - online_cpu_num = xdp_queue_pairs = 4
+when XDP is enabled:
+
+CPU 0 CPU 1
+(Disabling XDP) (Processing redirected XDP frames)
+
+ virtnet_xdp_xmit()
+virtnet_xdp_set()
+ _virtnet_set_queues()
+ set curr_queue_pairs (2)
+ check if rq->xdp_prog is not NULL
+ virtnet_xdp_sq(vi)
+ qp = curr_queue_pairs -
+ xdp_queue_pairs +
+ smp_processor_id()
+ = 2 - 4 + 1 = -1
+ sq = &vi->sq[qp] // out of bounds access
+ set xdp_queue_pairs (0)
+ rq->xdp_prog = NULL
+
+Basically we should not change curr_queue_pairs and xdp_queue_pairs
+while someone can read the values. Thus, when disabling XDP, assign NULL
+to rq->xdp_prog first, and wait for RCU grace period, then change
+xxx_queue_pairs.
+Note that we need to keep the current order when enabling XDP though.
+
+- v2: Make rcu_assign_pointer/synchronize_net conditional instead of
+ _virtnet_set_queues.
+
+Fixes: 186b3c998c50 ("virtio-net: support XDP_REDIRECT")
+Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/virtio_net.c | 33 ++++++++++++++++++++++++++-------
+ 1 file changed, 26 insertions(+), 7 deletions(-)
+
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -2360,6 +2360,10 @@ static int virtnet_xdp_set(struct net_de
+ return -ENOMEM;
+ }
+
++ old_prog = rtnl_dereference(vi->rq[0].xdp_prog);
++ if (!prog && !old_prog)
++ return 0;
++
+ if (prog) {
+ prog = bpf_prog_add(prog, vi->max_queue_pairs - 1);
+ if (IS_ERR(prog))
+@@ -2374,21 +2378,30 @@ static int virtnet_xdp_set(struct net_de
+ }
+ }
+
++ if (!prog) {
++ for (i = 0; i < vi->max_queue_pairs; i++) {
++ rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
++ if (i == 0)
++ virtnet_restore_guest_offloads(vi);
++ }
++ synchronize_net();
++ }
++
+ err = _virtnet_set_queues(vi, curr_qp + xdp_qp);
+ if (err)
+ goto err;
+ netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
+ vi->xdp_queue_pairs = xdp_qp;
+
+- for (i = 0; i < vi->max_queue_pairs; i++) {
+- old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
+- rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
+- if (i == 0) {
+- if (!old_prog)
++ if (prog) {
++ for (i = 0; i < vi->max_queue_pairs; i++) {
++ rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
++ if (i == 0 && !old_prog)
+ virtnet_clear_guest_offloads(vi);
+- if (!prog)
+- virtnet_restore_guest_offloads(vi);
+ }
++ }
++
++ for (i = 0; i < vi->max_queue_pairs; i++) {
+ if (old_prog)
+ bpf_prog_put(old_prog);
+ if (netif_running(dev)) {
+@@ -2401,6 +2414,12 @@ static int virtnet_xdp_set(struct net_de
+ return 0;
+
+ err:
++ if (!prog) {
++ virtnet_clear_guest_offloads(vi);
++ for (i = 0; i < vi->max_queue_pairs; i++)
++ rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog);
++ }
++
+ if (netif_running(dev)) {
+ for (i = 0; i < vi->max_queue_pairs; i++) {
+ virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+Date: Tue, 29 Jan 2019 09:45:55 +0900
+Subject: virtio_net: Fix not restoring real_num_rx_queues
+
+From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+
+[ Upstream commit 188313c137c4f76afd0862f50dbc185b198b9e2a ]
+
+When _virtnet_set_queues() failed we did not restore real_num_rx_queues.
+Fix this by placing the change of real_num_rx_queues after
+_virtnet_set_queues().
+This order is also in line with virtnet_set_channels().
+
+Fixes: 4941d472bf95 ("virtio-net: do not reset during XDP set")
+Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/virtio_net.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -2377,10 +2377,10 @@ static int virtnet_xdp_set(struct net_de
+ }
+ }
+
+- netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
+ err = _virtnet_set_queues(vi, curr_qp + xdp_qp);
+ if (err)
+ goto err;
++ netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
+ vi->xdp_queue_pairs = xdp_qp;
+
+ for (i = 0; i < vi->max_queue_pairs; i++) {
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+Date: Tue, 29 Jan 2019 09:45:56 +0900
+Subject: virtio_net: Fix out of bounds access of sq
+
+From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+
+[ Upstream commit 1667c08a9d31c7cdf09f4890816bfbf20b685495 ]
+
+When XDP is disabled, curr_queue_pairs + smp_processor_id() can be
+larger than max_queue_pairs.
+There is no guarantee that we have enough XDP send queues dedicated for
+each cpu when XDP is disabled, so do not count drops on sq in that case.
+
+Fixes: 5b8f3c8d30a6 ("virtio_net: Add XDP related stats")
+Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/virtio_net.c | 17 +++++++----------
+ 1 file changed, 7 insertions(+), 10 deletions(-)
+
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -490,20 +490,17 @@ static int virtnet_xdp_xmit(struct net_d
+ int ret, err;
+ int i;
+
+- sq = virtnet_xdp_sq(vi);
+-
+- if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
+- ret = -EINVAL;
+- drops = n;
+- goto out;
+- }
+-
+ /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
+ * indicate XDP resources have been successfully allocated.
+ */
+ xdp_prog = rcu_dereference(rq->xdp_prog);
+- if (!xdp_prog) {
+- ret = -ENXIO;
++ if (!xdp_prog)
++ return -ENXIO;
++
++ sq = virtnet_xdp_sq(vi);
++
++ if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
++ ret = -EINVAL;
+ drops = n;
+ goto out;
+ }
--- /dev/null
+From foo@baz Sat Feb 2 10:53:21 CET 2019
+From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+Date: Tue, 29 Jan 2019 09:45:58 +0900
+Subject: virtio_net: Use xdp_return_frame to free xdp_frames on destroying vqs
+
+From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+
+[ Upstream commit 07b344f494ddda9f061b396407c96df8c46c82b5 ]
+
+put_page() can work as a fallback for freeing xdp_frames, but the
+appropriate way is to use xdp_return_frame().
+
+Fixes: cac320c850ef ("virtio_net: convert to use generic xdp_frame and xdp_return_frame API")
+Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/virtio_net.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -2593,7 +2593,7 @@ static void free_unused_bufs(struct virt
+ if (!is_xdp_raw_buffer_queue(vi, i))
+ dev_kfree_skb(buf);
+ else
+- put_page(virt_to_head_page(buf));
++ xdp_return_frame(buf);
+ }
+ }
+