From: Greg Kroah-Hartman
Date: Sat, 2 Feb 2019 09:54:03 +0000 (+0100)
Subject: 4.20-stable patches
X-Git-Tag: v4.20.7~46
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=d99827c629b8c5daddfb76298159018ff878befe;p=thirdparty%2Fkernel%2Fstable-queue.git

4.20-stable patches

added patches:
ip6mr-fix-notifiers-call-on-mroute_clean_tables.patch
ipv6-consider-sk_bound_dev_if-when-binding-a-socket-to-an-address.patch
ipv6-sr-clear-ip6cb-skb-on-srh-ip4ip6-encapsulation.patch
ipvlan-l3mdev-fix-broken-l3s-mode-wrt-local-routes.patch
l2tp-copy-4-more-bytes-to-linear-part-if-necessary.patch
l2tp-fix-reading-optional-fields-of-l2tpv3.patch
net-ip6_gre-always-reports-o_key-to-userspace.patch
net-ip_gre-always-reports-o_key-to-userspace.patch
net-ip_gre-use-erspan-key-field-for-tunnel-lookup.patch
net-ipv6-don-t-return-positive-numbers-when-nothing-was-dumped.patch
net-mlx4_core-add-masking-for-a-few-queries-on-hca-caps.patch
net-mlx5e-allow-mac-invalidation-while-spoofchk-is-on.patch
net-rose-fix-null-ax25_cb-kernel-panic.patch
net-set-default-network-namespace-in-init_dummy_netdev.patch
net-tls-fix-deadlock-in-free_resources-tx.patch
net-tls-save-iv-in-tls_rec-for-async-crypto-requests.patch
netrom-switch-to-sock-timer-api.patch
ravb-expand-rx-descriptor-data-to-accommodate-hw-checksum.patch
revert-net-mlx5e-e-switch-initialize-eswitch-only-if-eswitch-manager.patch
sctp-improve-the-events-for-sctp-stream-adding.patch
sctp-improve-the-events-for-sctp-stream-reset.patch
sctp-set-chunk-transport-correctly-when-it-s-a-new-asoc.patch
sctp-set-flow-sport-from-saddr-only-when-it-s-0.patch
tun-move-the-call-to-tun_set_real_num_queues.patch
ucc_geth-reset-bql-queue-when-stopping-device.patch
vhost-fix-oob-in-get_rx_bufs.patch
virtio_net-differentiate-sk_buff-and-xdp_frame-on-freeing.patch
virtio_net-don-t-call-free_old_xmit_skbs-for-xdp_frames.patch
virtio_net-don-t-enable-napi-when-interface-is-down.patch
virtio_net-don-t-process-redirected-xdp-frames-when-xdp-is-disabled.patch
virtio_net-fix-not-restoring-real_num_rx_queues.patch
virtio_net-fix-out-of-bounds-access-of-sq.patch
virtio_net-use-xdp_return_frame-to-free-xdp_frames-on-destroying-vqs.patch
---

diff --git a/queue-4.20/ip6mr-fix-notifiers-call-on-mroute_clean_tables.patch b/queue-4.20/ip6mr-fix-notifiers-call-on-mroute_clean_tables.patch
new file mode 100644
index 00000000000..8c1897479db
--- /dev/null
+++ b/queue-4.20/ip6mr-fix-notifiers-call-on-mroute_clean_tables.patch
@@ -0,0 +1,90 @@
+From foo@baz Sat Feb 2 10:53:00 CET 2019
+From: Nir Dotan
+Date: Sun, 27 Jan 2019 09:26:22 +0200
+Subject: ip6mr: Fix notifiers call on mroute_clean_tables()
+
+From: Nir Dotan
+
+[ Upstream commit 146820cc240f4389cf33481c058d9493aef95e25 ]
+
+When the MC route socket is closed, mroute_clean_tables() is called to
+clean up existing routes. The notifiers call was mistakenly put on the
+cleanup of the unresolved MC route entries cache.
+In a case where the MC socket closes before an unresolved route expires,
+the notifier call leads to a crash, caused by the driver trying to
+increment a non-initialized refcount_t object [1] and then, when handling
+is done, to decrement it [2]. This was detected by a test recently added in
+commit 6d4efada3b82 ("selftests: forwarding: Add multicast routing test").
+
+Fix that by putting the notifiers call on the resolved entries traversal,
+instead of on the unresolved entries traversal.
+
+[1]
+
+[ 245.748967] refcount_t: increment on 0; use-after-free.
+[ 245.754829] WARNING: CPU: 3 PID: 3223 at lib/refcount.c:153 refcount_inc_checked+0x2b/0x30 +... +[ 245.802357] Hardware name: Mellanox Technologies Ltd. MSN2740/SA001237, BIOS 5.6.5 06/07/2016 +[ 245.811873] RIP: 0010:refcount_inc_checked+0x2b/0x30 +... +[ 245.907487] Call Trace: +[ 245.910231] mlxsw_sp_router_fib_event.cold.181+0x42/0x47 [mlxsw_spectrum] +[ 245.917913] notifier_call_chain+0x45/0x7 +[ 245.922484] atomic_notifier_call_chain+0x15/0x20 +[ 245.927729] call_fib_notifiers+0x15/0x30 +[ 245.932205] mroute_clean_tables+0x372/0x3f +[ 245.936971] ip6mr_sk_done+0xb1/0xc0 +[ 245.940960] ip6_mroute_setsockopt+0x1da/0x5f0 +... + +[2] + +[ 246.128487] refcount_t: underflow; use-after-free. +[ 246.133859] WARNING: CPU: 0 PID: 7 at lib/refcount.c:187 refcount_sub_and_test_checked+0x4c/0x60 +[ 246.183521] Hardware name: Mellanox Technologies Ltd. MSN2740/SA001237, BIOS 5.6.5 06/07/2016 +... +[ 246.193062] Workqueue: mlxsw_core_ordered mlxsw_sp_router_fibmr_event_work [mlxsw_spectrum] +[ 246.202394] RIP: 0010:refcount_sub_and_test_checked+0x4c/0x60 +... +[ 246.298889] Call Trace: +[ 246.301617] refcount_dec_and_test_checked+0x11/0x20 +[ 246.307170] mlxsw_sp_router_fibmr_event_work.cold.196+0x47/0x78 [mlxsw_spectrum] +[ 246.315531] process_one_work+0x1fa/0x3f0 +[ 246.320005] worker_thread+0x2f/0x3e0 +[ 246.324083] kthread+0x118/0x130 +[ 246.327683] ? wq_update_unbound_numa+0x1b0/0x1b0 +[ 246.332926] ? kthread_park+0x80/0x80 +[ 246.337013] ret_from_fork+0x1f/0x30 + +Fixes: 088aa3eec2ce ("ip6mr: Support fib notifications") +Signed-off-by: Nir Dotan +Reviewed-by: Ido Schimmel +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6mr.c | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +--- a/net/ipv6/ip6mr.c ++++ b/net/ipv6/ip6mr.c +@@ -1516,6 +1516,9 @@ static void mroute_clean_tables(struct m + continue; + rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params); + list_del_rcu(&c->list); ++ call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net), ++ FIB_EVENT_ENTRY_DEL, ++ (struct mfc6_cache *)c, mrt->id); + mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE); + mr_cache_put(c); + } +@@ -1524,10 +1527,6 @@ static void mroute_clean_tables(struct m + spin_lock_bh(&mfc_unres_lock); + list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) { + list_del(&c->list); +- call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net), +- FIB_EVENT_ENTRY_DEL, +- (struct mfc6_cache *)c, +- mrt->id); + mr6_netlink_event(mrt, (struct mfc6_cache *)c, + RTM_DELROUTE); + ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c); diff --git a/queue-4.20/ipv6-consider-sk_bound_dev_if-when-binding-a-socket-to-an-address.patch b/queue-4.20/ipv6-consider-sk_bound_dev_if-when-binding-a-socket-to-an-address.patch new file mode 100644 index 00000000000..4f09fa59db2 --- /dev/null +++ b/queue-4.20/ipv6-consider-sk_bound_dev_if-when-binding-a-socket-to-an-address.patch @@ -0,0 +1,37 @@ +From foo@baz Sat Feb 2 10:53:01 CET 2019 +From: David Ahern +Date: Wed, 2 Jan 2019 18:57:09 -0800 +Subject: ipv6: Consider sk_bound_dev_if when binding a socket to an address + +From: David Ahern + +[ Upstream commit c5ee066333ebc322a24a00a743ed941a0c68617e ] + +IPv6 does not consider if the socket is bound to a device when binding +to an address. The result is that a socket can be bound to eth0 and then +bound to the address of eth1. If the device is a VRF, the result is that +a socket can only be bound to an address in the default VRF. 
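As an illustration of the case this patch rejects, a minimal userspace sketch
(not part of the patch; "eth0" and 2001:db8::1 — assumed to be configured on
a different device, e.g. eth1 — are placeholders, and SO_BINDTODEVICE needs
CAP_NET_RAW):

    #include <arpa/inet.h>
    #include <netinet/in.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = socket(AF_INET6, SOCK_DGRAM, 0);
            struct sockaddr_in6 sa = { .sin6_family = AF_INET6 };

            /* Bind the socket to one device first ... */
            setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, "eth0", strlen("eth0"));

            /* ... then try to bind() to an address owned by another device.
             * Before the fix this succeeded; with the fix it should fail. */
            inet_pton(AF_INET6, "2001:db8::1", &sa.sin6_addr);
            if (bind(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0)
                    perror("bind");   /* expected once the fix is applied */

            close(fd);
            return 0;
    }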
+ +Resolve by considering the device if sk_bound_dev_if is set. + +This problem exists from the beginning of git history. + +Signed-off-by: David Ahern +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/af_inet6.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/net/ipv6/af_inet6.c ++++ b/net/ipv6/af_inet6.c +@@ -362,6 +362,9 @@ static int __inet6_bind(struct sock *sk, + err = -EINVAL; + goto out_unlock; + } ++ } ++ ++ if (sk->sk_bound_dev_if) { + dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if); + if (!dev) { + err = -ENODEV; diff --git a/queue-4.20/ipv6-sr-clear-ip6cb-skb-on-srh-ip4ip6-encapsulation.patch b/queue-4.20/ipv6-sr-clear-ip6cb-skb-on-srh-ip4ip6-encapsulation.patch new file mode 100644 index 00000000000..979d973e58f --- /dev/null +++ b/queue-4.20/ipv6-sr-clear-ip6cb-skb-on-srh-ip4ip6-encapsulation.patch @@ -0,0 +1,37 @@ +From foo@baz Sat Feb 2 10:53:00 CET 2019 +From: Yohei Kanemaru +Date: Tue, 29 Jan 2019 15:52:34 +0900 +Subject: ipv6: sr: clear IP6CB(skb) on SRH ip4ip6 encapsulation + +From: Yohei Kanemaru + +[ Upstream commit ef489749aae508e6f17886775c075f12ff919fb1 ] + +skb->cb may contain data from previous layers (in an observed case +IPv4 with L3 Master Device). In the observed scenario, the data in +IPCB(skb)->frags was misinterpreted as IP6CB(skb)->frag_max_size, +eventually caused an unexpected IPv6 fragmentation in ip6_fragment() +through ip6_finish_output(). + +This patch clears IP6CB(skb), which potentially contains garbage data, +on the SRH ip4ip6 encapsulation. + +Fixes: 32d99d0b6702 ("ipv6: sr: add support for ip4ip6 encapsulation") +Signed-off-by: Yohei Kanemaru +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/seg6_iptunnel.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/ipv6/seg6_iptunnel.c ++++ b/net/ipv6/seg6_iptunnel.c +@@ -146,6 +146,8 @@ int seg6_do_srh_encap(struct sk_buff *sk + } else { + ip6_flow_hdr(hdr, 0, flowlabel); + hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb)); ++ ++ memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); + } + + hdr->nexthdr = NEXTHDR_ROUTING; diff --git a/queue-4.20/ipvlan-l3mdev-fix-broken-l3s-mode-wrt-local-routes.patch b/queue-4.20/ipvlan-l3mdev-fix-broken-l3s-mode-wrt-local-routes.patch new file mode 100644 index 00000000000..92ee0d6562d --- /dev/null +++ b/queue-4.20/ipvlan-l3mdev-fix-broken-l3s-mode-wrt-local-routes.patch @@ -0,0 +1,147 @@ +From foo@baz Sat Feb 2 10:53:00 CET 2019 +From: Daniel Borkmann +Date: Wed, 30 Jan 2019 12:49:48 +0100 +Subject: ipvlan, l3mdev: fix broken l3s mode wrt local routes + +From: Daniel Borkmann + +[ Upstream commit d5256083f62e2720f75bb3c5a928a0afe47d6bc3 ] + +While implementing ipvlan l3 and l3s mode for kubernetes CNI plugin, +I ran into the issue that while l3 mode is working fine, l3s mode +does not have any connectivity to kube-apiserver and hence all pods +end up in Error state as well. The ipvlan master device sits on +top of a bond device and hostns traffic to kube-apiserver (also running +in hostns) is DNATed from 10.152.183.1:443 to 139.178.29.207:37573 +where the latter is the address of the bond0. While in l3 mode, a +curl to https://10.152.183.1:443 or to https://139.178.29.207:37573 +works fine from hostns, neither of them do in case of l3s. In the +latter only a curl to https://127.0.0.1:37573 appeared to work where +for local addresses of bond0 I saw kernel suddenly starting to emit +ARP requests to query HW address of bond0 which remained unanswered +and neighbor entries in INCOMPLETE state. 
These ARP requests only +happen while in l3s. + +Debugging this further, I found the issue is that l3s mode is piggy- +backing on l3 master device, and in this case local routes are using +l3mdev_master_dev_rcu(dev) instead of net->loopback_dev as per commit +f5a0aab84b74 ("net: ipv4: dst for local input routes should use l3mdev +if relevant") and 5f02ce24c269 ("net: l3mdev: Allow the l3mdev to be +a loopback"). I found that reverting them back into using the +net->loopback_dev fixed ipvlan l3s connectivity and got everything +working for the CNI. + +Now judging from 4fbae7d83c98 ("ipvlan: Introduce l3s mode") and the +l3mdev paper in [0] the only sole reason why ipvlan l3s is relying +on l3 master device is to get the l3mdev_ip_rcv() receive hook for +setting the dst entry of the input route without adding its own +ipvlan specific hacks into the receive path, however, any l3 domain +semantics beyond just that are breaking l3s operation. Note that +ipvlan also has the ability to dynamically switch its internal +operation from l3 to l3s for all ports via ipvlan_set_port_mode() +at runtime. In any case, l3 vs l3s soley distinguishes itself by +'de-confusing' netfilter through switching skb->dev to ipvlan slave +device late in NF_INET_LOCAL_IN before handing the skb to L4. + +Minimal fix taken here is to add a IFF_L3MDEV_RX_HANDLER flag which, +if set from ipvlan setup, gets us only the wanted l3mdev_l3_rcv() hook +without any additional l3mdev semantics on top. This should also have +minimal impact since dev->priv_flags is already hot in cache. With +this set, l3s mode is working fine and I also get things like +masquerading pod traffic on the ipvlan master properly working. + + [0] https://netdevconf.org/1.2/papers/ahern-what-is-l3mdev-paper.pdf + +Fixes: f5a0aab84b74 ("net: ipv4: dst for local input routes should use l3mdev if relevant") +Fixes: 5f02ce24c269 ("net: l3mdev: Allow the l3mdev to be a loopback") +Fixes: 4fbae7d83c98 ("ipvlan: Introduce l3s mode") +Signed-off-by: Daniel Borkmann +Cc: Mahesh Bandewar +Cc: David Ahern +Cc: Florian Westphal +Cc: Martynas Pumputis +Acked-by: David Ahern +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ipvlan/ipvlan_main.c | 6 +++--- + include/linux/netdevice.h | 8 ++++++++ + include/net/l3mdev.h | 3 ++- + 3 files changed, 13 insertions(+), 4 deletions(-) + +--- a/drivers/net/ipvlan/ipvlan_main.c ++++ b/drivers/net/ipvlan/ipvlan_main.c +@@ -97,12 +97,12 @@ static int ipvlan_set_port_mode(struct i + err = ipvlan_register_nf_hook(read_pnet(&port->pnet)); + if (!err) { + mdev->l3mdev_ops = &ipvl_l3mdev_ops; +- mdev->priv_flags |= IFF_L3MDEV_MASTER; ++ mdev->priv_flags |= IFF_L3MDEV_RX_HANDLER; + } else + goto fail; + } else if (port->mode == IPVLAN_MODE_L3S) { + /* Old mode was L3S */ +- mdev->priv_flags &= ~IFF_L3MDEV_MASTER; ++ mdev->priv_flags &= ~IFF_L3MDEV_RX_HANDLER; + ipvlan_unregister_nf_hook(read_pnet(&port->pnet)); + mdev->l3mdev_ops = NULL; + } +@@ -162,7 +162,7 @@ static void ipvlan_port_destroy(struct n + struct sk_buff *skb; + + if (port->mode == IPVLAN_MODE_L3S) { +- dev->priv_flags &= ~IFF_L3MDEV_MASTER; ++ dev->priv_flags &= ~IFF_L3MDEV_RX_HANDLER; + ipvlan_unregister_nf_hook(dev_net(dev)); + dev->l3mdev_ops = NULL; + } +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -1487,6 +1487,7 @@ struct net_device_ops { + * @IFF_NO_RX_HANDLER: device doesn't support the rx_handler hook + * @IFF_FAILOVER: device is a failover master device + * @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device ++ * @IFF_L3MDEV_RX_HANDLER: only invoke the rx handler of L3 master device + */ + enum netdev_priv_flags { + IFF_802_1Q_VLAN = 1<<0, +@@ -1518,6 +1519,7 @@ enum netdev_priv_flags { + IFF_NO_RX_HANDLER = 1<<26, + IFF_FAILOVER = 1<<27, + IFF_FAILOVER_SLAVE = 1<<28, ++ IFF_L3MDEV_RX_HANDLER = 1<<29, + }; + + #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN +@@ -1548,6 +1550,7 @@ enum netdev_priv_flags { + #define IFF_NO_RX_HANDLER IFF_NO_RX_HANDLER + #define IFF_FAILOVER IFF_FAILOVER + #define IFF_FAILOVER_SLAVE IFF_FAILOVER_SLAVE ++#define IFF_L3MDEV_RX_HANDLER IFF_L3MDEV_RX_HANDLER + + /** + * struct net_device - The DEVICE structure. +@@ -4523,6 +4526,11 @@ static inline bool netif_supports_nofcs( + return dev->priv_flags & IFF_SUPP_NOFCS; + } + ++static inline bool netif_has_l3_rx_handler(const struct net_device *dev) ++{ ++ return dev->priv_flags & IFF_L3MDEV_RX_HANDLER; ++} ++ + static inline bool netif_is_l3_master(const struct net_device *dev) + { + return dev->priv_flags & IFF_L3MDEV_MASTER; +--- a/include/net/l3mdev.h ++++ b/include/net/l3mdev.h +@@ -142,7 +142,8 @@ struct sk_buff *l3mdev_l3_rcv(struct sk_ + + if (netif_is_l3_slave(skb->dev)) + master = netdev_master_upper_dev_get_rcu(skb->dev); +- else if (netif_is_l3_master(skb->dev)) ++ else if (netif_is_l3_master(skb->dev) || ++ netif_has_l3_rx_handler(skb->dev)) + master = skb->dev; + + if (master && master->l3mdev_ops->l3mdev_l3_rcv) diff --git a/queue-4.20/l2tp-copy-4-more-bytes-to-linear-part-if-necessary.patch b/queue-4.20/l2tp-copy-4-more-bytes-to-linear-part-if-necessary.patch new file mode 100644 index 00000000000..a8c9a9f4ed2 --- /dev/null +++ b/queue-4.20/l2tp-copy-4-more-bytes-to-linear-part-if-necessary.patch @@ -0,0 +1,51 @@ +From foo@baz Sat Feb 2 10:53:00 CET 2019 +From: Jacob Wen +Date: Thu, 31 Jan 2019 15:18:56 +0800 +Subject: l2tp: copy 4 more bytes to linear part if necessary + +From: Jacob Wen + +[ Upstream commit 91c524708de6207f59dd3512518d8a1c7b434ee3 ] + +The size of L2TPv2 header with all optional fields is 14 bytes. +l2tp_udp_recv_core only moves 10 bytes to the linear part of a +skb. 
This may lead to l2tp_recv_common reading data outside of the skb.
+
+This patch makes sure that there are at least 14 bytes in the linear
+part of the skb, to meet the maximum need of l2tp_udp_recv_core and
+l2tp_recv_common. The minimum size of both PPP HDLC-like frames and
+Ethernet frames is larger than 14 bytes, so we are safe to do so.
+
+Also remove L2TP_HDR_SIZE_NOSEQ; it is unused now.
+
+Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts")
+Suggested-by: Guillaume Nault
+Signed-off-by: Jacob Wen
+Acked-by: Guillaume Nault
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/l2tp/l2tp_core.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- a/net/l2tp/l2tp_core.c
++++ b/net/l2tp/l2tp_core.c
+@@ -83,8 +83,7 @@
+ #define L2TP_SLFLAG_S 0x40000000
+ #define L2TP_SL_SEQ_MASK 0x00ffffff
+
+-#define L2TP_HDR_SIZE_SEQ 10
+-#define L2TP_HDR_SIZE_NOSEQ 6
++#define L2TP_HDR_SIZE_MAX 14
+
+ /* Default trace flags */
+ #define L2TP_DEFAULT_DEBUG_FLAGS 0
+@@ -808,7 +807,7 @@ static int l2tp_udp_recv_core(struct l2t
+ 	__skb_pull(skb, sizeof(struct udphdr));
+
+ 	/* Short packet? */
+-	if (!pskb_may_pull(skb, L2TP_HDR_SIZE_SEQ)) {
++	if (!pskb_may_pull(skb, L2TP_HDR_SIZE_MAX)) {
+ 		l2tp_info(tunnel, L2TP_MSG_DATA,
+ 			"%s: recv short packet (len=%d)\n",
+ 			tunnel->name, skb->len);
diff --git a/queue-4.20/l2tp-fix-reading-optional-fields-of-l2tpv3.patch b/queue-4.20/l2tp-fix-reading-optional-fields-of-l2tpv3.patch
new file mode 100644
index 00000000000..c1ec979e4c4
--- /dev/null
+++ b/queue-4.20/l2tp-fix-reading-optional-fields-of-l2tpv3.patch
@@ -0,0 +1,112 @@
+From foo@baz Sat Feb 2 10:53:00 CET 2019
+From: Jacob Wen
+Date: Wed, 30 Jan 2019 14:55:14 +0800
+Subject: l2tp: fix reading optional fields of L2TPv3
+
+From: Jacob Wen
+
+[ Upstream commit 4522a70db7aa5e77526a4079628578599821b193 ]
+
+Use pskb_may_pull() to make sure the optional fields are in the skb's
+linear part, so we can safely read them later.
+
+It's easy to reproduce the issue with a net driver that supports paged
+skb data. Just create an L2TPv3-over-IP tunnel and then generate some
+network traffic.
+Once reproduced, rx err in /sys/kernel/debug/l2tp/tunnels will increase.
+
+Changes in v4:
+1. s/l2tp_v3_pull_opt/l2tp_v3_ensure_opt_in_linear/
+2. s/tunnel->version != L2TP_HDR_VER_2/tunnel->version == L2TP_HDR_VER_3/
+3. Add 'Fixes' in commit messages.
+
+Changes in v3:
+1. To keep consistency, move the code out of l2tp_recv_common.
+2. Use "net" instead of "net-next", since this is a bug fix.
+
+Changes in v2:
+1. Only fix L2TPv3 to make code simple.
+   To fix both L2TPv3 and L2TPv2, we'd better refactor l2tp_recv_common.
+   It's complicated to do so.
+2. Reloading pointers after pskb_may_pull
+
+Fixes: f7faffa3ff8e ("l2tp: Add L2TPv3 protocol support")
+Fixes: 0d76751fad77 ("l2tp: Add L2TPv3 IP encapsulation (no UDP) support")
+Fixes: a32e0eec7042 ("l2tp: introduce L2TPv3 IP encapsulation support for IPv6")
+Signed-off-by: Jacob Wen
+Acked-by: Guillaume Nault
+Signed-off-by: David S.
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/l2tp/l2tp_core.c | 4 ++++ + net/l2tp/l2tp_core.h | 20 ++++++++++++++++++++ + net/l2tp/l2tp_ip.c | 3 +++ + net/l2tp/l2tp_ip6.c | 3 +++ + 4 files changed, 30 insertions(+) + +--- a/net/l2tp/l2tp_core.c ++++ b/net/l2tp/l2tp_core.c +@@ -883,6 +883,10 @@ static int l2tp_udp_recv_core(struct l2t + goto error; + } + ++ if (tunnel->version == L2TP_HDR_VER_3 && ++ l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) ++ goto error; ++ + l2tp_recv_common(session, skb, ptr, optr, hdrflags, length); + l2tp_session_dec_refcount(session); + +--- a/net/l2tp/l2tp_core.h ++++ b/net/l2tp/l2tp_core.h +@@ -301,6 +301,26 @@ static inline bool l2tp_tunnel_uses_xfrm + } + #endif + ++static inline int l2tp_v3_ensure_opt_in_linear(struct l2tp_session *session, struct sk_buff *skb, ++ unsigned char **ptr, unsigned char **optr) ++{ ++ int opt_len = session->peer_cookie_len + l2tp_get_l2specific_len(session); ++ ++ if (opt_len > 0) { ++ int off = *ptr - *optr; ++ ++ if (!pskb_may_pull(skb, off + opt_len)) ++ return -1; ++ ++ if (skb->data != *optr) { ++ *optr = skb->data; ++ *ptr = skb->data + off; ++ } ++ } ++ ++ return 0; ++} ++ + #define l2tp_printk(ptr, type, func, fmt, ...) \ + do { \ + if (((ptr)->debug) & (type)) \ +--- a/net/l2tp/l2tp_ip.c ++++ b/net/l2tp/l2tp_ip.c +@@ -165,6 +165,9 @@ static int l2tp_ip_recv(struct sk_buff * + print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length); + } + ++ if (l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) ++ goto discard_sess; ++ + l2tp_recv_common(session, skb, ptr, optr, 0, skb->len); + l2tp_session_dec_refcount(session); + +--- a/net/l2tp/l2tp_ip6.c ++++ b/net/l2tp/l2tp_ip6.c +@@ -178,6 +178,9 @@ static int l2tp_ip6_recv(struct sk_buff + print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length); + } + ++ if (l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) ++ goto discard_sess; ++ + l2tp_recv_common(session, skb, ptr, optr, 0, skb->len); + l2tp_session_dec_refcount(session); + diff --git a/queue-4.20/net-ip6_gre-always-reports-o_key-to-userspace.patch b/queue-4.20/net-ip6_gre-always-reports-o_key-to-userspace.patch new file mode 100644 index 00000000000..1a947b193ce --- /dev/null +++ b/queue-4.20/net-ip6_gre-always-reports-o_key-to-userspace.patch @@ -0,0 +1,56 @@ +From foo@baz Sat Feb 2 10:53:00 CET 2019 +From: Lorenzo Bianconi +Date: Mon, 28 Jan 2019 22:23:49 +0100 +Subject: net: ip6_gre: always reports o_key to userspace + +From: Lorenzo Bianconi + +[ Upstream commit c706863bc8902d0c2d1a5a27ac8e1ead5d06b79d ] + +As Erspan_v4, Erspan_v6 protocol relies on o_key to configure +session id header field. However TUNNEL_KEY bit is cleared in +ip6erspan_tunnel_xmit since ERSPAN protocol does not set the key field +of the external GRE header and so the configured o_key is not reported +to userspace. The issue can be triggered with the following reproducer: + +$ip link add ip6erspan1 type ip6erspan local 2000::1 remote 2000::2 \ + key 1 seq erspan_ver 1 +$ip link set ip6erspan1 up +ip -d link sh ip6erspan1 + +ip6erspan1@NONE: mtu 1422 qdisc noop state DOWN mode DEFAULT + link/ether ba:ff:09:24:c3:0e brd ff:ff:ff:ff:ff:ff promiscuity 0 minmtu 68 maxmtu 1500 + ip6erspan remote 2000::2 local 2000::1 encaplimit 4 flowlabel 0x00000 ikey 0.0.0.1 iseq oseq + +Fix the issue adding TUNNEL_KEY bit to the o_flags parameter in +ip6gre_fill_info + +Fixes: 5a963eb61b7c ("ip6_gre: Add ERSPAN native tunnel support") +Signed-off-by: Lorenzo Bianconi +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_gre.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/net/ipv6/ip6_gre.c ++++ b/net/ipv6/ip6_gre.c +@@ -2102,12 +2102,17 @@ static int ip6gre_fill_info(struct sk_bu + { + struct ip6_tnl *t = netdev_priv(dev); + struct __ip6_tnl_parm *p = &t->parms; ++ __be16 o_flags = p->o_flags; ++ ++ if ((p->erspan_ver == 1 || p->erspan_ver == 2) && ++ !p->collect_md) ++ o_flags |= TUNNEL_KEY; + + if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || + nla_put_be16(skb, IFLA_GRE_IFLAGS, + gre_tnl_flags_to_gre_flags(p->i_flags)) || + nla_put_be16(skb, IFLA_GRE_OFLAGS, +- gre_tnl_flags_to_gre_flags(p->o_flags)) || ++ gre_tnl_flags_to_gre_flags(o_flags)) || + nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || + nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || + nla_put_in6_addr(skb, IFLA_GRE_LOCAL, &p->laddr) || diff --git a/queue-4.20/net-ip_gre-always-reports-o_key-to-userspace.patch b/queue-4.20/net-ip_gre-always-reports-o_key-to-userspace.patch new file mode 100644 index 00000000000..0bd04a28b04 --- /dev/null +++ b/queue-4.20/net-ip_gre-always-reports-o_key-to-userspace.patch @@ -0,0 +1,56 @@ +From foo@baz Sat Feb 2 10:53:00 CET 2019 +From: Lorenzo Bianconi +Date: Mon, 28 Jan 2019 22:23:48 +0100 +Subject: net: ip_gre: always reports o_key to userspace + +From: Lorenzo Bianconi + +[ Upstream commit feaf5c796b3f0240f10d0d6d0b686715fd58a05b ] + +Erspan protocol (version 1 and 2) relies on o_key to configure +session id header field. However TUNNEL_KEY bit is cleared in +erspan_xmit since ERSPAN protocol does not set the key field +of the external GRE header and so the configured o_key is not reported +to userspace. The issue can be triggered with the following reproducer: + +$ip link add erspan1 type erspan local 192.168.0.1 remote 192.168.0.2 \ + key 1 seq erspan_ver 1 +$ip link set erspan1 up +$ip -d link sh erspan1 + +erspan1@NONE: mtu 1450 qdisc pfifo_fast state UNKNOWN mode DEFAULT + link/ether 52:aa:99:95:9a:b5 brd ff:ff:ff:ff:ff:ff promiscuity 0 minmtu 68 maxmtu 1500 + erspan remote 192.168.0.2 local 192.168.0.1 ttl inherit ikey 0.0.0.1 iseq oseq erspan_index 0 + +Fix the issue adding TUNNEL_KEY bit to the o_flags parameter in +ipgre_fill_info + +Fixes: 84e54fe0a5ea ("gre: introduce native tunnel support for ERSPAN") +Signed-off-by: Lorenzo Bianconi +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_gre.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/net/ipv4/ip_gre.c ++++ b/net/ipv4/ip_gre.c +@@ -1468,12 +1468,17 @@ static int ipgre_fill_info(struct sk_buf + { + struct ip_tunnel *t = netdev_priv(dev); + struct ip_tunnel_parm *p = &t->parms; ++ __be16 o_flags = p->o_flags; ++ ++ if ((t->erspan_ver == 1 || t->erspan_ver == 2) && ++ !t->collect_md) ++ o_flags |= TUNNEL_KEY; + + if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || + nla_put_be16(skb, IFLA_GRE_IFLAGS, + gre_tnl_flags_to_gre_flags(p->i_flags)) || + nla_put_be16(skb, IFLA_GRE_OFLAGS, +- gre_tnl_flags_to_gre_flags(p->o_flags)) || ++ gre_tnl_flags_to_gre_flags(o_flags)) || + nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || + nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || + nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) || diff --git a/queue-4.20/net-ip_gre-use-erspan-key-field-for-tunnel-lookup.patch b/queue-4.20/net-ip_gre-use-erspan-key-field-for-tunnel-lookup.patch new file mode 100644 index 00000000000..d724866c982 --- /dev/null +++ b/queue-4.20/net-ip_gre-use-erspan-key-field-for-tunnel-lookup.patch @@ -0,0 +1,97 @@ +From foo@baz Sat Feb 2 10:53:00 CET 2019 +From: Lorenzo Bianconi +Date: Fri, 18 Jan 2019 12:05:39 +0100 +Subject: net: ip_gre: use erspan key field for tunnel lookup + +From: Lorenzo Bianconi + +[ Upstream commit cb73ee40b1b381eaf3749e6dbeed567bb38e5258 ] + +Use ERSPAN key header field as tunnel key in gre_parse_header routine +since ERSPAN protocol sets the key field of the external GRE header to +0 resulting in a tunnel lookup fail in ip6gre_err. +In addition remove key field parsing and pskb_may_pull check in +erspan_rcv and ip6erspan_rcv + +Fixes: 5a963eb61b7c ("ip6_gre: Add ERSPAN native tunnel support") +Signed-off-by: Lorenzo Bianconi +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/gre_demux.c | 17 +++++++++++++++++ + net/ipv4/ip_gre.c | 9 --------- + net/ipv6/ip6_gre.c | 4 ---- + 3 files changed, 17 insertions(+), 13 deletions(-) + +--- a/net/ipv4/gre_demux.c ++++ b/net/ipv4/gre_demux.c +@@ -25,6 +25,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -119,6 +120,22 @@ int gre_parse_header(struct sk_buff *skb + hdr_len += 4; + } + tpi->hdr_len = hdr_len; ++ ++ /* ERSPAN ver 1 and 2 protocol sets GRE key field ++ * to 0 and sets the configured key in the ++ * inner erspan header field ++ */ ++ if (greh->protocol == htons(ETH_P_ERSPAN) || ++ greh->protocol == htons(ETH_P_ERSPAN2)) { ++ struct erspan_base_hdr *ershdr; ++ ++ if (!pskb_may_pull(skb, nhs + hdr_len + sizeof(*ershdr))) ++ return -EINVAL; ++ ++ ershdr = (struct erspan_base_hdr *)options; ++ tpi->key = cpu_to_be32(get_session_id(ershdr)); ++ } ++ + return hdr_len; + } + EXPORT_SYMBOL(gre_parse_header); +--- a/net/ipv4/ip_gre.c ++++ b/net/ipv4/ip_gre.c +@@ -266,20 +266,11 @@ static int erspan_rcv(struct sk_buff *sk + int len; + + itn = net_generic(net, erspan_net_id); +- len = gre_hdr_len + sizeof(*ershdr); +- +- /* Check based hdr len */ +- if (unlikely(!pskb_may_pull(skb, len))) +- return PACKET_REJECT; + + iph = ip_hdr(skb); + ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len); + ver = ershdr->ver; + +- /* The original GRE header does not have key field, +- * Use ERSPAN 10-bit session ID as key. 
+- */ +- tpi->key = cpu_to_be32(get_session_id(ershdr)); + tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, + tpi->flags | TUNNEL_KEY, + iph->saddr, iph->daddr, tpi->key); +--- a/net/ipv6/ip6_gre.c ++++ b/net/ipv6/ip6_gre.c +@@ -532,13 +532,9 @@ static int ip6erspan_rcv(struct sk_buff + struct ip6_tnl *tunnel; + u8 ver; + +- if (unlikely(!pskb_may_pull(skb, sizeof(*ershdr)))) +- return PACKET_REJECT; +- + ipv6h = ipv6_hdr(skb); + ershdr = (struct erspan_base_hdr *)skb->data; + ver = ershdr->ver; +- tpi->key = cpu_to_be32(get_session_id(ershdr)); + + tunnel = ip6gre_tunnel_lookup(skb->dev, + &ipv6h->saddr, &ipv6h->daddr, tpi->key, diff --git a/queue-4.20/net-ipv6-don-t-return-positive-numbers-when-nothing-was-dumped.patch b/queue-4.20/net-ipv6-don-t-return-positive-numbers-when-nothing-was-dumped.patch new file mode 100644 index 00000000000..28ba7647c0f --- /dev/null +++ b/queue-4.20/net-ipv6-don-t-return-positive-numbers-when-nothing-was-dumped.patch @@ -0,0 +1,42 @@ +From foo@baz Sat Feb 2 10:53:00 CET 2019 +From: Jakub Kicinski +Date: Tue, 22 Jan 2019 14:47:19 -0800 +Subject: net/ipv6: don't return positive numbers when nothing was dumped + +From: Jakub Kicinski + +[ Upstream commit 1518039f6b5ac794313c24c76f85cead0cd60f6c ] + +in6_dump_addrs() returns a positive 1 if there was nothing to dump. +This return value can not be passed as return from inet6_dump_addr() +as is, because it will confuse rtnetlink, resulting in NLMSG_DONE +never getting set: + +$ ip addr list dev lo +EOF on netlink +Dump terminated + +v2: flip condition to avoid a new goto (DaveA) + +Fixes: 7c1e8a3817c5 ("netlink: fixup regression in RTM_GETADDR") +Reported-by: Brendan Galloway +Signed-off-by: Jakub Kicinski +Reviewed-by: David Ahern +Tested-by: David Ahern +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/addrconf.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/ipv6/addrconf.c ++++ b/net/ipv6/addrconf.c +@@ -5120,6 +5120,8 @@ static int inet6_dump_addr(struct sk_buf + if (idev) { + err = in6_dump_addrs(idev, skb, cb, s_ip_idx, + &fillargs); ++ if (err > 0) ++ err = 0; + } + goto put_tgt_net; + } diff --git a/queue-4.20/net-mlx4_core-add-masking-for-a-few-queries-on-hca-caps.patch b/queue-4.20/net-mlx4_core-add-masking-for-a-few-queries-on-hca-caps.patch new file mode 100644 index 00000000000..2a065733ea2 --- /dev/null +++ b/queue-4.20/net-mlx4_core-add-masking-for-a-few-queries-on-hca-caps.patch @@ -0,0 +1,142 @@ +From foo@baz Sat Feb 2 10:53:00 CET 2019 +From: Aya Levin +Date: Tue, 22 Jan 2019 15:19:44 +0200 +Subject: net/mlx4_core: Add masking for a few queries on HCA caps + +From: Aya Levin + +[ Upstream commit a40ded6043658444ee4dd6ee374119e4e98b33fc ] + +Driver reads the query HCA capabilities without the corresponding masks. +Without the correct masks, the base addresses of the queues are +unaligned. In addition some reserved bits were wrongly read. Using the +correct masks, ensures alignment of the base addresses and allows future +firmware versions safe use of the reserved bits. + +Fixes: ab9c17a009ee ("mlx4_core: Modify driver initialization flow to accommodate SRIOV for Ethernet") +Fixes: 0ff1fb654bec ("{NET, IB}/mlx4: Add device managed flow steering firmware API") +Signed-off-by: Aya Levin +Signed-off-by: Tariq Toukan +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx4/fw.c | 75 +++++++++++++++++++------------- + 1 file changed, 46 insertions(+), 29 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx4/fw.c ++++ b/drivers/net/ethernet/mellanox/mlx4/fw.c +@@ -2064,9 +2064,11 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, + { + struct mlx4_cmd_mailbox *mailbox; + __be32 *outbox; ++ u64 qword_field; + u32 dword_field; +- int err; ++ u16 word_field; + u8 byte_field; ++ int err; + static const u8 a0_dmfs_query_hw_steering[] = { + [0] = MLX4_STEERING_DMFS_A0_DEFAULT, + [1] = MLX4_STEERING_DMFS_A0_DYNAMIC, +@@ -2094,19 +2096,32 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, + + /* QPC/EEC/CQC/EQC/RDMARC attributes */ + +- MLX4_GET(param->qpc_base, outbox, INIT_HCA_QPC_BASE_OFFSET); +- MLX4_GET(param->log_num_qps, outbox, INIT_HCA_LOG_QP_OFFSET); +- MLX4_GET(param->srqc_base, outbox, INIT_HCA_SRQC_BASE_OFFSET); +- MLX4_GET(param->log_num_srqs, outbox, INIT_HCA_LOG_SRQ_OFFSET); +- MLX4_GET(param->cqc_base, outbox, INIT_HCA_CQC_BASE_OFFSET); +- MLX4_GET(param->log_num_cqs, outbox, INIT_HCA_LOG_CQ_OFFSET); +- MLX4_GET(param->altc_base, outbox, INIT_HCA_ALTC_BASE_OFFSET); +- MLX4_GET(param->auxc_base, outbox, INIT_HCA_AUXC_BASE_OFFSET); +- MLX4_GET(param->eqc_base, outbox, INIT_HCA_EQC_BASE_OFFSET); +- MLX4_GET(param->log_num_eqs, outbox, INIT_HCA_LOG_EQ_OFFSET); +- MLX4_GET(param->num_sys_eqs, outbox, INIT_HCA_NUM_SYS_EQS_OFFSET); +- MLX4_GET(param->rdmarc_base, outbox, INIT_HCA_RDMARC_BASE_OFFSET); +- MLX4_GET(param->log_rd_per_qp, outbox, INIT_HCA_LOG_RD_OFFSET); ++ MLX4_GET(qword_field, outbox, INIT_HCA_QPC_BASE_OFFSET); ++ param->qpc_base = qword_field & ~((u64)0x1f); ++ MLX4_GET(byte_field, outbox, INIT_HCA_LOG_QP_OFFSET); ++ param->log_num_qps = byte_field & 0x1f; ++ MLX4_GET(qword_field, outbox, INIT_HCA_SRQC_BASE_OFFSET); ++ param->srqc_base = qword_field & ~((u64)0x1f); ++ MLX4_GET(byte_field, outbox, INIT_HCA_LOG_SRQ_OFFSET); ++ param->log_num_srqs = byte_field & 0x1f; ++ MLX4_GET(qword_field, outbox, INIT_HCA_CQC_BASE_OFFSET); ++ param->cqc_base = qword_field & ~((u64)0x1f); ++ MLX4_GET(byte_field, outbox, INIT_HCA_LOG_CQ_OFFSET); ++ param->log_num_cqs = byte_field & 0x1f; ++ MLX4_GET(qword_field, outbox, INIT_HCA_ALTC_BASE_OFFSET); ++ param->altc_base = qword_field; ++ MLX4_GET(qword_field, outbox, INIT_HCA_AUXC_BASE_OFFSET); ++ param->auxc_base = qword_field; ++ MLX4_GET(qword_field, outbox, INIT_HCA_EQC_BASE_OFFSET); ++ param->eqc_base = qword_field & ~((u64)0x1f); ++ MLX4_GET(byte_field, outbox, INIT_HCA_LOG_EQ_OFFSET); ++ param->log_num_eqs = byte_field & 0x1f; ++ MLX4_GET(word_field, outbox, INIT_HCA_NUM_SYS_EQS_OFFSET); ++ param->num_sys_eqs = word_field & 0xfff; ++ MLX4_GET(qword_field, outbox, INIT_HCA_RDMARC_BASE_OFFSET); ++ param->rdmarc_base = qword_field & ~((u64)0x1f); ++ MLX4_GET(byte_field, outbox, INIT_HCA_LOG_RD_OFFSET); ++ param->log_rd_per_qp = byte_field & 0x7; + + MLX4_GET(dword_field, outbox, INIT_HCA_FLAGS_OFFSET); + if (dword_field & (1 << INIT_HCA_DEVICE_MANAGED_FLOW_STEERING_EN)) { +@@ -2125,22 +2140,21 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, + /* steering attributes */ + if (param->steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { + MLX4_GET(param->mc_base, outbox, INIT_HCA_FS_BASE_OFFSET); +- MLX4_GET(param->log_mc_entry_sz, outbox, +- INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET); +- MLX4_GET(param->log_mc_table_sz, outbox, +- INIT_HCA_FS_LOG_TABLE_SZ_OFFSET); +- MLX4_GET(byte_field, outbox, +- INIT_HCA_FS_A0_OFFSET); ++ MLX4_GET(byte_field, outbox, 
INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET); ++ param->log_mc_entry_sz = byte_field & 0x1f; ++ MLX4_GET(byte_field, outbox, INIT_HCA_FS_LOG_TABLE_SZ_OFFSET); ++ param->log_mc_table_sz = byte_field & 0x1f; ++ MLX4_GET(byte_field, outbox, INIT_HCA_FS_A0_OFFSET); + param->dmfs_high_steer_mode = + a0_dmfs_query_hw_steering[(byte_field >> 6) & 3]; + } else { + MLX4_GET(param->mc_base, outbox, INIT_HCA_MC_BASE_OFFSET); +- MLX4_GET(param->log_mc_entry_sz, outbox, +- INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET); +- MLX4_GET(param->log_mc_hash_sz, outbox, +- INIT_HCA_LOG_MC_HASH_SZ_OFFSET); +- MLX4_GET(param->log_mc_table_sz, outbox, +- INIT_HCA_LOG_MC_TABLE_SZ_OFFSET); ++ MLX4_GET(byte_field, outbox, INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET); ++ param->log_mc_entry_sz = byte_field & 0x1f; ++ MLX4_GET(byte_field, outbox, INIT_HCA_LOG_MC_HASH_SZ_OFFSET); ++ param->log_mc_hash_sz = byte_field & 0x1f; ++ MLX4_GET(byte_field, outbox, INIT_HCA_LOG_MC_TABLE_SZ_OFFSET); ++ param->log_mc_table_sz = byte_field & 0x1f; + } + + /* CX3 is capable of extending CQEs/EQEs from 32 to 64 bytes */ +@@ -2164,15 +2178,18 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, + /* TPT attributes */ + + MLX4_GET(param->dmpt_base, outbox, INIT_HCA_DMPT_BASE_OFFSET); +- MLX4_GET(param->mw_enabled, outbox, INIT_HCA_TPT_MW_OFFSET); +- MLX4_GET(param->log_mpt_sz, outbox, INIT_HCA_LOG_MPT_SZ_OFFSET); ++ MLX4_GET(byte_field, outbox, INIT_HCA_TPT_MW_OFFSET); ++ param->mw_enabled = byte_field >> 7; ++ MLX4_GET(byte_field, outbox, INIT_HCA_LOG_MPT_SZ_OFFSET); ++ param->log_mpt_sz = byte_field & 0x3f; + MLX4_GET(param->mtt_base, outbox, INIT_HCA_MTT_BASE_OFFSET); + MLX4_GET(param->cmpt_base, outbox, INIT_HCA_CMPT_BASE_OFFSET); + + /* UAR attributes */ + + MLX4_GET(param->uar_page_sz, outbox, INIT_HCA_UAR_PAGE_SZ_OFFSET); +- MLX4_GET(param->log_uar_sz, outbox, INIT_HCA_LOG_UAR_SZ_OFFSET); ++ MLX4_GET(byte_field, outbox, INIT_HCA_LOG_UAR_SZ_OFFSET); ++ param->log_uar_sz = byte_field & 0xf; + + /* phv_check enable */ + MLX4_GET(byte_field, outbox, INIT_HCA_CACHELINE_SZ_OFFSET); diff --git a/queue-4.20/net-mlx5e-allow-mac-invalidation-while-spoofchk-is-on.patch b/queue-4.20/net-mlx5e-allow-mac-invalidation-while-spoofchk-is-on.patch new file mode 100644 index 00000000000..0725f600f50 --- /dev/null +++ b/queue-4.20/net-mlx5e-allow-mac-invalidation-while-spoofchk-is-on.patch @@ -0,0 +1,67 @@ +From foo@baz Sat Feb 2 10:53:00 CET 2019 +From: Aya Levin +Date: Mon, 24 Dec 2018 09:48:42 +0200 +Subject: net/mlx5e: Allow MAC invalidation while spoofchk is ON + +From: Aya Levin + +[ Upstream commit 9d2cbdc5d334967c35b5f58c7bf3208e17325647 ] + +Prior to this patch the driver prohibited spoof checking on invalid MAC. +Now the user can set this configuration if it wishes to. + +This is required since libvirt might invalidate the VF Mac by setting it +to zero, while spoofcheck is ON. 
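For context, a minimal userspace re-creation of the address check involved,
mirroring the semantics of is_valid_ether_addr() from
include/linux/etherdevice.h (illustrative only; the all-zero MAC that libvirt
writes fails this check, which is what the driver used to turn into a hard
error and now merely warns about):

    #include <stdbool.h>
    #include <stdio.h>

    static bool is_zero_ether_addr(const unsigned char *a)
    {
            return !(a[0] | a[1] | a[2] | a[3] | a[4] | a[5]);
    }

    static bool is_multicast_ether_addr(const unsigned char *a)
    {
            return a[0] & 0x01;   /* I/G bit set => group address */
    }

    static bool is_valid_ether_addr(const unsigned char *a)
    {
            /* Valid = neither multicast nor all-zero. */
            return !is_multicast_ether_addr(a) && !is_zero_ether_addr(a);
    }

    int main(void)
    {
            const unsigned char zero_mac[6] = { 0 };

            printf("zero MAC valid: %d\n", is_valid_ether_addr(zero_mac)); /* 0 */
            return 0;
    }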
+ +Fixes: 1ab2068a4c66 ("net/mlx5: Implement vports admin state backup/restore") +Signed-off-by: Aya Levin +Reviewed-by: Eran Ben Elisha +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 18 ++++++------------ + 1 file changed, 6 insertions(+), 12 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +@@ -1133,13 +1133,6 @@ static int esw_vport_ingress_config(stru + int err = 0; + u8 *smac_v; + +- if (vport->info.spoofchk && !is_valid_ether_addr(vport->info.mac)) { +- mlx5_core_warn(esw->dev, +- "vport[%d] configure ingress rules failed, illegal mac with spoofchk\n", +- vport->vport); +- return -EPERM; +- } +- + esw_vport_cleanup_ingress_rules(esw, vport); + + if (!vport->info.vlan && !vport->info.qos && !vport->info.spoofchk) { +@@ -1812,13 +1805,10 @@ int mlx5_eswitch_set_vport_mac(struct ml + mutex_lock(&esw->state_lock); + evport = &esw->vports[vport]; + +- if (evport->info.spoofchk && !is_valid_ether_addr(mac)) { ++ if (evport->info.spoofchk && !is_valid_ether_addr(mac)) + mlx5_core_warn(esw->dev, +- "MAC invalidation is not allowed when spoofchk is on, vport(%d)\n", ++ "Set invalid MAC while spoofchk is on, vport(%d)\n", + vport); +- err = -EPERM; +- goto unlock; +- } + + err = mlx5_modify_nic_vport_mac_address(esw->dev, vport, mac); + if (err) { +@@ -1964,6 +1954,10 @@ int mlx5_eswitch_set_vport_spoofchk(stru + evport = &esw->vports[vport]; + pschk = evport->info.spoofchk; + evport->info.spoofchk = spoofchk; ++ if (pschk && !is_valid_ether_addr(evport->info.mac)) ++ mlx5_core_warn(esw->dev, ++ "Spoofchk in set while MAC is invalid, vport(%d)\n", ++ evport->vport); + if (evport->enabled && esw->mode == SRIOV_LEGACY) + err = esw_vport_ingress_config(esw, evport); + if (err) diff --git a/queue-4.20/net-rose-fix-null-ax25_cb-kernel-panic.patch b/queue-4.20/net-rose-fix-null-ax25_cb-kernel-panic.patch new file mode 100644 index 00000000000..5cdb79eb4ac --- /dev/null +++ b/queue-4.20/net-rose-fix-null-ax25_cb-kernel-panic.patch @@ -0,0 +1,66 @@ +From foo@baz Sat Feb 2 10:53:00 CET 2019 +From: Bernard Pidoux +Date: Fri, 25 Jan 2019 11:46:40 +0100 +Subject: net/rose: fix NULL ax25_cb kernel panic + +From: Bernard Pidoux + +[ Upstream commit b0cf029234f9b18e10703ba5147f0389c382bccc ] + +When an internally generated frame is handled by rose_xmit(), +rose_route_frame() is called: + + if (!rose_route_frame(skb, NULL)) { + dev_kfree_skb(skb); + stats->tx_errors++; + return NETDEV_TX_OK; + } + +We have the same code sequence in Net/Rom where an internally generated +frame is handled by nr_xmit() calling nr_route_frame(skb, NULL). +However, in this function NULL argument is tested while it is not in +rose_route_frame(). +Then kernel panic occurs later on when calling ax25cmp() with a NULL +ax25_cb argument as reported many times and recently with syzbot. + +We need to test if ax25 is NULL before using it. + +Testing: +Built kernel with CONFIG_ROSE=y. + +Signed-off-by: Bernard Pidoux +Acked-by: Dmitry Vyukov +Reported-by: syzbot+1a2c456a1ea08fa5b5f7@syzkaller.appspotmail.com +Cc: "David S. Miller" +Cc: Ralf Baechle +Cc: Bernard Pidoux +Cc: linux-hams@vger.kernel.org +Cc: netdev@vger.kernel.org +Cc: linux-kernel@vger.kernel.org +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/rose/rose_route.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/net/rose/rose_route.c ++++ b/net/rose/rose_route.c +@@ -850,6 +850,7 @@ void rose_link_device_down(struct net_de + + /* + * Route a frame to an appropriate AX.25 connection. ++ * A NULL ax25_cb indicates an internally generated frame. + */ + int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25) + { +@@ -867,6 +868,10 @@ int rose_route_frame(struct sk_buff *skb + + if (skb->len < ROSE_MIN_LEN) + return res; ++ ++ if (!ax25) ++ return rose_loopback_queue(skb, NULL); ++ + frametype = skb->data[2]; + lci = ((skb->data[0] << 8) & 0xF00) + ((skb->data[1] << 0) & 0x0FF); + if (frametype == ROSE_CALL_REQUEST && diff --git a/queue-4.20/net-set-default-network-namespace-in-init_dummy_netdev.patch b/queue-4.20/net-set-default-network-namespace-in-init_dummy_netdev.patch new file mode 100644 index 00000000000..5b08e2b0d3a --- /dev/null +++ b/queue-4.20/net-set-default-network-namespace-in-init_dummy_netdev.patch @@ -0,0 +1,43 @@ +From foo@baz Sat Feb 2 10:53:00 CET 2019 +From: Josh Elsasser +Date: Sat, 26 Jan 2019 14:38:33 -0800 +Subject: net: set default network namespace in init_dummy_netdev() + +From: Josh Elsasser + +[ Upstream commit 35edfdc77f683c8fd27d7732af06cf6489af60a5 ] + +Assign a default net namespace to netdevs created by init_dummy_netdev(). +Fixes a NULL pointer dereference caused by busy-polling a socket bound to +an iwlwifi wireless device, which bumps the per-net BUSYPOLLRXPACKETS stat +if napi_poll() received packets: + + BUG: unable to handle kernel NULL pointer dereference at 0000000000000190 + IP: napi_busy_loop+0xd6/0x200 + Call Trace: + sock_poll+0x5e/0x80 + do_sys_poll+0x324/0x5a0 + SyS_poll+0x6c/0xf0 + do_syscall_64+0x6b/0x1f0 + entry_SYSCALL_64_after_hwframe+0x3d/0xa2 + +Fixes: 7db6b048da3b ("net: Commonize busy polling code to focus on napi_id instead of socket") +Signed-off-by: Josh Elsasser +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/dev.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -8624,6 +8624,9 @@ int init_dummy_netdev(struct net_device + set_bit(__LINK_STATE_PRESENT, &dev->state); + set_bit(__LINK_STATE_START, &dev->state); + ++ /* napi_busy_loop stats accounting wants this */ ++ dev_net_set(dev, &init_net); ++ + /* Note : We dont allocate pcpu_refcnt for dummy devices, + * because users of this 'device' dont need to change + * its refcount. diff --git a/queue-4.20/net-tls-fix-deadlock-in-free_resources-tx.patch b/queue-4.20/net-tls-fix-deadlock-in-free_resources-tx.patch new file mode 100644 index 00000000000..a66f35903c7 --- /dev/null +++ b/queue-4.20/net-tls-fix-deadlock-in-free_resources-tx.patch @@ -0,0 +1,34 @@ +From foo@baz Sat Feb 2 10:53:00 CET 2019 +From: Dave Watson +Date: Sun, 27 Jan 2019 00:59:03 +0000 +Subject: net: tls: Fix deadlock in free_resources tx + +From: Dave Watson + +[ Upstream commit 1023121375c6b0b3dc00334983c762ba2b76cb19 ] + +If there are outstanding async tx requests (when crypto returns EINPROGRESS), +there is a potential deadlock: the tx work acquires the lock, while we +cancel_delayed_work_sync() while holding the lock. Drop the lock while waiting +for the work to complete. + +Fixes: a42055e8d2c30 ("Add support for async encryption of records...") +Signed-off-by: Dave Watson +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/tls/tls_sw.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/tls/tls_sw.c ++++ b/net/tls/tls_sw.c +@@ -1768,7 +1768,9 @@ void tls_sw_free_resources_tx(struct soc + if (atomic_read(&ctx->encrypt_pending)) + crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + ++ release_sock(sk); + cancel_delayed_work_sync(&ctx->tx_work.work); ++ lock_sock(sk); + + /* Tx whatever records we can transmit and abandon the rest */ + tls_tx_records(sk, -1); diff --git a/queue-4.20/net-tls-save-iv-in-tls_rec-for-async-crypto-requests.patch b/queue-4.20/net-tls-save-iv-in-tls_rec-for-async-crypto-requests.patch new file mode 100644 index 00000000000..748cc68a09f --- /dev/null +++ b/queue-4.20/net-tls-save-iv-in-tls_rec-for-async-crypto-requests.patch @@ -0,0 +1,58 @@ +From foo@baz Sat Feb 2 10:53:00 CET 2019 +From: Dave Watson +Date: Sun, 27 Jan 2019 00:57:38 +0000 +Subject: net: tls: Save iv in tls_rec for async crypto requests + +From: Dave Watson + +[ Upstream commit 32eb67b93c9e3cd62cb423e30b090cdd4aa8d275 ] + +aead_request_set_crypt takes an iv pointer, and we change the iv +soon after setting it. Some async crypto algorithms don't save the iv, +so we need to save it in the tls_rec for async requests. + +Found by hardcoding x64 aesni to use async crypto manager (to test the async +codepath), however I don't think this combination can happen in the wild. +Presumably other hardware offloads will need this fix, but there have been +no user reports. + +Fixes: a42055e8d2c30 ("Add support for async encryption of records...") +Signed-off-by: Dave Watson +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/tls.h | 2 ++ + net/tls/tls_sw.c | 4 +++- + 2 files changed, 5 insertions(+), 1 deletion(-) + +--- a/include/net/tls.h ++++ b/include/net/tls.h +@@ -120,6 +120,8 @@ struct tls_rec { + struct scatterlist sg_aead_out[2]; + + char aad_space[TLS_AAD_SPACE_SIZE]; ++ u8 iv_data[TLS_CIPHER_AES_GCM_128_IV_SIZE + ++ TLS_CIPHER_AES_GCM_128_SALT_SIZE]; + struct aead_request aead_req; + u8 aead_req_ctx[]; + }; +--- a/net/tls/tls_sw.c ++++ b/net/tls/tls_sw.c +@@ -439,6 +439,8 @@ static int tls_do_encryption(struct sock + struct scatterlist *sge = sk_msg_elem(msg_en, start); + int rc; + ++ memcpy(rec->iv_data, tls_ctx->tx.iv, sizeof(rec->iv_data)); ++ + sge->offset += tls_ctx->tx.prepend_size; + sge->length -= tls_ctx->tx.prepend_size; + +@@ -448,7 +450,7 @@ static int tls_do_encryption(struct sock + aead_request_set_ad(aead_req, TLS_AAD_SPACE_SIZE); + aead_request_set_crypt(aead_req, rec->sg_aead_in, + rec->sg_aead_out, +- data_len, tls_ctx->tx.iv); ++ data_len, rec->iv_data); + + aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG, + tls_encrypt_done, sk); diff --git a/queue-4.20/netrom-switch-to-sock-timer-api.patch b/queue-4.20/netrom-switch-to-sock-timer-api.patch new file mode 100644 index 00000000000..e7887da07ca --- /dev/null +++ b/queue-4.20/netrom-switch-to-sock-timer-api.patch @@ -0,0 +1,95 @@ +From foo@baz Sat Feb 2 10:53:00 CET 2019 +From: Cong Wang +Date: Thu, 24 Jan 2019 14:18:18 -0800 +Subject: netrom: switch to sock timer API + +From: Cong Wang + +[ Upstream commit 63346650c1a94a92be61a57416ac88c0a47c4327 ] + +sk_reset_timer() and sk_stop_timer() properly handle +sock refcnt for timer function. Switching to them +could fix a refcounting bug reported by syzbot. 
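For reference, the sock timer helpers tie the pending timer to a reference on
the socket, which plain mod_timer()/del_timer() do not. A simplified sketch of
their behavior, paraphrasing the helpers in net/core/sock.c (kernel-context
code, not a standalone program):

    /* Take a socket reference when arming an idle timer, drop it when
     * cancelling a pending one, so the sock cannot be freed while its
     * timer callback may still run. */
    void sk_reset_timer(struct sock *sk, struct timer_list *timer,
                        unsigned long expires)
    {
            if (!mod_timer(timer, expires))
                    sock_hold(sk);      /* timer was idle: take a reference */
    }

    void sk_stop_timer(struct sock *sk, struct timer_list *timer)
    {
            if (del_timer(timer))
                    __sock_put(sk);     /* timer was pending: drop its reference */
    }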
+Reported-and-tested-by: syzbot+defa700d16f1bd1b9a05@syzkaller.appspotmail.com
+Cc: Ralf Baechle
+Cc: linux-hams@vger.kernel.org
+Signed-off-by: Cong Wang
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/netrom/nr_timer.c | 20 ++++++++++----------
+ 1 file changed, 10 insertions(+), 10 deletions(-)
+
+--- a/net/netrom/nr_timer.c
++++ b/net/netrom/nr_timer.c
+@@ -52,21 +52,21 @@ void nr_start_t1timer(struct sock *sk)
+ {
+ 	struct nr_sock *nr = nr_sk(sk);
+
+-	mod_timer(&nr->t1timer, jiffies + nr->t1);
++	sk_reset_timer(sk, &nr->t1timer, jiffies + nr->t1);
+ }
+
+ void nr_start_t2timer(struct sock *sk)
+ {
+ 	struct nr_sock *nr = nr_sk(sk);
+
+-	mod_timer(&nr->t2timer, jiffies + nr->t2);
++	sk_reset_timer(sk, &nr->t2timer, jiffies + nr->t2);
+ }
+
+ void nr_start_t4timer(struct sock *sk)
+ {
+ 	struct nr_sock *nr = nr_sk(sk);
+
+-	mod_timer(&nr->t4timer, jiffies + nr->t4);
++	sk_reset_timer(sk, &nr->t4timer, jiffies + nr->t4);
+ }
+
+ void nr_start_idletimer(struct sock *sk)
+@@ -74,37 +74,37 @@ void nr_start_idletimer(struct sock *sk)
+ 	struct nr_sock *nr = nr_sk(sk);
+
+ 	if (nr->idle > 0)
+-		mod_timer(&nr->idletimer, jiffies + nr->idle);
++		sk_reset_timer(sk, &nr->idletimer, jiffies + nr->idle);
+ }
+
+ void nr_start_heartbeat(struct sock *sk)
+ {
+-	mod_timer(&sk->sk_timer, jiffies + 5 * HZ);
++	sk_reset_timer(sk, &sk->sk_timer, jiffies + 5 * HZ);
+ }
+
+ void nr_stop_t1timer(struct sock *sk)
+ {
+-	del_timer(&nr_sk(sk)->t1timer);
++	sk_stop_timer(sk, &nr_sk(sk)->t1timer);
+ }
+
+ void nr_stop_t2timer(struct sock *sk)
+ {
+-	del_timer(&nr_sk(sk)->t2timer);
++	sk_stop_timer(sk, &nr_sk(sk)->t2timer);
+ }
+
+ void nr_stop_t4timer(struct sock *sk)
+ {
+-	del_timer(&nr_sk(sk)->t4timer);
++	sk_stop_timer(sk, &nr_sk(sk)->t4timer);
+ }
+
+ void nr_stop_idletimer(struct sock *sk)
+ {
+-	del_timer(&nr_sk(sk)->idletimer);
++	sk_stop_timer(sk, &nr_sk(sk)->idletimer);
+ }
+
+ void nr_stop_heartbeat(struct sock *sk)
+ {
+-	del_timer(&sk->sk_timer);
++	sk_stop_timer(sk, &sk->sk_timer);
+ }
+
+ int nr_t1timer_running(struct sock *sk)
diff --git a/queue-4.20/ravb-expand-rx-descriptor-data-to-accommodate-hw-checksum.patch b/queue-4.20/ravb-expand-rx-descriptor-data-to-accommodate-hw-checksum.patch
new file mode 100644
index 00000000000..27fb2b77a27
--- /dev/null
+++ b/queue-4.20/ravb-expand-rx-descriptor-data-to-accommodate-hw-checksum.patch
@@ -0,0 +1,75 @@
+From foo@baz Sat Feb 2 10:53:00 CET 2019
+From: Simon Horman
+Date: Wed, 23 Jan 2019 12:14:52 +0100
+Subject: ravb: expand rx descriptor data to accommodate hw checksum
+
+From: Simon Horman
+
+[ Upstream commit 12da64300fbc76b875900445f4146c3dc617d43e ]
+
+EtherAVB may provide a checksum of packet data, appended to the packet
+data. In order to allow this checksum to be received by the host,
+descriptor data needs to be enlarged by 2 bytes to accommodate the
+checksum.
+
+In the case of MTU-sized packets without a VLAN tag, the checksum was
+already accommodated by virtue of the space reserved for the VLAN tag.
+However, an MTU-sized packet with a VLAN tag consumed all packet data
+space provided by a descriptor, leaving no space for the trailing
+checksum.
+
+This was not detected by the driver, which incorrectly used the last two
+bytes of packet data as the checksum and truncated the packet by two
+bytes. This resulted in all such packets being dropped.
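A worked example of the sizing for a 1500-byte MTU (standalone C; the
constants mirror the kernel's ETH_HLEN = 14, VLAN_HLEN = 4 and
sizeof(__sum16) = 2 — illustrative arithmetic only, not driver code):

    #include <stdio.h>

    int main(void)
    {
            const int mtu = 1500, eth_hlen = 14, vlan_hlen = 4, hw_csum = 2;
            const int old_sz = mtu + eth_hlen + vlan_hlen;        /* 1518 */
            const int new_sz = old_sz + hw_csum;                  /* 1520 */
            const int tagged = eth_hlen + vlan_hlen + mtu;        /* 1518 */
            const int untagged = eth_hlen + mtu;                  /* 1514 */

            /* A max-size VLAN-tagged frame exactly filled the old buffer,
             * so the 2-byte hardware checksum had nowhere to go; untagged
             * frames left 4 spare bytes, which is why only tagged
             * MTU-sized packets were dropped. */
            printf("old buffer spare, tagged:   %d\n", old_sz - tagged);   /* 0 */
            printf("old buffer spare, untagged: %d\n", old_sz - untagged); /* 4 */
            printf("new buffer spare, tagged:   %d\n", new_sz - tagged);   /* 2 */
            return 0;
    }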
+
+A workaround is to disable RX checksum offload:
+ # ethtool -K eth0 rx off
+
+This patch resolves this problem by increasing the size available for
+packet data in RX descriptors by two bytes.
+
+Tested on R-Car E3 (r8a77990) ES1.0-based Ebisu-4D board
+
+v2
+* Use sizeof(__sum16) directly rather than adding a driver-local
+  #define for the size of the checksum provided by the hw (2 bytes).
+
+Fixes: 4d86d3818627 ("ravb: RX checksum offload")
+Signed-off-by: Simon Horman
+Reviewed-by: Sergei Shtylyov
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/net/ethernet/renesas/ravb_main.c | 12 +++++++-----
+ 1 file changed, 7 insertions(+), 5 deletions(-)
+
+--- a/drivers/net/ethernet/renesas/ravb_main.c
++++ b/drivers/net/ethernet/renesas/ravb_main.c
+@@ -350,7 +350,7 @@ static int ravb_ring_init(struct net_dev
+ 	int i;
+
+ 	priv->rx_buf_sz = (ndev->mtu <= 1492 ? PKT_BUF_SZ : ndev->mtu) +
+-		ETH_HLEN + VLAN_HLEN;
++		ETH_HLEN + VLAN_HLEN + sizeof(__sum16);
+
+ 	/* Allocate RX and TX skb rings */
+ 	priv->rx_skb[q] = kcalloc(priv->num_rx_ring[q],
+@@ -533,13 +533,15 @@ static void ravb_rx_csum(struct sk_buff
+ {
+ 	u8 *hw_csum;
+
+-	/* The hardware checksum is 2 bytes appended to packet data */
+-	if (unlikely(skb->len < 2))
++	/* The hardware checksum is contained in sizeof(__sum16) (2) bytes
++	 * appended to packet data
++	 */
++	if (unlikely(skb->len < sizeof(__sum16)))
+ 		return;
+-	hw_csum = skb_tail_pointer(skb) - 2;
++	hw_csum = skb_tail_pointer(skb) - sizeof(__sum16);
+ 	skb->csum = csum_unfold((__force __sum16)get_unaligned_le16(hw_csum));
+ 	skb->ip_summed = CHECKSUM_COMPLETE;
+-	skb_trim(skb, skb->len - 2);
++	skb_trim(skb, skb->len - sizeof(__sum16));
+ }
+
+ /* Packet receive function for Ethernet AVB */
diff --git a/queue-4.20/revert-net-mlx5e-e-switch-initialize-eswitch-only-if-eswitch-manager.patch b/queue-4.20/revert-net-mlx5e-e-switch-initialize-eswitch-only-if-eswitch-manager.patch
new file mode 100644
index 00000000000..9ebcfbbf807
--- /dev/null
+++ b/queue-4.20/revert-net-mlx5e-e-switch-initialize-eswitch-only-if-eswitch-manager.patch
@@ -0,0 +1,54 @@
+From foo@baz Sat Feb 2 10:53:00 CET 2019
+From: Bodong Wang
+Date: Sun, 13 Jan 2019 22:47:26 -0600
+Subject: Revert "net/mlx5e: E-Switch, Initialize eswitch only if eswitch manager"
+
+From: Bodong Wang
+
+[ Upstream commit 4e046de0f50e04acd48eb373d6a9061ddf014e0c ]
+
+This reverts commit 5f5991f36dce1e69dd8bd7495763eec2e28f08e7.
+
+With the original commit, the eswitch instance will not be initialized
+for a function which is a vport group manager but not an eswitch manager,
+such as the host PF on a SmartNIC (BlueField) card. This will result in a
+kernel crash when such a vport group manager is trying to access vports
+in its group. E.g., when a PF vport manager (not an eswitch manager)
+tries to configure the MAC of its VF vport, a kernel trace similar to the
+one below will happen:
+
+ BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
+ ...
+ RIP: 0010:mlx5_eswitch_get_vport_config+0xc/0x180 [mlx5_core]
+ ...
+
+Fixes: 5f5991f36dce ("net/mlx5e: E-Switch, Initialize eswitch only if eswitch manager")
+Signed-off-by: Bodong Wang
+Reported-by: Yuval Avnery
+Reviewed-by: Daniel Jurgens
+Reviewed-by: Or Gerlitz
+Signed-off-by: Saeed Mahameed
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+@@ -1689,7 +1689,7 @@ int mlx5_eswitch_init(struct mlx5_core_d
+ 	int vport_num;
+ 	int err;
+
+-	if (!MLX5_ESWITCH_MANAGER(dev))
++	if (!MLX5_VPORT_MANAGER(dev))
+ 		return 0;
+
+ 	esw_info(dev,
+@@ -1758,7 +1758,7 @@ abort:
+
+ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
+ {
+-	if (!esw || !MLX5_ESWITCH_MANAGER(esw->dev))
++	if (!esw || !MLX5_VPORT_MANAGER(esw->dev))
+ 		return;
+
+ 	esw_info(esw->dev, "cleanup\n");
diff --git a/queue-4.20/sctp-improve-the-events-for-sctp-stream-adding.patch b/queue-4.20/sctp-improve-the-events-for-sctp-stream-adding.patch
new file mode 100644
index 00000000000..97d487c4746
--- /dev/null
+++ b/queue-4.20/sctp-improve-the-events-for-sctp-stream-adding.patch
@@ -0,0 +1,71 @@
+From foo@baz Sat Feb 2 10:53:00 CET 2019
+From: Xin Long
+Date: Tue, 22 Jan 2019 02:40:12 +0800
+Subject: sctp: improve the events for sctp stream adding
+
+From: Xin Long
+
+[ Upstream commit 8220c870cb0f4eaa4e335c9645dbd9a1c461c1dd ]
+
+This patch is to improve sctp stream adding events in 2 places:
+
+ 1. In sctp_process_strreset_addstrm_out(), move up the SCTP_MAX_STREAM
+    and in-stream allocation failure checks, as the adding has to
+    succeed after reconf_timer stops for the in-stream adding
+    request retransmission.
+
+ 2. In sctp_process_strreset_addstrm_in(), no event should be sent,
+    as no in or out stream is added here.
+
+Fixes: 50a41591f110 ("sctp: implement receiver-side procedures for the Add Outgoing Streams Request Parameter")
+Fixes: c5c4ebb3ab87 ("sctp: implement receiver-side procedures for the Add Incoming Streams Request Parameter")
+Reported-by: Ying Xu
+Signed-off-by: Xin Long
+Signed-off-by: David S.
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/stream.c | 19 ++++++++----------- + 1 file changed, 8 insertions(+), 11 deletions(-) + +--- a/net/sctp/stream.c ++++ b/net/sctp/stream.c +@@ -866,6 +866,14 @@ struct sctp_chunk *sctp_process_strreset + if (!(asoc->strreset_enable & SCTP_ENABLE_CHANGE_ASSOC_REQ)) + goto out; + ++ in = ntohs(addstrm->number_of_streams); ++ incnt = stream->incnt + in; ++ if (!in || incnt > SCTP_MAX_STREAM) ++ goto out; ++ ++ if (sctp_stream_alloc_in(stream, incnt, GFP_ATOMIC)) ++ goto out; ++ + if (asoc->strreset_chunk) { + if (!sctp_chunk_lookup_strreset_param( + asoc, 0, SCTP_PARAM_RESET_ADD_IN_STREAMS)) { +@@ -889,14 +897,6 @@ struct sctp_chunk *sctp_process_strreset + } + } + +- in = ntohs(addstrm->number_of_streams); +- incnt = stream->incnt + in; +- if (!in || incnt > SCTP_MAX_STREAM) +- goto out; +- +- if (sctp_stream_alloc_in(stream, incnt, GFP_ATOMIC)) +- goto out; +- + stream->incnt = incnt; + + result = SCTP_STRRESET_PERFORMED; +@@ -966,9 +966,6 @@ struct sctp_chunk *sctp_process_strreset + + result = SCTP_STRRESET_PERFORMED; + +- *evp = sctp_ulpevent_make_stream_change_event(asoc, +- 0, 0, ntohs(addstrm->number_of_streams), GFP_ATOMIC); +- + out: + sctp_update_strreset_result(asoc, result); + err: diff --git a/queue-4.20/sctp-improve-the-events-for-sctp-stream-reset.patch b/queue-4.20/sctp-improve-the-events-for-sctp-stream-reset.patch new file mode 100644 index 00000000000..2e88e2a07a5 --- /dev/null +++ b/queue-4.20/sctp-improve-the-events-for-sctp-stream-reset.patch @@ -0,0 +1,131 @@ +From foo@baz Sat Feb 2 10:53:00 CET 2019 +From: Xin Long +Date: Tue, 22 Jan 2019 02:39:34 +0800 +Subject: sctp: improve the events for sctp stream reset + +From: Xin Long + +[ Upstream commit 2e6dc4d95110becfe0ff4c3d4749c33ea166e9e7 ] + +This patch is to improve sctp stream reset events in 4 places: + + 1. In sctp_process_strreset_outreq(), the flag should always be set with + SCTP_STREAM_RESET_INCOMING_SSN instead of OUTGOING, as receiver's in + stream is reset here. + 2. In sctp_process_strreset_outreq(), move up SCTP_STRRESET_ERR_WRONG_SSN + check, as the reset has to succeed after reconf_timer stops for the + in stream reset request retransmission. + 3. In sctp_process_strreset_inreq(), no event should be sent, as no in + or out stream is reset here. + 4. In sctp_process_strreset_resp(), SCTP_STREAM_RESET_INCOMING_SSN or + OUTGOING event should always be sent for stream reset requests, no + matter it fails or succeeds to process the request. + +Fixes: 810544764536 ("sctp: implement receiver-side procedures for the Outgoing SSN Reset Request Parameter") +Fixes: 16e1a91965b0 ("sctp: implement receiver-side procedures for the Incoming SSN Reset Request Parameter") +Fixes: 11ae76e67a17 ("sctp: implement receiver-side procedures for the Reconf Response Parameter") +Reported-by: Ying Xu +Signed-off-by: Xin Long +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/stream.c | 39 +++++++++++++++++---------------------- + 1 file changed, 17 insertions(+), 22 deletions(-) + +--- a/net/sctp/stream.c ++++ b/net/sctp/stream.c +@@ -585,9 +585,9 @@ struct sctp_chunk *sctp_process_strreset + struct sctp_strreset_outreq *outreq = param.v; + struct sctp_stream *stream = &asoc->stream; + __u32 result = SCTP_STRRESET_DENIED; +- __u16 i, nums, flags = 0; + __be16 *str_p = NULL; + __u32 request_seq; ++ __u16 i, nums; + + request_seq = ntohl(outreq->request_seq); + +@@ -615,6 +615,15 @@ struct sctp_chunk *sctp_process_strreset + if (!(asoc->strreset_enable & SCTP_ENABLE_RESET_STREAM_REQ)) + goto out; + ++ nums = (ntohs(param.p->length) - sizeof(*outreq)) / sizeof(__u16); ++ str_p = outreq->list_of_streams; ++ for (i = 0; i < nums; i++) { ++ if (ntohs(str_p[i]) >= stream->incnt) { ++ result = SCTP_STRRESET_ERR_WRONG_SSN; ++ goto out; ++ } ++ } ++ + if (asoc->strreset_chunk) { + if (!sctp_chunk_lookup_strreset_param( + asoc, outreq->response_seq, +@@ -637,32 +646,19 @@ struct sctp_chunk *sctp_process_strreset + sctp_chunk_put(asoc->strreset_chunk); + asoc->strreset_chunk = NULL; + } +- +- flags = SCTP_STREAM_RESET_INCOMING_SSN; + } + +- nums = (ntohs(param.p->length) - sizeof(*outreq)) / sizeof(__u16); +- if (nums) { +- str_p = outreq->list_of_streams; +- for (i = 0; i < nums; i++) { +- if (ntohs(str_p[i]) >= stream->incnt) { +- result = SCTP_STRRESET_ERR_WRONG_SSN; +- goto out; +- } +- } +- ++ if (nums) + for (i = 0; i < nums; i++) + SCTP_SI(stream, ntohs(str_p[i]))->mid = 0; +- } else { ++ else + for (i = 0; i < stream->incnt; i++) + SCTP_SI(stream, i)->mid = 0; +- } + + result = SCTP_STRRESET_PERFORMED; + + *evp = sctp_ulpevent_make_stream_reset_event(asoc, +- flags | SCTP_STREAM_RESET_OUTGOING_SSN, nums, str_p, +- GFP_ATOMIC); ++ SCTP_STREAM_RESET_INCOMING_SSN, nums, str_p, GFP_ATOMIC); + + out: + sctp_update_strreset_result(asoc, result); +@@ -738,9 +734,6 @@ struct sctp_chunk *sctp_process_strreset + + result = SCTP_STRRESET_PERFORMED; + +- *evp = sctp_ulpevent_make_stream_reset_event(asoc, +- SCTP_STREAM_RESET_INCOMING_SSN, nums, str_p, GFP_ATOMIC); +- + out: + sctp_update_strreset_result(asoc, result); + err: +@@ -1036,10 +1029,10 @@ struct sctp_chunk *sctp_process_strreset + sout->mid_uo = 0; + } + } +- +- flags = SCTP_STREAM_RESET_OUTGOING_SSN; + } + ++ flags |= SCTP_STREAM_RESET_OUTGOING_SSN; ++ + for (i = 0; i < stream->outcnt; i++) + SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN; + +@@ -1058,6 +1051,8 @@ struct sctp_chunk *sctp_process_strreset + nums = (ntohs(inreq->param_hdr.length) - sizeof(*inreq)) / + sizeof(__u16); + ++ flags |= SCTP_STREAM_RESET_INCOMING_SSN; ++ + *evp = sctp_ulpevent_make_stream_reset_event(asoc, flags, + nums, str_p, GFP_ATOMIC); + } else if (req->type == SCTP_PARAM_RESET_TSN_REQUEST) { diff --git a/queue-4.20/sctp-set-chunk-transport-correctly-when-it-s-a-new-asoc.patch b/queue-4.20/sctp-set-chunk-transport-correctly-when-it-s-a-new-asoc.patch new file mode 100644 index 00000000000..1292cc34935 --- /dev/null +++ b/queue-4.20/sctp-set-chunk-transport-correctly-when-it-s-a-new-asoc.patch @@ -0,0 +1,65 @@ +From foo@baz Sat Feb 2 10:53:00 CET 2019 +From: Xin Long +Date: Tue, 22 Jan 2019 02:42:09 +0800 +Subject: sctp: set chunk transport correctly when it's a new asoc + +From: Xin Long + +[ Upstream commit 4ff40b86262b73553ee47cc3784ce8ba0f220bd8 ] + +In the paths: + + sctp_sf_do_unexpected_init() -> + sctp_make_init_ack() + sctp_sf_do_dupcook_a/b()() -> + sctp_sf_do_5_1D_ce() 
+
+The new chunk 'retval' transport is set from the incoming chunk 'chunk'
+transport. However, the 'retval' transport belongs to the new asoc,
+which is a different one from the 'chunk' transport's asoc.
+
+This causes the 'retval' chunk to be set up with a wrong transport.
+Later, when it is sent, and because of commit b9fd683982c9 ("sctp: add
+sctp_packet_singleton"), sctp_packet_singleton() will set some fields,
+like the vtag, in the 'retval' chunk from that wrong transport's asoc.
+
+This patch fixes it by setting the 'retval' transport correctly, to the
+one that belongs to the right asoc, in sctp_make_init_ack() and
+sctp_sf_do_5_1D_ce().
+
+Fixes: b9fd683982c9 ("sctp: add sctp_packet_singleton")
+Reported-by: Ying Xu
+Signed-off-by: Xin Long
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/sctp/sm_make_chunk.c | 11 ++++++++---
+ 1 file changed, 8 insertions(+), 3 deletions(-)
+
+--- a/net/sctp/sm_make_chunk.c
++++ b/net/sctp/sm_make_chunk.c
+@@ -495,7 +495,10 @@ struct sctp_chunk *sctp_make_init_ack(co
+ *
+ * [INIT ACK back to where the INIT came from.]
+ */
+- retval->transport = chunk->transport;
++ if (chunk->transport)
++ retval->transport =
++ sctp_assoc_lookup_paddr(asoc,
++ &chunk->transport->ipaddr);
+
+ retval->subh.init_hdr =
+ sctp_addto_chunk(retval, sizeof(initack), &initack);
+@@ -642,8 +645,10 @@ struct sctp_chunk *sctp_make_cookie_ack(
+ *
+ * [COOKIE ACK back to where the COOKIE ECHO came from.]
+ */
+- if (retval && chunk)
+- retval->transport = chunk->transport;
++ if (retval && chunk && chunk->transport)
++ retval->transport =
++ sctp_assoc_lookup_paddr(asoc,
++ &chunk->transport->ipaddr);
+
+ return retval;
+ }
diff --git a/queue-4.20/sctp-set-flow-sport-from-saddr-only-when-it-s-0.patch b/queue-4.20/sctp-set-flow-sport-from-saddr-only-when-it-s-0.patch
new file mode 100644
index 00000000000..1c39fba4f5b
--- /dev/null
+++ b/queue-4.20/sctp-set-flow-sport-from-saddr-only-when-it-s-0.patch
@@ -0,0 +1,66 @@
+From foo@baz Sat Feb 2 10:53:00 CET 2019
+From: Xin Long
+Date: Tue, 22 Jan 2019 02:42:41 +0800
+Subject: sctp: set flow sport from saddr only when it's 0
+
+From: Xin Long
+
+[ Upstream commit ecf938fe7d0088077ee1280419a2b3c5429b47c8 ]
+
+Now sctp_transport_pmtu() passes transport->saddr into .get_dst() to set
+the flow sport from 'saddr'. However, transport->saddr is set only when
+transport->dst exists in sctp_transport_route().
+
+If sctp_transport_pmtu() is called without transport->saddr set, like
+when transport->dst doesn't exist, the flow sport will be set to 0 from
+transport->saddr, which will cause a wrong route to be chosen.
+
+Commit 6e91b578bf3f ("sctp: re-use sctp_transport_pmtu in
+sctp_transport_route") made the issue easier to trigger, since
+sctp_transport_pmtu() would be called in sctp_transport_route()
+after that.
+
+In general, fl4->fl4_sport should always be set to
+htons(asoc->base.bind_addr.port), unless transport->asoc doesn't exist
+in sctp_v4/6_get_dst(), which is the case:
+
+ sctp_ootb_pkt_new() ->
+ sctp_transport_route()
+
+For that, we can simply handle it by setting the flow sport from saddr
+only when it's 0 in sctp_v4/6_get_dst().
+
+Fixes: 6e91b578bf3f ("sctp: re-use sctp_transport_pmtu in sctp_transport_route")
+Reported-by: Ying Xu
+Signed-off-by: Xin Long
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/sctp/ipv6.c | 3 ++-
+ net/sctp/protocol.c | 3 ++-
+ 2 files changed, 4 insertions(+), 2 deletions(-)
+
+--- a/net/sctp/ipv6.c
++++ b/net/sctp/ipv6.c
+@@ -277,7 +277,8 @@ static void sctp_v6_get_dst(struct sctp_
+
+ if (saddr) {
+ fl6->saddr = saddr->v6.sin6_addr;
+- fl6->fl6_sport = saddr->v6.sin6_port;
++ if (!fl6->fl6_sport)
++ fl6->fl6_sport = saddr->v6.sin6_port;
+
+ pr_debug("src=%pI6 - ", &fl6->saddr);
+ }
+--- a/net/sctp/protocol.c
++++ b/net/sctp/protocol.c
+@@ -440,7 +440,8 @@ static void sctp_v4_get_dst(struct sctp_
+ }
+ if (saddr) {
+ fl4->saddr = saddr->v4.sin_addr.s_addr;
+- fl4->fl4_sport = saddr->v4.sin_port;
++ if (!fl4->fl4_sport)
++ fl4->fl4_sport = saddr->v4.sin_port;
+ }
+
+ pr_debug("%s: dst:%pI4, src:%pI4 - ", __func__, &fl4->daddr,
diff --git a/queue-4.20/series b/queue-4.20/series
index ff5f7101159..a5a35c22d08 100644
--- a/queue-4.20/series
+++ b/queue-4.20/series
@@ -1 +1,34 @@
 drm-msm-gpu-fix-building-without-debugfs.patch
+ipv6-sr-clear-ip6cb-skb-on-srh-ip4ip6-encapsulation.patch
+ipvlan-l3mdev-fix-broken-l3s-mode-wrt-local-routes.patch
+l2tp-copy-4-more-bytes-to-linear-part-if-necessary.patch
+l2tp-fix-reading-optional-fields-of-l2tpv3.patch
+net-ip_gre-always-reports-o_key-to-userspace.patch
+net-ip_gre-use-erspan-key-field-for-tunnel-lookup.patch
+net-ipv6-don-t-return-positive-numbers-when-nothing-was-dumped.patch
+net-mlx4_core-add-masking-for-a-few-queries-on-hca-caps.patch
+netrom-switch-to-sock-timer-api.patch
+net-rose-fix-null-ax25_cb-kernel-panic.patch
+net-set-default-network-namespace-in-init_dummy_netdev.patch
+ravb-expand-rx-descriptor-data-to-accommodate-hw-checksum.patch
+sctp-improve-the-events-for-sctp-stream-reset.patch
+tun-move-the-call-to-tun_set_real_num_queues.patch
+ucc_geth-reset-bql-queue-when-stopping-device.patch
+vhost-fix-oob-in-get_rx_bufs.patch
+net-ip6_gre-always-reports-o_key-to-userspace.patch
+sctp-improve-the-events-for-sctp-stream-adding.patch
+net-mlx5e-allow-mac-invalidation-while-spoofchk-is-on.patch
+ip6mr-fix-notifiers-call-on-mroute_clean_tables.patch
+revert-net-mlx5e-e-switch-initialize-eswitch-only-if-eswitch-manager.patch
+sctp-set-chunk-transport-correctly-when-it-s-a-new-asoc.patch
+sctp-set-flow-sport-from-saddr-only-when-it-s-0.patch
+net-tls-fix-deadlock-in-free_resources-tx.patch
+net-tls-save-iv-in-tls_rec-for-async-crypto-requests.patch
+virtio_net-don-t-enable-napi-when-interface-is-down.patch
+virtio_net-don-t-call-free_old_xmit_skbs-for-xdp_frames.patch
+virtio_net-fix-not-restoring-real_num_rx_queues.patch
+virtio_net-fix-out-of-bounds-access-of-sq.patch
+virtio_net-don-t-process-redirected-xdp-frames-when-xdp-is-disabled.patch
+virtio_net-use-xdp_return_frame-to-free-xdp_frames-on-destroying-vqs.patch
+virtio_net-differentiate-sk_buff-and-xdp_frame-on-freeing.patch
+ipv6-consider-sk_bound_dev_if-when-binding-a-socket-to-an-address.patch
diff --git a/queue-4.20/tun-move-the-call-to-tun_set_real_num_queues.patch b/queue-4.20/tun-move-the-call-to-tun_set_real_num_queues.patch
new file mode 100644
index 00000000000..6cea8edb77e
--- /dev/null
+++ b/queue-4.20/tun-move-the-call-to-tun_set_real_num_queues.patch
@@ -0,0 +1,43 @@
+From foo@baz Sat Feb 2 10:53:00 CET 2019
+From: George Amanakis
+Date: Tue, 29 Jan 2019 22:50:13 -0500
+Subject: tun: move the call to tun_set_real_num_queues
+
+From: George Amanakis
+
+[ Upstream commit 3a03cb8456cc1d61c467a5375e0a10e5207b948c ]
+
+Call tun_set_real_num_queues() after the increment of tun->numqueues,
+since the former depends on it. Otherwise, the number of queues is not
+correctly accounted for, which results in warnings similar to:
+"vnet0 selects TX queue 11, but real number of TX queues is 11".
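+
+[ Editor's note: a tiny user-space model of the ordering bug, added for
+  illustration with made-up variables standing in for tun's internals;
+  the queue numbers are arbitrary. Advertising the queue count before
+  the increment leaves the last attached queue out of range: ]
+
+  #include <stdio.h>
+
+  int main(void)
+  {
+          int numqueues = 10;            /* queues before this attach */
+
+          /* buggy order: advertise first, then increment */
+          int advertised = numqueues;    /* tun_set_real_num_queues() */
+          numqueues++;                   /* tun->numqueues++          */
+
+          int selected = numqueues - 1;  /* stack picks the newest queue */
+          if (selected >= advertised)
+                  printf("selects TX queue %d, but real number of TX queues is %d\n",
+                         selected, advertised);
+          return 0;
+  }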
+
+Fixes: 0b7959b62573 ("tun: publish tfile after it's fully initialized")
+Reported-and-tested-by: George Amanakis
+Signed-off-by: George Amanakis
+Signed-off-by: Stanislav Fomichev
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/net/tun.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/drivers/net/tun.c
++++ b/drivers/net/tun.c
+@@ -862,8 +862,6 @@ static int tun_attach(struct tun_struct
+ if (rtnl_dereference(tun->xdp_prog))
+ sock_set_flag(&tfile->sk, SOCK_XDP);
+
+- tun_set_real_num_queues(tun);
+-
+ /* device is allowed to go away first, so no need to hold extra
+ * refcnt.
+ */
+@@ -875,6 +873,7 @@ static int tun_attach(struct tun_struct
+ rcu_assign_pointer(tfile->tun, tun);
+ rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile);
+ tun->numqueues++;
++ tun_set_real_num_queues(tun);
+ out:
+ return err;
+ }
diff --git a/queue-4.20/ucc_geth-reset-bql-queue-when-stopping-device.patch b/queue-4.20/ucc_geth-reset-bql-queue-when-stopping-device.patch
new file mode 100644
index 00000000000..4dee7c7645c
--- /dev/null
+++ b/queue-4.20/ucc_geth-reset-bql-queue-when-stopping-device.patch
@@ -0,0 +1,33 @@
+From foo@baz Sat Feb 2 10:53:00 CET 2019
+From: Mathias Thore
+Date: Mon, 28 Jan 2019 10:07:47 +0100
+Subject: ucc_geth: Reset BQL queue when stopping device
+
+From: Mathias Thore
+
+[ Upstream commit e15aa3b2b1388c399c1a2ce08550d2cc4f7e3e14 ]
+
+After a timeout event caused by, for example, a broadcast storm, when
+the MAC and PHY are reset, the BQL TX queue needs to be reset as
+well. Otherwise, the device will exhibit severe performance issues
+even after the storm has ended.
+
+Co-authored-by: David Gounaris
+Signed-off-by: Mathias Thore
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/net/ethernet/freescale/ucc_geth.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/net/ethernet/freescale/ucc_geth.c
++++ b/drivers/net/ethernet/freescale/ucc_geth.c
+@@ -1883,6 +1883,8 @@ static void ucc_geth_free_tx(struct ucc_
+ u16 i, j;
+ u8 __iomem *bd;
+
++ netdev_reset_queue(ugeth->ndev);
++
+ ug_info = ugeth->ug_info;
+ uf_info = &ug_info->uf_info;
+
diff --git a/queue-4.20/vhost-fix-oob-in-get_rx_bufs.patch b/queue-4.20/vhost-fix-oob-in-get_rx_bufs.patch
new file mode 100644
index 00000000000..50c8d24ff93
--- /dev/null
+++ b/queue-4.20/vhost-fix-oob-in-get_rx_bufs.patch
@@ -0,0 +1,146 @@
+From foo@baz Sat Feb 2 10:53:00 CET 2019
+From: Jason Wang
+Date: Mon, 28 Jan 2019 15:05:05 +0800
+Subject: vhost: fix OOB in get_rx_bufs()
+
+From: Jason Wang
+
+[ Upstream commit b46a0bf78ad7b150ef5910da83859f7f5a514ffd ]
+
+Since batched used ring updating was introduced in commit e2b3b35eb989
+("vhost_net: batch used ring update in rx"), we tend to batch heads in
+vq->heads for more than one packet. But the quota passed to
+get_rx_bufs() was not correctly limited, which can result in an OOB
+write in vq->heads.
+
+ headcount = get_rx_bufs(vq, vq->heads + nvq->done_idx,
+ vhost_len, &in, vq_log, &log,
+ likely(mergeable) ? UIO_MAXIOV : 1);
+
+UIO_MAXIOV was still used, which is wrong since we could have batched
+used entries in vq->heads; this will cause an OOB write if the next
+buffer needs more than 960 (1024 (UIO_MAXIOV) - 64 (VHOST_NET_BATCH))
+heads after we've batched 64 (VHOST_NET_BATCH) heads:
+
+=============================================================================
+BUG kmalloc-8k (Tainted: G B ): Redzone overwritten
+-----------------------------------------------------------------------------
+
+INFO: 0x00000000fd93b7a2-0x00000000f0713384. First byte 0xa9 instead of 0xcc
+INFO: Allocated in alloc_pd+0x22/0x60 age=3933677 cpu=2 pid=2674
+ kmem_cache_alloc_trace+0xbb/0x140
+ alloc_pd+0x22/0x60
+ gen8_ppgtt_create+0x11d/0x5f0
+ i915_ppgtt_create+0x16/0x80
+ i915_gem_create_context+0x248/0x390
+ i915_gem_context_create_ioctl+0x4b/0xe0
+ drm_ioctl_kernel+0xa5/0xf0
+ drm_ioctl+0x2ed/0x3a0
+ do_vfs_ioctl+0x9f/0x620
+ ksys_ioctl+0x6b/0x80
+ __x64_sys_ioctl+0x11/0x20
+ do_syscall_64+0x43/0xf0
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+INFO: Slab 0x00000000d13e87af objects=3 used=3 fp=0x (null) flags=0x200000000010201
+INFO: Object 0x0000000003278802 @offset=17064 fp=0x00000000e2e6652b
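+
+[ Editor's note: a small stand-alone check of the arithmetic above,
+  added for illustration; the constants mirror the values quoted in
+  this message, not the driver headers. With up to VHOST_NET_BATCH
+  heads already batched, an array of UIO_MAXIOV entries can be overrun,
+  while UIO_MAXIOV + VHOST_NET_BATCH entries always suffice: ]
+
+  #include <assert.h>
+
+  #define UIO_MAXIOV      1024
+  #define VHOST_NET_BATCH   64
+
+  int main(void)
+  {
+          int batched = VHOST_NET_BATCH; /* heads already in vq->heads */
+          int quota   = UIO_MAXIOV;      /* old, unadjusted quota      */
+
+          assert(batched + quota > UIO_MAXIOV);                    /* old size: OOB  */
+          assert(batched + quota <= UIO_MAXIOV + VHOST_NET_BATCH); /* new size: safe */
+          return 0;
+  }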
+
+Fix this by allocating UIO_MAXIOV + VHOST_NET_BATCH iovs for
+vhost-net. This is done by passing the limit through vhost_dev_init(),
+so that set_owner can allocate the number of iovs in a per-device
+manner.
+
+This fixes CVE-2018-16880.
+
+Fixes: e2b3b35eb989 ("vhost_net: batch used ring update in rx")
+Signed-off-by: Jason Wang
+Acked-by: Stefan Hajnoczi
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/vhost/net.c | 3 ++-
+ drivers/vhost/scsi.c | 2 +-
+ drivers/vhost/vhost.c | 7 ++++---
+ drivers/vhost/vhost.h | 4 +++-
+ drivers/vhost/vsock.c | 2 +-
+ 5 files changed, 11 insertions(+), 7 deletions(-)
+
+--- a/drivers/vhost/net.c
++++ b/drivers/vhost/net.c
+@@ -1293,7 +1293,8 @@ static int vhost_net_open(struct inode *
+ n->vqs[i].rx_ring = NULL;
+ vhost_net_buf_init(&n->vqs[i].rxq);
+ }
+- vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX);
++ vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX,
++ UIO_MAXIOV + VHOST_NET_BATCH);
+
+ vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, EPOLLOUT, dev);
+ vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, EPOLLIN, dev);
+--- a/drivers/vhost/scsi.c
++++ b/drivers/vhost/scsi.c
+@@ -1628,7 +1628,7 @@ static int vhost_scsi_open(struct inode
+ vqs[i] = &vs->vqs[i].vq;
+ vs->vqs[i].vq.handle_kick = vhost_scsi_handle_kick;
+ }
+- vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ);
++ vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ, UIO_MAXIOV);
+
+ vhost_scsi_init_inflight(vs, NULL);
+
+--- a/drivers/vhost/vhost.c
++++ b/drivers/vhost/vhost.c
+@@ -390,9 +390,9 @@ static long vhost_dev_alloc_iovecs(struc
+ vq->indirect = kmalloc_array(UIO_MAXIOV,
+ sizeof(*vq->indirect),
+ GFP_KERNEL);
+- vq->log = kmalloc_array(UIO_MAXIOV, sizeof(*vq->log),
++ vq->log = kmalloc_array(dev->iov_limit, sizeof(*vq->log),
+ GFP_KERNEL);
+- vq->heads = kmalloc_array(UIO_MAXIOV, sizeof(*vq->heads),
++ vq->heads = kmalloc_array(dev->iov_limit, sizeof(*vq->heads),
+ GFP_KERNEL);
+ if (!vq->indirect || !vq->log || !vq->heads)
+ goto err_nomem;
+@@ -414,7 +414,7 @@ static void vhost_dev_free_iovecs(struct
+ }
+
+ void vhost_dev_init(struct vhost_dev *dev,
+- struct vhost_virtqueue **vqs, int nvqs)
++ struct vhost_virtqueue **vqs, int nvqs, int iov_limit)
+ {
+ struct vhost_virtqueue *vq;
+ int i;
+@@ -427,6 +427,7 @@ void vhost_dev_init(struct vhost_dev *de
+ dev->iotlb = NULL;
+
dev->mm = NULL; + dev->worker = NULL; ++ dev->iov_limit = iov_limit; + init_llist_head(&dev->work_list); + init_waitqueue_head(&dev->wait); + INIT_LIST_HEAD(&dev->read_list); +--- a/drivers/vhost/vhost.h ++++ b/drivers/vhost/vhost.h +@@ -170,9 +170,11 @@ struct vhost_dev { + struct list_head read_list; + struct list_head pending_list; + wait_queue_head_t wait; ++ int iov_limit; + }; + +-void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs, int nvqs); ++void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs, ++ int nvqs, int iov_limit); + long vhost_dev_set_owner(struct vhost_dev *dev); + bool vhost_dev_has_owner(struct vhost_dev *dev); + long vhost_dev_check_owner(struct vhost_dev *); +--- a/drivers/vhost/vsock.c ++++ b/drivers/vhost/vsock.c +@@ -531,7 +531,7 @@ static int vhost_vsock_dev_open(struct i + vsock->vqs[VSOCK_VQ_TX].handle_kick = vhost_vsock_handle_tx_kick; + vsock->vqs[VSOCK_VQ_RX].handle_kick = vhost_vsock_handle_rx_kick; + +- vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs)); ++ vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs), UIO_MAXIOV); + + file->private_data = vsock; + spin_lock_init(&vsock->send_pkt_list_lock); diff --git a/queue-4.20/virtio_net-differentiate-sk_buff-and-xdp_frame-on-freeing.patch b/queue-4.20/virtio_net-differentiate-sk_buff-and-xdp_frame-on-freeing.patch new file mode 100644 index 00000000000..22089838d5f --- /dev/null +++ b/queue-4.20/virtio_net-differentiate-sk_buff-and-xdp_frame-on-freeing.patch @@ -0,0 +1,197 @@ +From foo@baz Sat Feb 2 10:53:01 CET 2019 +From: Toshiaki Makita +Date: Tue, 29 Jan 2019 09:45:59 +0900 +Subject: virtio_net: Differentiate sk_buff and xdp_frame on freeing + +From: Toshiaki Makita + +[ Upstream commit 5050471d35d1316ba32dfcbb409978337eb9e75e + + I had to fold commit df133f3f9625 ("virtio_net: bulk free tx skbs") + into this to make it work. ] + +We do not reset or free up unused buffers when enabling/disabling XDP, +so it can happen that xdp_frames are freed after disabling XDP or +sk_buffs are freed after enabling XDP on xdp tx queues. +Thus we need to handle both forms (xdp_frames and sk_buffs) regardless +of XDP setting. +One way to trigger this problem is to disable XDP when napi_tx is +enabled. In that case, virtnet_xdp_set() calls virtnet_napi_enable() +which kicks NAPI. The NAPI handler will call virtnet_poll_cleantx() +which invokes free_old_xmit_skbs() for queues which have been used by +XDP. + +Note that even with this change we need to keep skipping +free_old_xmit_skbs() from NAPI handlers when XDP is enabled, because XDP +tx queues do not aquire queue locks. + +- v2: Use napi_consume_skb() instead of dev_consume_skb_any() + +Fixes: 4941d472bf95 ("virtio-net: do not reset during XDP set") +Signed-off-by: Toshiaki Makita +Acked-by: Jason Wang +Acked-by: Michael S. Tsirkin +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/virtio_net.c | 64 ++++++++++++++++++++++++++++++++++------------- + 1 file changed, 47 insertions(+), 17 deletions(-) + +--- a/drivers/net/virtio_net.c ++++ b/drivers/net/virtio_net.c +@@ -57,6 +57,8 @@ module_param(napi_tx, bool, 0644); + #define VIRTIO_XDP_TX BIT(0) + #define VIRTIO_XDP_REDIR BIT(1) + ++#define VIRTIO_XDP_FLAG BIT(0) ++ + /* RX packet size EWMA. The average packet size is used to determine the packet + * buffer size when refilling RX rings. 
As the entire RX ring may be refilled + * at once, the weight is chosen so that the EWMA will be insensitive to short- +@@ -251,6 +253,21 @@ struct padded_vnet_hdr { + char padding[4]; + }; + ++static bool is_xdp_frame(void *ptr) ++{ ++ return (unsigned long)ptr & VIRTIO_XDP_FLAG; ++} ++ ++static void *xdp_to_ptr(struct xdp_frame *ptr) ++{ ++ return (void *)((unsigned long)ptr | VIRTIO_XDP_FLAG); ++} ++ ++static struct xdp_frame *ptr_to_xdp(void *ptr) ++{ ++ return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG); ++} ++ + /* Converting between virtqueue no. and kernel tx/rx queue no. + * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq + */ +@@ -461,7 +478,8 @@ static int __virtnet_xdp_xmit_one(struct + + sg_init_one(sq->sg, xdpf->data, xdpf->len); + +- err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdpf, GFP_ATOMIC); ++ err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdp_to_ptr(xdpf), ++ GFP_ATOMIC); + if (unlikely(err)) + return -ENOSPC; /* Caller handle free/refcnt */ + +@@ -481,13 +499,13 @@ static int virtnet_xdp_xmit(struct net_d + { + struct virtnet_info *vi = netdev_priv(dev); + struct receive_queue *rq = vi->rq; +- struct xdp_frame *xdpf_sent; + struct bpf_prog *xdp_prog; + struct send_queue *sq; + unsigned int len; + int drops = 0; + int kicks = 0; + int ret, err; ++ void *ptr; + int i; + + /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this +@@ -506,8 +524,12 @@ static int virtnet_xdp_xmit(struct net_d + } + + /* Free up any pending old buffers before queueing new ones. */ +- while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL) +- xdp_return_frame(xdpf_sent); ++ while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) { ++ if (likely(is_xdp_frame(ptr))) ++ xdp_return_frame(ptr_to_xdp(ptr)); ++ else ++ napi_consume_skb(ptr, false); ++ } + + for (i = 0; i < n; i++) { + struct xdp_frame *xdpf = frames[i]; +@@ -1326,20 +1348,28 @@ static int virtnet_receive(struct receiv + return stats.packets; + } + +-static void free_old_xmit_skbs(struct send_queue *sq) ++static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi) + { +- struct sk_buff *skb; + unsigned int len; + unsigned int packets = 0; + unsigned int bytes = 0; ++ void *ptr; + +- while ((skb = virtqueue_get_buf(sq->vq, &len)) != NULL) { +- pr_debug("Sent skb %p\n", skb); ++ while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) { ++ if (likely(!is_xdp_frame(ptr))) { ++ struct sk_buff *skb = ptr; + +- bytes += skb->len; +- packets++; ++ pr_debug("Sent skb %p\n", skb); + +- dev_consume_skb_any(skb); ++ bytes += skb->len; ++ napi_consume_skb(skb, in_napi); ++ } else { ++ struct xdp_frame *frame = ptr_to_xdp(ptr); ++ ++ bytes += frame->len; ++ xdp_return_frame(frame); ++ } ++ packets++; + } + + /* Avoid overhead when no packets have been processed +@@ -1375,7 +1405,7 @@ static void virtnet_poll_cleantx(struct + return; + + if (__netif_tx_trylock(txq)) { +- free_old_xmit_skbs(sq); ++ free_old_xmit_skbs(sq, true); + __netif_tx_unlock(txq); + } + +@@ -1459,7 +1489,7 @@ static int virtnet_poll_tx(struct napi_s + + txq = netdev_get_tx_queue(vi->dev, index); + __netif_tx_lock(txq, raw_smp_processor_id()); +- free_old_xmit_skbs(sq); ++ free_old_xmit_skbs(sq, true); + __netif_tx_unlock(txq); + + virtqueue_napi_complete(napi, sq->vq, 0); +@@ -1528,7 +1558,7 @@ static netdev_tx_t start_xmit(struct sk_ + bool use_napi = sq->napi.weight; + + /* Free up any pending old buffers before queueing new ones. 
*/ +- free_old_xmit_skbs(sq); ++ free_old_xmit_skbs(sq, false); + + if (use_napi && kick) + virtqueue_enable_cb_delayed(sq->vq); +@@ -1571,7 +1601,7 @@ static netdev_tx_t start_xmit(struct sk_ + if (!use_napi && + unlikely(!virtqueue_enable_cb_delayed(sq->vq))) { + /* More just got used, free them then recheck. */ +- free_old_xmit_skbs(sq); ++ free_old_xmit_skbs(sq, false); + if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) { + netif_start_subqueue(dev, qnum); + virtqueue_disable_cb(sq->vq); +@@ -2639,10 +2669,10 @@ static void free_unused_bufs(struct virt + for (i = 0; i < vi->max_queue_pairs; i++) { + struct virtqueue *vq = vi->sq[i].vq; + while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) { +- if (!is_xdp_raw_buffer_queue(vi, i)) ++ if (!is_xdp_frame(buf)) + dev_kfree_skb(buf); + else +- xdp_return_frame(buf); ++ xdp_return_frame(ptr_to_xdp(buf)); + } + } + diff --git a/queue-4.20/virtio_net-don-t-call-free_old_xmit_skbs-for-xdp_frames.patch b/queue-4.20/virtio_net-don-t-call-free_old_xmit_skbs-for-xdp_frames.patch new file mode 100644 index 00000000000..9d3585d6d86 --- /dev/null +++ b/queue-4.20/virtio_net-don-t-call-free_old_xmit_skbs-for-xdp_frames.patch @@ -0,0 +1,143 @@ +From foo@baz Sat Feb 2 10:53:00 CET 2019 +From: Toshiaki Makita +Date: Tue, 29 Jan 2019 09:45:54 +0900 +Subject: virtio_net: Don't call free_old_xmit_skbs for xdp_frames + +From: Toshiaki Makita + +[ Upstream commit 534da5e856334fb54cb0272a9fb3afec28ea3aed ] + +When napi_tx is enabled, virtnet_poll_cleantx() called +free_old_xmit_skbs() even for xdp send queue. +This is bogus since the queue has xdp_frames, not sk_buffs, thus mangled +device tx bytes counters because skb->len is meaningless value, and even +triggered oops due to general protection fault on freeing them. + +Since xdp send queues do not aquire locks, old xdp_frames should be +freed only in virtnet_xdp_xmit(), so just skip free_old_xmit_skbs() for +xdp send queues. + +Similarly virtnet_poll_tx() called free_old_xmit_skbs(). This NAPI +handler is called even without calling start_xmit() because cb for tx is +by default enabled. Once the handler is called, it enabled the cb again, +and then the handler would be called again. We don't need this handler +for XDP, so don't enable cb as well as not calling free_old_xmit_skbs(). + +Also, we need to disable tx NAPI when disabling XDP, so +virtnet_poll_tx() can safely access curr_queue_pairs and +xdp_queue_pairs, which are not atomically updated while disabling XDP. + +Fixes: b92f1e6751a6 ("virtio-net: transmit napi") +Fixes: 7b0411ef4aa6 ("virtio-net: clean tx descriptors from rx napi") +Signed-off-by: Toshiaki Makita +Acked-by: Jason Wang +Acked-by: Michael S. Tsirkin +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/virtio_net.c | 49 +++++++++++++++++++++++++++++++---------------- + 1 file changed, 33 insertions(+), 16 deletions(-) + +--- a/drivers/net/virtio_net.c ++++ b/drivers/net/virtio_net.c +@@ -1357,6 +1357,16 @@ static void free_old_xmit_skbs(struct se + u64_stats_update_end(&sq->stats.syncp); + } + ++static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q) ++{ ++ if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs)) ++ return false; ++ else if (q < vi->curr_queue_pairs) ++ return true; ++ else ++ return false; ++} ++ + static void virtnet_poll_cleantx(struct receive_queue *rq) + { + struct virtnet_info *vi = rq->vq->vdev->priv; +@@ -1364,7 +1374,7 @@ static void virtnet_poll_cleantx(struct + struct send_queue *sq = &vi->sq[index]; + struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index); + +- if (!sq->napi.weight) ++ if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index)) + return; + + if (__netif_tx_trylock(txq)) { +@@ -1441,8 +1451,16 @@ static int virtnet_poll_tx(struct napi_s + { + struct send_queue *sq = container_of(napi, struct send_queue, napi); + struct virtnet_info *vi = sq->vq->vdev->priv; +- struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, vq2txq(sq->vq)); ++ unsigned int index = vq2txq(sq->vq); ++ struct netdev_queue *txq; + ++ if (unlikely(is_xdp_raw_buffer_queue(vi, index))) { ++ /* We don't need to enable cb for XDP */ ++ napi_complete_done(napi, 0); ++ return 0; ++ } ++ ++ txq = netdev_get_tx_queue(vi->dev, index); + __netif_tx_lock(txq, raw_smp_processor_id()); + free_old_xmit_skbs(sq); + __netif_tx_unlock(txq); +@@ -2401,9 +2419,12 @@ static int virtnet_xdp_set(struct net_de + } + + /* Make sure NAPI is not using any XDP TX queues for RX. */ +- if (netif_running(dev)) +- for (i = 0; i < vi->max_queue_pairs; i++) ++ if (netif_running(dev)) { ++ for (i = 0; i < vi->max_queue_pairs; i++) { + napi_disable(&vi->rq[i].napi); ++ virtnet_napi_tx_disable(&vi->sq[i].napi); ++ } ++ } + + netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp); + err = _virtnet_set_queues(vi, curr_qp + xdp_qp); +@@ -2422,16 +2443,22 @@ static int virtnet_xdp_set(struct net_de + } + if (old_prog) + bpf_prog_put(old_prog); +- if (netif_running(dev)) ++ if (netif_running(dev)) { + virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); ++ virtnet_napi_tx_enable(vi, vi->sq[i].vq, ++ &vi->sq[i].napi); ++ } + } + + return 0; + + err: + if (netif_running(dev)) { +- for (i = 0; i < vi->max_queue_pairs; i++) ++ for (i = 0; i < vi->max_queue_pairs; i++) { + virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); ++ virtnet_napi_tx_enable(vi, vi->sq[i].vq, ++ &vi->sq[i].napi); ++ } + } + if (prog) + bpf_prog_sub(prog, vi->max_queue_pairs - 1); +@@ -2588,16 +2615,6 @@ static void free_receive_page_frags(stru + put_page(vi->rq[i].alloc_frag.page); + } + +-static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q) +-{ +- if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs)) +- return false; +- else if (q < vi->curr_queue_pairs) +- return true; +- else +- return false; +-} +- + static void free_unused_bufs(struct virtnet_info *vi) + { + void *buf; diff --git a/queue-4.20/virtio_net-don-t-enable-napi-when-interface-is-down.patch b/queue-4.20/virtio_net-don-t-enable-napi-when-interface-is-down.patch new file mode 100644 index 00000000000..0b105793280 --- /dev/null +++ b/queue-4.20/virtio_net-don-t-enable-napi-when-interface-is-down.patch @@ -0,0 +1,42 @@ +From foo@baz Sat Feb 2 10:53:00 CET 2019 +From: Toshiaki Makita +Date: Tue, 29 
Jan 2019 09:45:53 +0900 +Subject: virtio_net: Don't enable NAPI when interface is down + +From: Toshiaki Makita + +[ Upstream commit 8be4d9a492f88b96d4d3a06c6cbedbc40ca14c83 ] + +Commit 4e09ff536284 ("virtio-net: disable NAPI only when enabled during +XDP set") tried to fix inappropriate NAPI enabling/disabling when +!netif_running(), but was not complete. + +On error path virtio_net could enable NAPI even when !netif_running(). +This can cause enabling NAPI twice on virtnet_open(), which would +trigger BUG_ON() in napi_enable(). + +Fixes: 4941d472bf95b ("virtio-net: do not reset during XDP set") +Signed-off-by: Toshiaki Makita +Acked-by: Jason Wang +Acked-by: Michael S. Tsirkin +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/virtio_net.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/drivers/net/virtio_net.c ++++ b/drivers/net/virtio_net.c +@@ -2429,8 +2429,10 @@ static int virtnet_xdp_set(struct net_de + return 0; + + err: +- for (i = 0; i < vi->max_queue_pairs; i++) +- virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); ++ if (netif_running(dev)) { ++ for (i = 0; i < vi->max_queue_pairs; i++) ++ virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); ++ } + if (prog) + bpf_prog_sub(prog, vi->max_queue_pairs - 1); + return err; diff --git a/queue-4.20/virtio_net-don-t-process-redirected-xdp-frames-when-xdp-is-disabled.patch b/queue-4.20/virtio_net-don-t-process-redirected-xdp-frames-when-xdp-is-disabled.patch new file mode 100644 index 00000000000..12526951396 --- /dev/null +++ b/queue-4.20/virtio_net-don-t-process-redirected-xdp-frames-when-xdp-is-disabled.patch @@ -0,0 +1,121 @@ +From foo@baz Sat Feb 2 10:53:00 CET 2019 +From: Toshiaki Makita +Date: Tue, 29 Jan 2019 09:45:57 +0900 +Subject: virtio_net: Don't process redirected XDP frames when XDP is disabled + +From: Toshiaki Makita + +[ Upstream commit 03aa6d34868c07b2b1b8b2db080602d7ec528173 ] + +Commit 8dcc5b0ab0ec ("virtio_net: fix ndo_xdp_xmit crash towards dev not +ready for XDP") tried to avoid access to unexpected sq while XDP is +disabled, but was not complete. + +There was a small window which causes out of bounds sq access in +virtnet_xdp_xmit() while disabling XDP. + +An example case of + - curr_queue_pairs = 6 (2 for SKB and 4 for XDP) + - online_cpu_num = xdp_queue_paris = 4 +when XDP is enabled: + +CPU 0 CPU 1 +(Disabling XDP) (Processing redirected XDP frames) + + virtnet_xdp_xmit() +virtnet_xdp_set() + _virtnet_set_queues() + set curr_queue_pairs (2) + check if rq->xdp_prog is not NULL + virtnet_xdp_sq(vi) + qp = curr_queue_pairs - + xdp_queue_pairs + + smp_processor_id() + = 2 - 4 + 1 = -1 + sq = &vi->sq[qp] // out of bounds access + set xdp_queue_pairs (0) + rq->xdp_prog = NULL + +Basically we should not change curr_queue_pairs and xdp_queue_pairs +while someone can read the values. Thus, when disabling XDP, assign NULL +to rq->xdp_prog first, and wait for RCU grace period, then change +xxx_queue_pairs. +Note that we need to keep the current order when enabling XDP though. + +- v2: Make rcu_assign_pointer/synchronize_net conditional instead of + _virtnet_set_queues. + +Fixes: 186b3c998c50 ("virtio-net: support XDP_REDIRECT") +Signed-off-by: Toshiaki Makita +Acked-by: Jason Wang +Acked-by: Michael S. Tsirkin +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/virtio_net.c | 33 ++++++++++++++++++++++++++------- + 1 file changed, 26 insertions(+), 7 deletions(-) + +--- a/drivers/net/virtio_net.c ++++ b/drivers/net/virtio_net.c +@@ -2409,6 +2409,10 @@ static int virtnet_xdp_set(struct net_de + return -ENOMEM; + } + ++ old_prog = rtnl_dereference(vi->rq[0].xdp_prog); ++ if (!prog && !old_prog) ++ return 0; ++ + if (prog) { + prog = bpf_prog_add(prog, vi->max_queue_pairs - 1); + if (IS_ERR(prog)) +@@ -2423,21 +2427,30 @@ static int virtnet_xdp_set(struct net_de + } + } + ++ if (!prog) { ++ for (i = 0; i < vi->max_queue_pairs; i++) { ++ rcu_assign_pointer(vi->rq[i].xdp_prog, prog); ++ if (i == 0) ++ virtnet_restore_guest_offloads(vi); ++ } ++ synchronize_net(); ++ } ++ + err = _virtnet_set_queues(vi, curr_qp + xdp_qp); + if (err) + goto err; + netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp); + vi->xdp_queue_pairs = xdp_qp; + +- for (i = 0; i < vi->max_queue_pairs; i++) { +- old_prog = rtnl_dereference(vi->rq[i].xdp_prog); +- rcu_assign_pointer(vi->rq[i].xdp_prog, prog); +- if (i == 0) { +- if (!old_prog) ++ if (prog) { ++ for (i = 0; i < vi->max_queue_pairs; i++) { ++ rcu_assign_pointer(vi->rq[i].xdp_prog, prog); ++ if (i == 0 && !old_prog) + virtnet_clear_guest_offloads(vi); +- if (!prog) +- virtnet_restore_guest_offloads(vi); + } ++ } ++ ++ for (i = 0; i < vi->max_queue_pairs; i++) { + if (old_prog) + bpf_prog_put(old_prog); + if (netif_running(dev)) { +@@ -2450,6 +2463,12 @@ static int virtnet_xdp_set(struct net_de + return 0; + + err: ++ if (!prog) { ++ virtnet_clear_guest_offloads(vi); ++ for (i = 0; i < vi->max_queue_pairs; i++) ++ rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog); ++ } ++ + if (netif_running(dev)) { + for (i = 0; i < vi->max_queue_pairs; i++) { + virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); diff --git a/queue-4.20/virtio_net-fix-not-restoring-real_num_rx_queues.patch b/queue-4.20/virtio_net-fix-not-restoring-real_num_rx_queues.patch new file mode 100644 index 00000000000..80017a3140d --- /dev/null +++ b/queue-4.20/virtio_net-fix-not-restoring-real_num_rx_queues.patch @@ -0,0 +1,38 @@ +From foo@baz Sat Feb 2 10:53:00 CET 2019 +From: Toshiaki Makita +Date: Tue, 29 Jan 2019 09:45:55 +0900 +Subject: virtio_net: Fix not restoring real_num_rx_queues + +From: Toshiaki Makita + +[ Upstream commit 188313c137c4f76afd0862f50dbc185b198b9e2a ] + +When _virtnet_set_queues() failed we did not restore real_num_rx_queues. +Fix this by placing the change of real_num_rx_queues after +_virtnet_set_queues(). +This order is also in line with virtnet_set_channels(). + +Fixes: 4941d472bf95 ("virtio-net: do not reset during XDP set") +Signed-off-by: Toshiaki Makita +Acked-by: Jason Wang +Acked-by: Michael S. Tsirkin +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/virtio_net.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/virtio_net.c ++++ b/drivers/net/virtio_net.c +@@ -2426,10 +2426,10 @@ static int virtnet_xdp_set(struct net_de + } + } + +- netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp); + err = _virtnet_set_queues(vi, curr_qp + xdp_qp); + if (err) + goto err; ++ netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp); + vi->xdp_queue_pairs = xdp_qp; + + for (i = 0; i < vi->max_queue_pairs; i++) { diff --git a/queue-4.20/virtio_net-fix-out-of-bounds-access-of-sq.patch b/queue-4.20/virtio_net-fix-out-of-bounds-access-of-sq.patch new file mode 100644 index 00000000000..4b3d608fc24 --- /dev/null +++ b/queue-4.20/virtio_net-fix-out-of-bounds-access-of-sq.patch @@ -0,0 +1,54 @@ +From foo@baz Sat Feb 2 10:53:00 CET 2019 +From: Toshiaki Makita +Date: Tue, 29 Jan 2019 09:45:56 +0900 +Subject: virtio_net: Fix out of bounds access of sq + +From: Toshiaki Makita + +[ Upstream commit 1667c08a9d31c7cdf09f4890816bfbf20b685495 ] + +When XDP is disabled, curr_queue_pairs + smp_processor_id() can be +larger than max_queue_pairs. +There is no guarantee that we have enough XDP send queues dedicated for +each cpu when XDP is disabled, so do not count drops on sq in that case. + +Fixes: 5b8f3c8d30a6 ("virtio_net: Add XDP related stats") +Signed-off-by: Toshiaki Makita +Acked-by: Jason Wang +Acked-by: Michael S. Tsirkin +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/virtio_net.c | 17 +++++++---------- + 1 file changed, 7 insertions(+), 10 deletions(-) + +--- a/drivers/net/virtio_net.c ++++ b/drivers/net/virtio_net.c +@@ -490,20 +490,17 @@ static int virtnet_xdp_xmit(struct net_d + int ret, err; + int i; + +- sq = virtnet_xdp_sq(vi); +- +- if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) { +- ret = -EINVAL; +- drops = n; +- goto out; +- } +- + /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this + * indicate XDP resources have been successfully allocated. + */ + xdp_prog = rcu_dereference(rq->xdp_prog); +- if (!xdp_prog) { +- ret = -ENXIO; ++ if (!xdp_prog) ++ return -ENXIO; ++ ++ sq = virtnet_xdp_sq(vi); ++ ++ if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) { ++ ret = -EINVAL; + drops = n; + goto out; + } diff --git a/queue-4.20/virtio_net-use-xdp_return_frame-to-free-xdp_frames-on-destroying-vqs.patch b/queue-4.20/virtio_net-use-xdp_return_frame-to-free-xdp_frames-on-destroying-vqs.patch new file mode 100644 index 00000000000..bea650ca310 --- /dev/null +++ b/queue-4.20/virtio_net-use-xdp_return_frame-to-free-xdp_frames-on-destroying-vqs.patch @@ -0,0 +1,34 @@ +From foo@baz Sat Feb 2 10:53:00 CET 2019 +From: Toshiaki Makita +Date: Tue, 29 Jan 2019 09:45:58 +0900 +Subject: virtio_net: Use xdp_return_frame to free xdp_frames on destroying vqs + +From: Toshiaki Makita + +[ Upstream commit 07b344f494ddda9f061b396407c96df8c46c82b5 ] + +put_page() can work as a fallback for freeing xdp_frames, but the +appropriate way is to use xdp_return_frame(). + +Fixes: cac320c850ef ("virtio_net: convert to use generic xdp_frame and xdp_return_frame API") +Signed-off-by: Toshiaki Makita +Acked-by: Jason Wang +Acked-by: Jesper Dangaard Brouer +Acked-by: Michael S. Tsirkin +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/virtio_net.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/virtio_net.c ++++ b/drivers/net/virtio_net.c +@@ -2642,7 +2642,7 @@ static void free_unused_bufs(struct virt + if (!is_xdp_raw_buffer_queue(vi, i)) + dev_kfree_skb(buf); + else +- put_page(virt_to_head_page(buf)); ++ xdp_return_frame(buf); + } + } +
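
[ Editor's note: the virtio_net fixes above tell sk_buffs and xdp_frames
  apart by tagging the low bit of the buffer pointer (see is_xdp_frame(),
  xdp_to_ptr() and ptr_to_xdp() in the "Differentiate sk_buff and
  xdp_frame on freeing" patch). Below is a minimal stand-alone model of
  that tagging scheme, with illustrative names; it relies on allocations
  being at least 2-byte aligned so that bit 0 is always free: ]

  #include <assert.h>
  #include <stdbool.h>
  #include <stdint.h>
  #include <stdlib.h>

  #define XDP_FLAG 0x1UL                /* low pointer bit marks an xdp_frame */

  static bool is_xdp(void *p) { return ((uintptr_t)p & XDP_FLAG) != 0; }
  static void *tag(void *f)   { return (void *)((uintptr_t)f | XDP_FLAG); }
  static void *untag(void *p) { return (void *)((uintptr_t)p & ~XDP_FLAG); }

  int main(void)
  {
          void *frame = malloc(64);       /* stands in for an xdp_frame */
          void *tagged = tag(frame);

          assert(is_xdp(tagged));         /* recognized as an xdp_frame */
          assert(untag(tagged) == frame); /* original pointer recovered */
          assert(!is_xdp(frame));         /* plain sk_buff pointer case */
          free(frame);
          return 0;
  }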