git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.0-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 20 Apr 2019 14:44:19 +0000 (16:44 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 20 Apr 2019 14:44:19 +0000 (16:44 +0200)
added patches:
bonding-fix-event-handling-for-stacked-bonds.patch
failover-allow-name-change-on-iff_up-slave-interfaces.patch
ipv4-ensure-rcu_read_lock-in-ipv4_link_failure.patch
ipv4-recompile-ip-options-in-ipv4_link_failure.patch
mlxsw-core-do-not-use-wq_mem_reclaim-for-emad-workqueue.patch
mlxsw-core-do-not-use-wq_mem_reclaim-for-mlxsw-ordered-workqueue.patch
mlxsw-core-do-not-use-wq_mem_reclaim-for-mlxsw-workqueue.patch
mlxsw-spectrum_router-do-not-check-vrf-mac-address.patch
mlxsw-spectrum_switchdev-add-mdb-entries-in-prepare-phase.patch
net-atm-fix-potential-spectre-v1-vulnerabilities.patch
net-bridge-fix-netlink-export-of-vlan_stats_per_port-option.patch
net-bridge-fix-per-port-af_packet-sockets.patch
net-bridge-multicast-use-rcu-to-access-port-list-from-br_multicast_start_querier.patch
net-fec-manage-ahb-clock-in-runtime-pm.patch
net-fix-missing-meta-data-in-skb-with-vlan-packet.patch
net-fou-do-not-use-guehdr-after-iptunnel_pull_offloads-in-gue_udp_recv.patch
net-mlx5-fpga-tls-hold-rcu-read-lock-a-bit-longer.patch
net-mlx5-fpga-tls-idr-remove-on-flow-delete.patch
net-mlx5e-protect-against-non-uplink-representor-for-encap.patch
net-mlx5e-rx-check-ip-headers-sanity.patch
net-mlx5e-rx-fixup-skb-checksum-for-packets-with-tail-padding.patch
net-mlx5e-switch-to-toeplitz-rss-hash-by-default.patch
net-mlx5e-xdp-avoid-checksum-complete-when-xdp-prog-is-loaded.patch
net-strparser-partially-revert-strparser-call-skb_unclone-conditionally.patch
net-thunderx-don-t-allow-jumbo-frames-with-xdp.patch
net-thunderx-raise-xdp-mtu-to-1508.patch
net-tls-don-t-leak-partially-sent-record-in-device-mode.patch
net-tls-fix-build-without-config_tls_device.patch
net-tls-fix-the-iv-leaks.patch
net-tls-prevent-bad-memory-access-in-tls_is_sk_tx_device_offloaded.patch
nfc-nci-add-some-bounds-checking-in-nci_hci_cmd_received.patch
nfc-nci-potential-off-by-one-in-pipes-array.patch
nfp-flower-remove-vlan-cfi-bit-from-push-vlan-action.patch
nfp-flower-replace-cfi-with-vlan-present.patch
revert-net-mlx5e-enable-reporting-checksum-unnecessary-also-for-l3-packets.patch
route-avoid-crash-from-dereferencing-null-rt-from.patch
sch_cake-make-sure-we-can-write-the-ip-header-before-changing-dscp-bits.patch
sch_cake-simplify-logic-in-cake_select_tin.patch
sch_cake-use-tc_skb_protocol-helper-for-getting-packet-protocol.patch
tcp-tcp_grow_window-needs-to-respect-tcp_space.patch
team-set-slave-to-promisc-if-team-is-already-in-promisc-mode.patch
tipc-missing-entries-in-name-table-of-publications.patch
vhost-reject-zero-size-iova-range.patch

44 files changed:
queue-5.0/bonding-fix-event-handling-for-stacked-bonds.patch [new file with mode: 0644]
queue-5.0/failover-allow-name-change-on-iff_up-slave-interfaces.patch [new file with mode: 0644]
queue-5.0/ipv4-ensure-rcu_read_lock-in-ipv4_link_failure.patch [new file with mode: 0644]
queue-5.0/ipv4-recompile-ip-options-in-ipv4_link_failure.patch [new file with mode: 0644]
queue-5.0/mlxsw-core-do-not-use-wq_mem_reclaim-for-emad-workqueue.patch [new file with mode: 0644]
queue-5.0/mlxsw-core-do-not-use-wq_mem_reclaim-for-mlxsw-ordered-workqueue.patch [new file with mode: 0644]
queue-5.0/mlxsw-core-do-not-use-wq_mem_reclaim-for-mlxsw-workqueue.patch [new file with mode: 0644]
queue-5.0/mlxsw-spectrum_router-do-not-check-vrf-mac-address.patch [new file with mode: 0644]
queue-5.0/mlxsw-spectrum_switchdev-add-mdb-entries-in-prepare-phase.patch [new file with mode: 0644]
queue-5.0/net-atm-fix-potential-spectre-v1-vulnerabilities.patch [new file with mode: 0644]
queue-5.0/net-bridge-fix-netlink-export-of-vlan_stats_per_port-option.patch [new file with mode: 0644]
queue-5.0/net-bridge-fix-per-port-af_packet-sockets.patch [new file with mode: 0644]
queue-5.0/net-bridge-multicast-use-rcu-to-access-port-list-from-br_multicast_start_querier.patch [new file with mode: 0644]
queue-5.0/net-fec-manage-ahb-clock-in-runtime-pm.patch [new file with mode: 0644]
queue-5.0/net-fix-missing-meta-data-in-skb-with-vlan-packet.patch [new file with mode: 0644]
queue-5.0/net-fou-do-not-use-guehdr-after-iptunnel_pull_offloads-in-gue_udp_recv.patch [new file with mode: 0644]
queue-5.0/net-mlx5-fpga-tls-hold-rcu-read-lock-a-bit-longer.patch [new file with mode: 0644]
queue-5.0/net-mlx5-fpga-tls-idr-remove-on-flow-delete.patch [new file with mode: 0644]
queue-5.0/net-mlx5e-protect-against-non-uplink-representor-for-encap.patch [new file with mode: 0644]
queue-5.0/net-mlx5e-rx-check-ip-headers-sanity.patch [new file with mode: 0644]
queue-5.0/net-mlx5e-rx-fixup-skb-checksum-for-packets-with-tail-padding.patch [new file with mode: 0644]
queue-5.0/net-mlx5e-switch-to-toeplitz-rss-hash-by-default.patch [new file with mode: 0644]
queue-5.0/net-mlx5e-xdp-avoid-checksum-complete-when-xdp-prog-is-loaded.patch [new file with mode: 0644]
queue-5.0/net-strparser-partially-revert-strparser-call-skb_unclone-conditionally.patch [new file with mode: 0644]
queue-5.0/net-thunderx-don-t-allow-jumbo-frames-with-xdp.patch [new file with mode: 0644]
queue-5.0/net-thunderx-raise-xdp-mtu-to-1508.patch [new file with mode: 0644]
queue-5.0/net-tls-don-t-leak-partially-sent-record-in-device-mode.patch [new file with mode: 0644]
queue-5.0/net-tls-fix-build-without-config_tls_device.patch [new file with mode: 0644]
queue-5.0/net-tls-fix-the-iv-leaks.patch [new file with mode: 0644]
queue-5.0/net-tls-prevent-bad-memory-access-in-tls_is_sk_tx_device_offloaded.patch [new file with mode: 0644]
queue-5.0/nfc-nci-add-some-bounds-checking-in-nci_hci_cmd_received.patch [new file with mode: 0644]
queue-5.0/nfc-nci-potential-off-by-one-in-pipes-array.patch [new file with mode: 0644]
queue-5.0/nfp-flower-remove-vlan-cfi-bit-from-push-vlan-action.patch [new file with mode: 0644]
queue-5.0/nfp-flower-replace-cfi-with-vlan-present.patch [new file with mode: 0644]
queue-5.0/revert-net-mlx5e-enable-reporting-checksum-unnecessary-also-for-l3-packets.patch [new file with mode: 0644]
queue-5.0/route-avoid-crash-from-dereferencing-null-rt-from.patch [new file with mode: 0644]
queue-5.0/sch_cake-make-sure-we-can-write-the-ip-header-before-changing-dscp-bits.patch [new file with mode: 0644]
queue-5.0/sch_cake-simplify-logic-in-cake_select_tin.patch [new file with mode: 0644]
queue-5.0/sch_cake-use-tc_skb_protocol-helper-for-getting-packet-protocol.patch [new file with mode: 0644]
queue-5.0/series [new file with mode: 0644]
queue-5.0/tcp-tcp_grow_window-needs-to-respect-tcp_space.patch [new file with mode: 0644]
queue-5.0/team-set-slave-to-promisc-if-team-is-already-in-promisc-mode.patch [new file with mode: 0644]
queue-5.0/tipc-missing-entries-in-name-table-of-publications.patch [new file with mode: 0644]
queue-5.0/vhost-reject-zero-size-iova-range.patch [new file with mode: 0644]
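
The queue-5.0/series file added above lists these patches in application order. As a rough, illustrative sketch (the base tag v5.0.8 and the side-by-side checkout layout are assumptions, not part of this commit), the queue can be imported onto a 5.0.y tree with quilt-style tooling:

  cd linux-stable
  git checkout -b queue-5.0-test v5.0.8
  git quiltimport --patches ../stable-queue/queue-5.0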

diff --git a/queue-5.0/bonding-fix-event-handling-for-stacked-bonds.patch b/queue-5.0/bonding-fix-event-handling-for-stacked-bonds.patch
new file mode 100644 (file)
index 0000000..42d7459
--- /dev/null
@@ -0,0 +1,47 @@
+From foo@baz Sat Apr 20 16:43:09 CEST 2019
+From: Sabrina Dubroca <sd@queasysnail.net>
+Date: Fri, 12 Apr 2019 15:04:10 +0200
+Subject: bonding: fix event handling for stacked bonds
+
+From: Sabrina Dubroca <sd@queasysnail.net>
+
+[ Upstream commit 92480b3977fd3884649d404cbbaf839b70035699 ]
+
+When a bond is enslaved to another bond, bond_netdev_event() only
+handles the event as if the bond is a master, and skips treating the
+bond as a slave.
+
+This leads to a refcount leak on the slave, since we don't remove the
+adjacency to its master and the master holds a reference on the slave.
+
+Reproducer:
+  ip link add bondL type bond
+  ip link add bondU type bond
+  ip link set bondL master bondU
+  ip link del bondL
+
+No "Fixes:" tag, this code is older than git history.
+
+Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/bonding/bond_main.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/bonding/bond_main.c
++++ b/drivers/net/bonding/bond_main.c
+@@ -3214,8 +3214,12 @@ static int bond_netdev_event(struct noti
+               return NOTIFY_DONE;
+       if (event_dev->flags & IFF_MASTER) {
++              int ret;
++
+               netdev_dbg(event_dev, "IFF_MASTER\n");
+-              return bond_master_netdev_event(event, event_dev);
++              ret = bond_master_netdev_event(event, event_dev);
++              if (ret != NOTIFY_DONE)
++                      return ret;
+       }
+       if (event_dev->flags & IFF_SLAVE) {
diff --git a/queue-5.0/failover-allow-name-change-on-iff_up-slave-interfaces.patch b/queue-5.0/failover-allow-name-change-on-iff_up-slave-interfaces.patch
new file mode 100644 (file)
index 0000000..89440f7
--- /dev/null
@@ -0,0 +1,145 @@
+From foo@baz Sat Apr 20 16:43:09 CEST 2019
+From: Si-Wei Liu <si-wei.liu@oracle.com>
+Date: Mon, 8 Apr 2019 19:45:27 -0400
+Subject: failover: allow name change on IFF_UP slave interfaces
+
+From: Si-Wei Liu <si-wei.liu@oracle.com>
+
+[ Upstream commit 8065a779f17e94536a1c4dcee4f9d88011672f97 ]
+
+When a netdev appears through hot plug then gets enslaved by a failover
+master that is already up and running, the slave will be opened
+right away after getting enslaved. Today there's a race that userspace
+(udev) may fail to rename the slave if the kernel (net_failover)
+opens the slave earlier than when the userspace rename happens.
+Unlike bond or team, the primary slave of failover can't be renamed by
+userspace ahead of time, since the kernel initiated auto-enslavement is
+unable to, or rather, is never meant to be synchronized with the rename
+request from userspace.
+
+As the failover slave interfaces are not designed to be operated
+directly by userspace apps: IP configuration, filter rules with
+regard to network traffic passing and etc., should all be done on master
+interface. In general, userspace apps only care about the
+name of master interface, while slave names are less important as long
+as admin users can see reliable names that may carry
+other information describing the netdev. For e.g., they can infer that
+"ens3nsby" is a standby slave of "ens3", while for a
+name like "eth0" they can't tell which master it belongs to.
+
+Historically the name of IFF_UP interface can't be changed because
+there might be admin script or management software that is already
+relying on such behavior and assumes that the slave name can't be
+changed once UP. But failover is special: with the in-kernel
+auto-enslavement mechanism, the userspace expectation for device
+enumeration and bring-up order is already broken. Previously initramfs
+and various userspace config tools were modified to bypass failover
+slaves because of auto-enslavement and duplicate MAC address. Similarly,
+in case that users care about seeing reliable slave name, the new type
+of failover slaves needs to be taken care of specifically in userspace
+anyway.
+
+It's less risky to lift up the rename restriction on failover slave
+which is already UP. Although it's possible this change may potentially
+break userspace component (most likely configuration scripts or
+management software) that assumes slave name can't be changed while
+UP, it's relatively a limited and controllable set among all userspace
+components, which can be fixed specifically to listen for the rename
+events on failover slaves. Userspace component interacting with slaves
+is expected to be changed to operate on failover master interface
+instead, as the failover slave is dynamic in nature which may come and
+go at any point.  The goal is to make the role of failover slaves less
+relevant, and userspace components should only deal with failover master
+in the long run.
+
+Fixes: 30c8bd5aa8b2 ("net: Introduce generic failover module")
+Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
+Reviewed-by: Liran Alon <liran.alon@oracle.com>
+Acked-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/netdevice.h |    3 +++
+ net/core/dev.c            |   16 +++++++++++++++-
+ net/core/failover.c       |    6 +++---
+ 3 files changed, 21 insertions(+), 4 deletions(-)
+
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -1484,6 +1484,7 @@ struct net_device_ops {
+  * @IFF_FAILOVER: device is a failover master device
+  * @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device
+  * @IFF_L3MDEV_RX_HANDLER: only invoke the rx handler of L3 master device
++ * @IFF_LIVE_RENAME_OK: rename is allowed while device is up and running
+  */
+ enum netdev_priv_flags {
+       IFF_802_1Q_VLAN                 = 1<<0,
+@@ -1516,6 +1517,7 @@ enum netdev_priv_flags {
+       IFF_FAILOVER                    = 1<<27,
+       IFF_FAILOVER_SLAVE              = 1<<28,
+       IFF_L3MDEV_RX_HANDLER           = 1<<29,
++      IFF_LIVE_RENAME_OK              = 1<<30,
+ };
+ #define IFF_802_1Q_VLAN                       IFF_802_1Q_VLAN
+@@ -1547,6 +1549,7 @@ enum netdev_priv_flags {
+ #define IFF_FAILOVER                  IFF_FAILOVER
+ #define IFF_FAILOVER_SLAVE            IFF_FAILOVER_SLAVE
+ #define IFF_L3MDEV_RX_HANDLER         IFF_L3MDEV_RX_HANDLER
++#define IFF_LIVE_RENAME_OK            IFF_LIVE_RENAME_OK
+ /**
+  *    struct net_device - The DEVICE structure.
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -1184,7 +1184,21 @@ int dev_change_name(struct net_device *d
+       BUG_ON(!dev_net(dev));
+       net = dev_net(dev);
+-      if (dev->flags & IFF_UP)
++
++      /* Some auto-enslaved devices e.g. failover slaves are
++       * special, as userspace might rename the device after
++       * the interface had been brought up and running since
++       * the point kernel initiated auto-enslavement. Allow
++       * live name change even when these slave devices are
++       * up and running.
++       *
++       * Typically, users of these auto-enslaving devices
++       * don't actually care about slave name change, as
++       * they are supposed to operate on master interface
++       * directly.
++       */
++      if (dev->flags & IFF_UP &&
++          likely(!(dev->priv_flags & IFF_LIVE_RENAME_OK)))
+               return -EBUSY;
+       write_seqcount_begin(&devnet_rename_seq);
+--- a/net/core/failover.c
++++ b/net/core/failover.c
+@@ -80,14 +80,14 @@ static int failover_slave_register(struc
+               goto err_upper_link;
+       }
+-      slave_dev->priv_flags |= IFF_FAILOVER_SLAVE;
++      slave_dev->priv_flags |= (IFF_FAILOVER_SLAVE | IFF_LIVE_RENAME_OK);
+       if (fops && fops->slave_register &&
+           !fops->slave_register(slave_dev, failover_dev))
+               return NOTIFY_OK;
+       netdev_upper_dev_unlink(slave_dev, failover_dev);
+-      slave_dev->priv_flags &= ~IFF_FAILOVER_SLAVE;
++      slave_dev->priv_flags &= ~(IFF_FAILOVER_SLAVE | IFF_LIVE_RENAME_OK);
+ err_upper_link:
+       netdev_rx_handler_unregister(slave_dev);
+ done:
+@@ -121,7 +121,7 @@ int failover_slave_unregister(struct net
+       netdev_rx_handler_unregister(slave_dev);
+       netdev_upper_dev_unlink(slave_dev, failover_dev);
+-      slave_dev->priv_flags &= ~IFF_FAILOVER_SLAVE;
++      slave_dev->priv_flags &= ~(IFF_FAILOVER_SLAVE | IFF_LIVE_RENAME_OK);
+       if (fops && fops->slave_unregister &&
+           !fops->slave_unregister(slave_dev, failover_dev))
diff --git a/queue-5.0/ipv4-ensure-rcu_read_lock-in-ipv4_link_failure.patch b/queue-5.0/ipv4-ensure-rcu_read_lock-in-ipv4_link_failure.patch
new file mode 100644 (file)
index 0000000..b5da459
--- /dev/null
@@ -0,0 +1,88 @@
+From foo@baz Sat Apr 20 16:43:09 CEST 2019
+From: Eric Dumazet <edumazet@google.com>
+Date: Sat, 13 Apr 2019 17:32:21 -0700
+Subject: ipv4: ensure rcu_read_lock() in ipv4_link_failure()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit c543cb4a5f07e09237ec0fc2c60c9f131b2c79ad ]
+
+fib_compute_spec_dst() needs to be called under rcu protection.
+
+syzbot reported :
+
+WARNING: suspicious RCU usage
+5.1.0-rc4+ #165 Not tainted
+include/linux/inetdevice.h:220 suspicious rcu_dereference_check() usage!
+
+other info that might help us debug this:
+
+rcu_scheduler_active = 2, debug_locks = 1
+1 lock held by swapper/0/0:
+ #0: 0000000051b67925 ((&n->timer)){+.-.}, at: lockdep_copy_map include/linux/lockdep.h:170 [inline]
+ #0: 0000000051b67925 ((&n->timer)){+.-.}, at: call_timer_fn+0xda/0x720 kernel/time/timer.c:1315
+
+stack backtrace:
+CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.1.0-rc4+ #165
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Call Trace:
+ <IRQ>
+ __dump_stack lib/dump_stack.c:77 [inline]
+ dump_stack+0x172/0x1f0 lib/dump_stack.c:113
+ lockdep_rcu_suspicious+0x153/0x15d kernel/locking/lockdep.c:5162
+ __in_dev_get_rcu include/linux/inetdevice.h:220 [inline]
+ fib_compute_spec_dst+0xbbd/0x1030 net/ipv4/fib_frontend.c:294
+ spec_dst_fill net/ipv4/ip_options.c:245 [inline]
+ __ip_options_compile+0x15a7/0x1a10 net/ipv4/ip_options.c:343
+ ipv4_link_failure+0x172/0x400 net/ipv4/route.c:1195
+ dst_link_failure include/net/dst.h:427 [inline]
+ arp_error_report+0xd1/0x1c0 net/ipv4/arp.c:297
+ neigh_invalidate+0x24b/0x570 net/core/neighbour.c:995
+ neigh_timer_handler+0xc35/0xf30 net/core/neighbour.c:1081
+ call_timer_fn+0x190/0x720 kernel/time/timer.c:1325
+ expire_timers kernel/time/timer.c:1362 [inline]
+ __run_timers kernel/time/timer.c:1681 [inline]
+ __run_timers kernel/time/timer.c:1649 [inline]
+ run_timer_softirq+0x652/0x1700 kernel/time/timer.c:1694
+ __do_softirq+0x266/0x95a kernel/softirq.c:293
+ invoke_softirq kernel/softirq.c:374 [inline]
+ irq_exit+0x180/0x1d0 kernel/softirq.c:414
+ exiting_irq arch/x86/include/asm/apic.h:536 [inline]
+ smp_apic_timer_interrupt+0x14a/0x570 arch/x86/kernel/apic/apic.c:1062
+ apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:807
+
+Fixes: ed0de45a1008 ("ipv4: recompile ip options in ipv4_link_failure")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Cc: Stephen Suryaputra <ssuryaextr@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/route.c |   10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -1185,14 +1185,20 @@ static struct dst_entry *ipv4_dst_check(
+ static void ipv4_link_failure(struct sk_buff *skb)
+ {
+-      struct rtable *rt;
+       struct ip_options opt;
++      struct rtable *rt;
++      int res;
+       /* Recompile ip options since IPCB may not be valid anymore.
+        */
+       memset(&opt, 0, sizeof(opt));
+       opt.optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr);
+-      if (__ip_options_compile(dev_net(skb->dev), &opt, skb, NULL))
++
++      rcu_read_lock();
++      res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL);
++      rcu_read_unlock();
++
++      if (res)
+               return;
+       __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, &opt);
diff --git a/queue-5.0/ipv4-recompile-ip-options-in-ipv4_link_failure.patch b/queue-5.0/ipv4-recompile-ip-options-in-ipv4_link_failure.patch
new file mode 100644 (file)
index 0000000..ddb7fb5
--- /dev/null
@@ -0,0 +1,42 @@
+From foo@baz Sat Apr 20 16:43:09 CEST 2019
+From: Stephen Suryaputra <ssuryaextr@gmail.com>
+Date: Fri, 12 Apr 2019 16:19:27 -0400
+Subject: ipv4: recompile ip options in ipv4_link_failure
+
+From: Stephen Suryaputra <ssuryaextr@gmail.com>
+
+[ Upstream commit ed0de45a1008991fdaa27a0152befcb74d126a8b ]
+
+Recompile IP options since IPCB may not be valid anymore when
+ipv4_link_failure is called from arp_error_report.
+
+Refer to the commit 3da1ed7ac398 ("net: avoid use IPCB in cipso_v4_error")
+and the commit before that (9ef6b42ad6fd) for a similar issue.
+
+Signed-off-by: Stephen Suryaputra <ssuryaextr@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/route.c |   10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -1186,8 +1186,16 @@ static struct dst_entry *ipv4_dst_check(
+ static void ipv4_link_failure(struct sk_buff *skb)
+ {
+       struct rtable *rt;
++      struct ip_options opt;
+-      icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
++      /* Recompile ip options since IPCB may not be valid anymore.
++       */
++      memset(&opt, 0, sizeof(opt));
++      opt.optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr);
++      if (__ip_options_compile(dev_net(skb->dev), &opt, skb, NULL))
++              return;
++
++      __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, &opt);
+       rt = skb_rtable(skb);
+       if (rt)
diff --git a/queue-5.0/mlxsw-core-do-not-use-wq_mem_reclaim-for-emad-workqueue.patch b/queue-5.0/mlxsw-core-do-not-use-wq_mem_reclaim-for-emad-workqueue.patch
new file mode 100644 (file)
index 0000000..e1c11f2
--- /dev/null
@@ -0,0 +1,36 @@
+From foo@baz Sat Apr 20 16:43:09 CEST 2019
+From: Ido Schimmel <idosch@mellanox.com>
+Date: Wed, 10 Apr 2019 06:58:13 +0000
+Subject: mlxsw: core: Do not use WQ_MEM_RECLAIM for EMAD workqueue
+
+From: Ido Schimmel <idosch@mellanox.com>
+
+[ Upstream commit a8c133b06183c529c51cd0d54eb57d6b7078370c ]
+
+The EMAD workqueue is used to handle retransmission of EMAD packets that
+contain configuration data for the device's firmware.
+
+Given the workers need to allocate these packets and that the code is
+not called as part of memory reclaim path, remove the WQ_MEM_RECLAIM
+flag.
+
+Fixes: d965465b60ba ("mlxsw: core: Fix possible deadlock")
+Signed-off-by: Ido Schimmel <idosch@mellanox.com>
+Acked-by: Jiri Pirko <jiri@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlxsw/core.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
++++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
+@@ -568,7 +568,7 @@ static int mlxsw_emad_init(struct mlxsw_
+       if (!(mlxsw_core->bus->features & MLXSW_BUS_F_TXRX))
+               return 0;
+-      emad_wq = alloc_workqueue("mlxsw_core_emad", WQ_MEM_RECLAIM, 0);
++      emad_wq = alloc_workqueue("mlxsw_core_emad", 0, 0);
+       if (!emad_wq)
+               return -ENOMEM;
+       mlxsw_core->emad_wq = emad_wq;
diff --git a/queue-5.0/mlxsw-core-do-not-use-wq_mem_reclaim-for-mlxsw-ordered-workqueue.patch b/queue-5.0/mlxsw-core-do-not-use-wq_mem_reclaim-for-mlxsw-ordered-workqueue.patch
new file mode 100644 (file)
index 0000000..1ab4286
--- /dev/null
@@ -0,0 +1,77 @@
+From foo@baz Sat Apr 20 16:43:09 CEST 2019
+From: Ido Schimmel <idosch@mellanox.com>
+Date: Wed, 10 Apr 2019 06:58:14 +0000
+Subject: mlxsw: core: Do not use WQ_MEM_RECLAIM for mlxsw ordered workqueue
+
+From: Ido Schimmel <idosch@mellanox.com>
+
+[ Upstream commit 4af0699782e2cc7d0d89db9eb6f8844dd3df82dc ]
+
+The ordered workqueue is used to offload various objects such as routes
+and neighbours in the order they are notified.
+
+It should not be called as part of memory reclaim path, so remove the
+WQ_MEM_RECLAIM flag. This can also result in a warning [1], if a worker
+tries to flush a non-WQ_MEM_RECLAIM workqueue.
+
+[1]
+[97703.542861] workqueue: WQ_MEM_RECLAIM mlxsw_core_ordered:mlxsw_sp_router_fib6_event_work [mlxsw_spectrum] is flushing !WQ_MEM_RECLAIM events:rht_deferred_worker
+[97703.542884] WARNING: CPU: 1 PID: 32492 at kernel/workqueue.c:2605 check_flush_dependency+0xb5/0x130
+...
+[97703.542988] Hardware name: Mellanox Technologies Ltd. MSN3700C/VMOD0008, BIOS 5.11 10/10/2018
+[97703.543049] Workqueue: mlxsw_core_ordered mlxsw_sp_router_fib6_event_work [mlxsw_spectrum]
+[97703.543061] RIP: 0010:check_flush_dependency+0xb5/0x130
+...
+[97703.543071] RSP: 0018:ffffb3f08137bc00 EFLAGS: 00010086
+[97703.543076] RAX: 0000000000000000 RBX: ffff96e07740ae00 RCX: 0000000000000000
+[97703.543080] RDX: 0000000000000094 RSI: ffffffff82dc1934 RDI: 0000000000000046
+[97703.543084] RBP: ffffb3f08137bc20 R08: ffffffff82dc18a0 R09: 00000000000225c0
+[97703.543087] R10: 0000000000000000 R11: 0000000000007eec R12: ffffffff816e4ee0
+[97703.543091] R13: ffff96e06f6a5c00 R14: ffff96e077ba7700 R15: ffffffff812ab0c0
+[97703.543097] FS: 0000000000000000(0000) GS:ffff96e077a80000(0000) knlGS:0000000000000000
+[97703.543101] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[97703.543104] CR2: 00007f8cd135b280 CR3: 00000001e860e003 CR4: 00000000003606e0
+[97703.543109] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+[97703.543112] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+[97703.543115] Call Trace:
+[97703.543129] __flush_work+0xbd/0x1e0
+[97703.543137] ? __cancel_work_timer+0x136/0x1b0
+[97703.543145] ? pwq_dec_nr_in_flight+0x49/0xa0
+[97703.543154] __cancel_work_timer+0x136/0x1b0
+[97703.543175] ? mlxsw_reg_trans_bulk_wait+0x145/0x400 [mlxsw_core]
+[97703.543184] cancel_work_sync+0x10/0x20
+[97703.543191] rhashtable_free_and_destroy+0x23/0x140
+[97703.543198] rhashtable_destroy+0xd/0x10
+[97703.543254] mlxsw_sp_fib_destroy+0xb1/0xf0 [mlxsw_spectrum]
+[97703.543310] mlxsw_sp_vr_put+0xa8/0xc0 [mlxsw_spectrum]
+[97703.543364] mlxsw_sp_fib_node_put+0xbf/0x140 [mlxsw_spectrum]
+[97703.543418] ? mlxsw_sp_fib6_entry_destroy+0xe8/0x110 [mlxsw_spectrum]
+[97703.543475] mlxsw_sp_router_fib6_event_work+0x6cd/0x7f0 [mlxsw_spectrum]
+[97703.543484] process_one_work+0x1fd/0x400
+[97703.543493] worker_thread+0x34/0x410
+[97703.543500] kthread+0x121/0x140
+[97703.543507] ? process_one_work+0x400/0x400
+[97703.543512] ? kthread_park+0x90/0x90
+[97703.543523] ret_from_fork+0x35/0x40
+
+Fixes: a3832b31898f ("mlxsw: core: Create an ordered workqueue for FIB offload")
+Signed-off-by: Ido Schimmel <idosch@mellanox.com>
+Reported-by: Semion Lisyansky <semionl@mellanox.com>
+Acked-by: Jiri Pirko <jiri@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlxsw/core.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
++++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
+@@ -1915,7 +1915,7 @@ static int __init mlxsw_core_module_init
+       mlxsw_wq = alloc_workqueue(mlxsw_core_driver_name, WQ_MEM_RECLAIM, 0);
+       if (!mlxsw_wq)
+               return -ENOMEM;
+-      mlxsw_owq = alloc_ordered_workqueue("%s_ordered", WQ_MEM_RECLAIM,
++      mlxsw_owq = alloc_ordered_workqueue("%s_ordered", 0,
+                                           mlxsw_core_driver_name);
+       if (!mlxsw_owq) {
+               err = -ENOMEM;
diff --git a/queue-5.0/mlxsw-core-do-not-use-wq_mem_reclaim-for-mlxsw-workqueue.patch b/queue-5.0/mlxsw-core-do-not-use-wq_mem_reclaim-for-mlxsw-workqueue.patch
new file mode 100644 (file)
index 0000000..22a9019
--- /dev/null
@@ -0,0 +1,36 @@
+From foo@baz Sat Apr 20 16:43:09 CEST 2019
+From: Ido Schimmel <idosch@mellanox.com>
+Date: Wed, 10 Apr 2019 06:58:15 +0000
+Subject: mlxsw: core: Do not use WQ_MEM_RECLAIM for mlxsw workqueue
+
+From: Ido Schimmel <idosch@mellanox.com>
+
+[ Upstream commit b442fed1b724af0de087912a5718ddde1b87acbb ]
+
+The workqueue is used to periodically update the networking stack about
+activity / statistics of various objects such as neighbours and TC
+actions.
+
+It should not be called as part of memory reclaim path, so remove the
+WQ_MEM_RECLAIM flag.
+
+Fixes: 3d5479e92087 ("mlxsw: core: Remove deprecated create_workqueue")
+Signed-off-by: Ido Schimmel <idosch@mellanox.com>
+Acked-by: Jiri Pirko <jiri@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlxsw/core.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
++++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
+@@ -1912,7 +1912,7 @@ static int __init mlxsw_core_module_init
+ {
+       int err;
+-      mlxsw_wq = alloc_workqueue(mlxsw_core_driver_name, WQ_MEM_RECLAIM, 0);
++      mlxsw_wq = alloc_workqueue(mlxsw_core_driver_name, 0, 0);
+       if (!mlxsw_wq)
+               return -ENOMEM;
+       mlxsw_owq = alloc_ordered_workqueue("%s_ordered", 0,
diff --git a/queue-5.0/mlxsw-spectrum_router-do-not-check-vrf-mac-address.patch b/queue-5.0/mlxsw-spectrum_router-do-not-check-vrf-mac-address.patch
new file mode 100644 (file)
index 0000000..e9383d5
--- /dev/null
@@ -0,0 +1,41 @@
+From foo@baz Sat Apr 20 16:43:09 CEST 2019
+From: Ido Schimmel <idosch@mellanox.com>
+Date: Wed, 10 Apr 2019 06:58:15 +0000
+Subject: mlxsw: spectrum_router: Do not check VRF MAC address
+
+From: Ido Schimmel <idosch@mellanox.com>
+
+[ Upstream commit 972fae683cbad5cf348268e76abc6d55cfb3ba87 ]
+
+Commit 74bc99397438 ("mlxsw: spectrum_router: Veto unsupported RIF MAC
+addresses") enabled the driver to veto router interface (RIF) MAC
+addresses that it cannot support.
+
+This check should only be performed for interfaces for which the driver
+actually configures a RIF. A VRF upper is not one of them, so ignore it.
+
+Without this patch it is not possible to set an IP address on the VRF
+device and use it as a loopback.
+
+Fixes: 74bc99397438 ("mlxsw: spectrum_router: Veto unsupported RIF MAC addresses")
+Signed-off-by: Ido Schimmel <idosch@mellanox.com>
+Reported-by: Alexander Petrovskiy <alexpe@mellanox.com>
+Tested-by: Alexander Petrovskiy <alexpe@mellanox.com>
+Acked-by: Jiri Pirko <jiri@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+@@ -6745,7 +6745,7 @@ static int mlxsw_sp_router_port_check_ri
+       /* A RIF is not created for macvlan netdevs. Their MAC is used to
+        * populate the FDB
+        */
+-      if (netif_is_macvlan(dev))
++      if (netif_is_macvlan(dev) || netif_is_l3_master(dev))
+               return 0;
+       for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
diff --git a/queue-5.0/mlxsw-spectrum_switchdev-add-mdb-entries-in-prepare-phase.patch b/queue-5.0/mlxsw-spectrum_switchdev-add-mdb-entries-in-prepare-phase.patch
new file mode 100644 (file)
index 0000000..dbcb818
--- /dev/null
@@ -0,0 +1,66 @@
+From foo@baz Sat Apr 20 16:43:09 CEST 2019
+From: Ido Schimmel <idosch@mellanox.com>
+Date: Wed, 10 Apr 2019 06:58:12 +0000
+Subject: mlxsw: spectrum_switchdev: Add MDB entries in prepare phase
+
+From: Ido Schimmel <idosch@mellanox.com>
+
+[ Upstream commit d4d0e40977ac450f32f2db5e4d8e23c9d2578899 ]
+
+The driver cannot guarantee in the prepare phase that it will be able to
+write an MDB entry to the device. In case the driver returned success
+during the prepare phase, but then failed to add the entry in the commit
+phase, a WARNING [1] will be generated by the switchdev core.
+
+Fix this by doing the work in the prepare phase instead.
+
+[1]
+[  358.544486] swp12s0: Commit of object (id=2) failed.
+[  358.550061] WARNING: CPU: 0 PID: 30 at net/switchdev/switchdev.c:281 switchdev_port_obj_add_now+0x9b/0xe0
+[  358.560754] CPU: 0 PID: 30 Comm: kworker/0:1 Not tainted 5.0.0-custom-13382-gf2449babf221 #1350
+[  358.570472] Hardware name: Mellanox Technologies Ltd. MSN2100-CB2FO/SA001017, BIOS 5.6.5 06/07/2016
+[  358.580582] Workqueue: events switchdev_deferred_process_work
+[  358.587001] RIP: 0010:switchdev_port_obj_add_now+0x9b/0xe0
+...
+[  358.614109] RSP: 0018:ffffa6b900d6fe18 EFLAGS: 00010286
+[  358.619943] RAX: 0000000000000000 RBX: ffff8b00797ff000 RCX: 0000000000000000
+[  358.627912] RDX: ffff8b00b7a1d4c0 RSI: ffff8b00b7a152e8 RDI: ffff8b00b7a152e8
+[  358.635881] RBP: ffff8b005c3f5bc0 R08: 000000000000022b R09: 0000000000000000
+[  358.643850] R10: 0000000000000000 R11: ffffa6b900d6fcc8 R12: 0000000000000000
+[  358.651819] R13: dead000000000100 R14: ffff8b00b65a23c0 R15: 0ffff8b00b7a2200
+[  358.659790] FS:  0000000000000000(0000) GS:ffff8b00b7a00000(0000) knlGS:0000000000000000
+[  358.668820] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[  358.675228] CR2: 00007f00aad90de0 CR3: 00000001ca80d000 CR4: 00000000001006f0
+[  358.683188] Call Trace:
+[  358.685918]  switchdev_port_obj_add_deferred+0x13/0x60
+[  358.691655]  switchdev_deferred_process+0x6b/0xf0
+[  358.696907]  switchdev_deferred_process_work+0xa/0x10
+[  358.702548]  process_one_work+0x1f5/0x3f0
+[  358.707022]  worker_thread+0x28/0x3c0
+[  358.711099]  ? process_one_work+0x3f0/0x3f0
+[  358.715768]  kthread+0x10d/0x130
+[  358.719369]  ? __kthread_create_on_node+0x180/0x180
+[  358.724815]  ret_from_fork+0x35/0x40
+
+Fixes: 3a49b4fde2a1 ("mlxsw: Adding layer 2 multicast support")
+Signed-off-by: Ido Schimmel <idosch@mellanox.com>
+Reported-by: Alex Kushnarov <alexanderk@mellanox.com>
+Tested-by: Alex Kushnarov <alexanderk@mellanox.com>
+Acked-by: Jiri Pirko <jiri@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+@@ -1654,7 +1654,7 @@ static int mlxsw_sp_port_mdb_add(struct
+       u16 fid_index;
+       int err = 0;
+-      if (switchdev_trans_ph_prepare(trans))
++      if (switchdev_trans_ph_commit(trans))
+               return 0;
+       bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp->bridge, orig_dev);
diff --git a/queue-5.0/net-atm-fix-potential-spectre-v1-vulnerabilities.patch b/queue-5.0/net-atm-fix-potential-spectre-v1-vulnerabilities.patch
new file mode 100644 (file)
index 0000000..0e1835e
--- /dev/null
@@ -0,0 +1,53 @@
+From foo@baz Sat Apr 20 16:43:09 CEST 2019
+From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
+Date: Mon, 15 Apr 2019 15:57:23 -0500
+Subject: net: atm: Fix potential Spectre v1 vulnerabilities
+
+From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
+
+[ Upstream commit 899537b73557aafbdd11050b501cf54b4f5c45af ]
+
+arg is controlled by user-space, hence leading to a potential
+exploitation of the Spectre variant 1 vulnerability.
+
+This issue was detected with the help of Smatch:
+
+net/atm/lec.c:715 lec_mcast_attach() warn: potential spectre issue 'dev_lec' [r] (local cap)
+
+Fix this by sanitizing arg before using it to index dev_lec.
+
+Notice that given that speculation windows are large, the policy is
+to kill the speculation on the first load and not worry if it can be
+completed with a dependent load/store [1].
+
+[1] https://lore.kernel.org/lkml/20180423164740.GY17484@dhcp22.suse.cz/
+
+Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/atm/lec.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/net/atm/lec.c
++++ b/net/atm/lec.c
+@@ -710,7 +710,10 @@ static int lec_vcc_attach(struct atm_vcc
+ static int lec_mcast_attach(struct atm_vcc *vcc, int arg)
+ {
+-      if (arg < 0 || arg >= MAX_LEC_ITF || !dev_lec[arg])
++      if (arg < 0 || arg >= MAX_LEC_ITF)
++              return -EINVAL;
++      arg = array_index_nospec(arg, MAX_LEC_ITF);
++      if (!dev_lec[arg])
+               return -EINVAL;
+       vcc->proto_data = dev_lec[arg];
+       return lec_mcast_make(netdev_priv(dev_lec[arg]), vcc);
+@@ -728,6 +731,7 @@ static int lecd_attach(struct atm_vcc *v
+               i = arg;
+       if (arg >= MAX_LEC_ITF)
+               return -EINVAL;
++      i = array_index_nospec(arg, MAX_LEC_ITF);
+       if (!dev_lec[i]) {
+               int size;
diff --git a/queue-5.0/net-bridge-fix-netlink-export-of-vlan_stats_per_port-option.patch b/queue-5.0/net-bridge-fix-netlink-export-of-vlan_stats_per_port-option.patch
new file mode 100644 (file)
index 0000000..8e9e859
--- /dev/null
@@ -0,0 +1,33 @@
+From foo@baz Sat Apr 20 16:43:09 CEST 2019
+From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Date: Tue, 16 Apr 2019 16:15:56 +0300
+Subject: net: bridge: fix netlink export of vlan_stats_per_port option
+
+From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+
+[ Upstream commit 600bea7dba1a72874ae0cd9bc66bf2abfe43b49d ]
+
+Since the introduction of the vlan_stats_per_port option the netlink
+export of it has been broken since I made a typo and used the ifla
+attribute instead of the bridge option to retrieve its state.
+Sysfs export is fine, only netlink export has been affected.
+
+Fixes: 9163a0fc1f0c0 ("net: bridge: add support for per-port vlan stats")
+Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/bridge/br_netlink.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/bridge/br_netlink.c
++++ b/net/bridge/br_netlink.c
+@@ -1441,7 +1441,7 @@ static int br_fill_info(struct sk_buff *
+           nla_put_u8(skb, IFLA_BR_VLAN_STATS_ENABLED,
+                      br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) ||
+           nla_put_u8(skb, IFLA_BR_VLAN_STATS_PER_PORT,
+-                     br_opt_get(br, IFLA_BR_VLAN_STATS_PER_PORT)))
++                     br_opt_get(br, BROPT_VLAN_STATS_PER_PORT)))
+               return -EMSGSIZE;
+ #endif
+ #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
diff --git a/queue-5.0/net-bridge-fix-per-port-af_packet-sockets.patch b/queue-5.0/net-bridge-fix-per-port-af_packet-sockets.patch
new file mode 100644 (file)
index 0000000..da5b5d2
--- /dev/null
@@ -0,0 +1,77 @@
+From foo@baz Sat Apr 20 16:43:09 CEST 2019
+From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Date: Thu, 11 Apr 2019 13:56:39 +0300
+Subject: net: bridge: fix per-port af_packet sockets
+
+From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+
+[ Upstream commit 3b2e2904deb314cc77a2192f506f2fd44e3d10d0 ]
+
+When the commit below was introduced it changed two visible things:
+ - the skb was no longer passed through the protocol handlers with the
+   original device
+ - the skb was passed up the stack with skb->dev = bridge
+
+The first change broke af_packet sockets on bridge ports. For example we
+use them for hostapd which listens for ETH_P_PAE packets on the ports.
+We discussed two possible fixes:
+ - create a clone and pass it through NF_HOOK(), act on the original skb
+   based on the result
+ - somehow signal to the caller from the okfn() that it was called,
+   meaning the skb is ok to be passed, which this patch is trying to
+   implement via returning 1 from the bridge link-local okfn()
+
+Note that we rely on the fact that NF_QUEUE/STOLEN would return 0 and
+drop/error would return < 0 thus the okfn() is called only when the
+return was 1, so we signal to the caller that it was called by preserving
+the return value from nf_hook().
+
+Fixes: 8626c56c8279 ("bridge: fix potential use-after-free when hook returns QUEUE or STOLEN verdict")
+Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/bridge/br_input.c |   23 ++++++++++++++---------
+ 1 file changed, 14 insertions(+), 9 deletions(-)
+
+--- a/net/bridge/br_input.c
++++ b/net/bridge/br_input.c
+@@ -197,13 +197,10 @@ static void __br_handle_local_finish(str
+ /* note: already called with rcu_read_lock */
+ static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
+ {
+-      struct net_bridge_port *p = br_port_get_rcu(skb->dev);
+-
+       __br_handle_local_finish(skb);
+-      BR_INPUT_SKB_CB(skb)->brdev = p->br->dev;
+-      br_pass_frame_up(skb);
+-      return 0;
++      /* return 1 to signal the okfn() was called so it's ok to use the skb */
++      return 1;
+ }
+ /*
+@@ -280,10 +277,18 @@ rx_handler_result_t br_handle_frame(stru
+                               goto forward;
+               }
+-              /* Deliver packet to local host only */
+-              NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, dev_net(skb->dev),
+-                      NULL, skb, skb->dev, NULL, br_handle_local_finish);
+-              return RX_HANDLER_CONSUMED;
++              /* The else clause should be hit when nf_hook():
++               *   - returns < 0 (drop/error)
++               *   - returns = 0 (stolen/nf_queue)
++               * Thus return 1 from the okfn() to signal the skb is ok to pass
++               */
++              if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN,
++                          dev_net(skb->dev), NULL, skb, skb->dev, NULL,
++                          br_handle_local_finish) == 1) {
++                      return RX_HANDLER_PASS;
++              } else {
++                      return RX_HANDLER_CONSUMED;
++              }
+       }
+ forward:
diff --git a/queue-5.0/net-bridge-multicast-use-rcu-to-access-port-list-from-br_multicast_start_querier.patch b/queue-5.0/net-bridge-multicast-use-rcu-to-access-port-list-from-br_multicast_start_querier.patch
new file mode 100644 (file)
index 0000000..447f802
--- /dev/null
@@ -0,0 +1,41 @@
+From foo@baz Sat Apr 20 16:43:09 CEST 2019
+From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Date: Thu, 11 Apr 2019 15:08:25 +0300
+Subject: net: bridge: multicast: use rcu to access port list from br_multicast_start_querier
+
+From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+
+[ Upstream commit c5b493ce192bd7a4e7bd073b5685aad121eeef82 ]
+
+br_multicast_start_querier() walks over the port list but it can be
+called from a timer with only multicast_lock held which doesn't protect
+the port list, so use RCU to walk over it.
+
+Fixes: c83b8fab06fc ("bridge: Restart queries when last querier expires")
+Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/bridge/br_multicast.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/bridge/br_multicast.c
++++ b/net/bridge/br_multicast.c
+@@ -1916,7 +1916,8 @@ static void br_multicast_start_querier(s
+       __br_multicast_open(br, query);
+-      list_for_each_entry(port, &br->port_list, list) {
++      rcu_read_lock();
++      list_for_each_entry_rcu(port, &br->port_list, list) {
+               if (port->state == BR_STATE_DISABLED ||
+                   port->state == BR_STATE_BLOCKING)
+                       continue;
+@@ -1928,6 +1929,7 @@ static void br_multicast_start_querier(s
+                       br_multicast_enable(&port->ip6_own_query);
+ #endif
+       }
++      rcu_read_unlock();
+ }
+ int br_multicast_toggle(struct net_bridge *br, unsigned long val)
diff --git a/queue-5.0/net-fec-manage-ahb-clock-in-runtime-pm.patch b/queue-5.0/net-fec-manage-ahb-clock-in-runtime-pm.patch
new file mode 100644 (file)
index 0000000..031bc52
--- /dev/null
@@ -0,0 +1,106 @@
+From foo@baz Sat Apr 20 16:43:09 CEST 2019
+From: Andy Duan <fugang.duan@nxp.com>
+Date: Tue, 9 Apr 2019 03:40:56 +0000
+Subject: net: fec: manage ahb clock in runtime pm
+
+From: Andy Duan <fugang.duan@nxp.com>
+
+[ Upstream commit d7c3a206e6338e4ccdf030719dec028e26a521d5 ]
+
+Some SOC like i.MX6SX clock have some limits:
+- ahb clock should be disabled before ipg.
+- ahb and ipg clocks are required for MAC MII bus.
+So, move the ahb clock to runtime management together with
+ipg clock.
+
+Signed-off-by: Fugang Duan <fugang.duan@nxp.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/freescale/fec_main.c |   30 +++++++++++++++++++++---------
+ 1 file changed, 21 insertions(+), 9 deletions(-)
+
+--- a/drivers/net/ethernet/freescale/fec_main.c
++++ b/drivers/net/ethernet/freescale/fec_main.c
+@@ -1840,13 +1840,9 @@ static int fec_enet_clk_enable(struct ne
+       int ret;
+       if (enable) {
+-              ret = clk_prepare_enable(fep->clk_ahb);
+-              if (ret)
+-                      return ret;
+-
+               ret = clk_prepare_enable(fep->clk_enet_out);
+               if (ret)
+-                      goto failed_clk_enet_out;
++                      return ret;
+               if (fep->clk_ptp) {
+                       mutex_lock(&fep->ptp_clk_mutex);
+@@ -1866,7 +1862,6 @@ static int fec_enet_clk_enable(struct ne
+               phy_reset_after_clk_enable(ndev->phydev);
+       } else {
+-              clk_disable_unprepare(fep->clk_ahb);
+               clk_disable_unprepare(fep->clk_enet_out);
+               if (fep->clk_ptp) {
+                       mutex_lock(&fep->ptp_clk_mutex);
+@@ -1885,8 +1880,6 @@ failed_clk_ref:
+ failed_clk_ptp:
+       if (fep->clk_enet_out)
+               clk_disable_unprepare(fep->clk_enet_out);
+-failed_clk_enet_out:
+-              clk_disable_unprepare(fep->clk_ahb);
+       return ret;
+ }
+@@ -3470,6 +3463,9 @@ fec_probe(struct platform_device *pdev)
+       ret = clk_prepare_enable(fep->clk_ipg);
+       if (ret)
+               goto failed_clk_ipg;
++      ret = clk_prepare_enable(fep->clk_ahb);
++      if (ret)
++              goto failed_clk_ahb;
+       fep->reg_phy = devm_regulator_get_optional(&pdev->dev, "phy");
+       if (!IS_ERR(fep->reg_phy)) {
+@@ -3563,6 +3559,9 @@ failed_reset:
+       pm_runtime_put(&pdev->dev);
+       pm_runtime_disable(&pdev->dev);
+ failed_regulator:
++      clk_disable_unprepare(fep->clk_ahb);
++failed_clk_ahb:
++      clk_disable_unprepare(fep->clk_ipg);
+ failed_clk_ipg:
+       fec_enet_clk_enable(ndev, false);
+ failed_clk:
+@@ -3686,6 +3685,7 @@ static int __maybe_unused fec_runtime_su
+       struct net_device *ndev = dev_get_drvdata(dev);
+       struct fec_enet_private *fep = netdev_priv(ndev);
++      clk_disable_unprepare(fep->clk_ahb);
+       clk_disable_unprepare(fep->clk_ipg);
+       return 0;
+@@ -3695,8 +3695,20 @@ static int __maybe_unused fec_runtime_re
+ {
+       struct net_device *ndev = dev_get_drvdata(dev);
+       struct fec_enet_private *fep = netdev_priv(ndev);
++      int ret;
+-      return clk_prepare_enable(fep->clk_ipg);
++      ret = clk_prepare_enable(fep->clk_ahb);
++      if (ret)
++              return ret;
++      ret = clk_prepare_enable(fep->clk_ipg);
++      if (ret)
++              goto failed_clk_ipg;
++
++      return 0;
++
++failed_clk_ipg:
++      clk_disable_unprepare(fep->clk_ahb);
++      return ret;
+ }
+ static const struct dev_pm_ops fec_pm_ops = {
diff --git a/queue-5.0/net-fix-missing-meta-data-in-skb-with-vlan-packet.patch b/queue-5.0/net-fix-missing-meta-data-in-skb-with-vlan-packet.patch
new file mode 100644 (file)
index 0000000..5afb8d5
--- /dev/null
@@ -0,0 +1,49 @@
+From foo@baz Sat Apr 20 16:43:09 CEST 2019
+From: Yuya Kusakabe <yuya.kusakabe@gmail.com>
+Date: Tue, 16 Apr 2019 10:22:28 +0900
+Subject: net: Fix missing meta data in skb with vlan packet
+
+From: Yuya Kusakabe <yuya.kusakabe@gmail.com>
+
+[ Upstream commit d85e8be2a5a02869f815dd0ac2d743deb4cd7957 ]
+
+skb_reorder_vlan_header() should move XDP meta data with ethernet header
+if XDP meta data exists.
+
+Fixes: de8f3a83b0a0 ("bpf: add meta pointer for direct access")
+Signed-off-by: Yuya Kusakabe <yuya.kusakabe@gmail.com>
+Signed-off-by: Takeru Hayasaka <taketarou2@gmail.com>
+Co-developed-by: Takeru Hayasaka <taketarou2@gmail.com>
+Reviewed-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/skbuff.c |   10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -5083,7 +5083,8 @@ EXPORT_SYMBOL_GPL(skb_gso_validate_mac_l
+ static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb)
+ {
+-      int mac_len;
++      int mac_len, meta_len;
++      void *meta;
+       if (skb_cow(skb, skb_headroom(skb)) < 0) {
+               kfree_skb(skb);
+@@ -5095,6 +5096,13 @@ static struct sk_buff *skb_reorder_vlan_
+               memmove(skb_mac_header(skb) + VLAN_HLEN, skb_mac_header(skb),
+                       mac_len - VLAN_HLEN - ETH_TLEN);
+       }
++
++      meta_len = skb_metadata_len(skb);
++      if (meta_len) {
++              meta = skb_metadata_end(skb) - meta_len;
++              memmove(meta + VLAN_HLEN, meta, meta_len);
++      }
++
+       skb->mac_header += VLAN_HLEN;
+       return skb;
+ }
diff --git a/queue-5.0/net-fou-do-not-use-guehdr-after-iptunnel_pull_offloads-in-gue_udp_recv.patch b/queue-5.0/net-fou-do-not-use-guehdr-after-iptunnel_pull_offloads-in-gue_udp_recv.patch
new file mode 100644 (file)
index 0000000..cdafe87
--- /dev/null
@@ -0,0 +1,48 @@
+From foo@baz Sat Apr 20 16:43:09 CEST 2019
+From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
+Date: Tue, 9 Apr 2019 11:47:20 +0200
+Subject: net: fou: do not use guehdr after iptunnel_pull_offloads in gue_udp_recv
+
+From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
+
+[ Upstream commit 988dc4a9a3b66be75b30405a5494faf0dc7cffb6 ]
+
+gue tunnels run iptunnel_pull_offloads on received skbs. This can
+determine a possible use-after-free accessing guehdr pointer since
+the packet will be 'uncloned' running pskb_expand_head if it is a
+cloned gso skb (e.g if the packet has been sent though a veth device)
+
+Fixes: a09a4c8dd1ec ("tunnels: Remove encapsulation offloads on decap")
+Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/fou.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/ipv4/fou.c
++++ b/net/ipv4/fou.c
+@@ -121,6 +121,7 @@ static int gue_udp_recv(struct sock *sk,
+       struct guehdr *guehdr;
+       void *data;
+       u16 doffset = 0;
++      u8 proto_ctype;
+       if (!fou)
+               return 1;
+@@ -212,13 +213,14 @@ static int gue_udp_recv(struct sock *sk,
+       if (unlikely(guehdr->control))
+               return gue_control_message(skb, guehdr);
++      proto_ctype = guehdr->proto_ctype;
+       __skb_pull(skb, sizeof(struct udphdr) + hdrlen);
+       skb_reset_transport_header(skb);
+       if (iptunnel_pull_offloads(skb))
+               goto drop;
+-      return -guehdr->proto_ctype;
++      return -proto_ctype;
+ drop:
+       kfree_skb(skb);
diff --git a/queue-5.0/net-mlx5-fpga-tls-hold-rcu-read-lock-a-bit-longer.patch b/queue-5.0/net-mlx5-fpga-tls-hold-rcu-read-lock-a-bit-longer.patch
new file mode 100644 (file)
index 0000000..b17bda4
--- /dev/null
@@ -0,0 +1,54 @@
+From foo@baz Sat Apr 20 16:43:09 CEST 2019
+From: Saeed Mahameed <saeedm@mellanox.com>
+Date: Tue, 19 Mar 2019 22:09:05 -0700
+Subject: net/mlx5: FPGA, tls, hold rcu read lock a bit longer
+
+From: Saeed Mahameed <saeedm@mellanox.com>
+
+[ Upstream commit 31634bf5dcc418b5b2cacd954394c0c4620db6a2 ]
+
+To avoid use-after-free, hold the rcu read lock until we are done copying
+flow data into the command buffer.
+
+Fixes: ab412e1dd7db ("net/mlx5: Accel, add TLS rx offload routines")
+Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c |   18 +++++++++---------
+ 1 file changed, 9 insertions(+), 9 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c
+@@ -217,22 +217,22 @@ int mlx5_fpga_tls_resync_rx(struct mlx5_
+       void *cmd;
+       int ret;
+-      rcu_read_lock();
+-      flow = idr_find(&mdev->fpga->tls->rx_idr, ntohl(handle));
+-      rcu_read_unlock();
+-
+-      if (!flow) {
+-              WARN_ONCE(1, "Received NULL pointer for handle\n");
+-              return -EINVAL;
+-      }
+-
+       buf = kzalloc(size, GFP_ATOMIC);
+       if (!buf)
+               return -ENOMEM;
+       cmd = (buf + 1);
++      rcu_read_lock();
++      flow = idr_find(&mdev->fpga->tls->rx_idr, ntohl(handle));
++      if (unlikely(!flow)) {
++              rcu_read_unlock();
++              WARN_ONCE(1, "Received NULL pointer for handle\n");
++              kfree(buf);
++              return -EINVAL;
++      }
+       mlx5_fpga_tls_flow_to_cmd(flow, cmd);
++      rcu_read_unlock();
+       MLX5_SET(tls_cmd, cmd, swid, ntohl(handle));
+       MLX5_SET64(tls_cmd, cmd, tls_rcd_sn, be64_to_cpu(rcd_sn));
diff --git a/queue-5.0/net-mlx5-fpga-tls-idr-remove-on-flow-delete.patch b/queue-5.0/net-mlx5-fpga-tls-idr-remove-on-flow-delete.patch
new file mode 100644 (file)
index 0000000..ca47af0
--- /dev/null
@@ -0,0 +1,139 @@
+From foo@baz Sat Apr 20 16:43:09 CEST 2019
+From: Saeed Mahameed <saeedm@mellanox.com>
+Date: Tue, 19 Mar 2019 01:05:41 -0700
+Subject: net/mlx5: FPGA, tls, idr remove on flow delete
+
+From: Saeed Mahameed <saeedm@mellanox.com>
+
+[ Upstream commit df3a8344d404a810b4aadbf19b08c8232fbaa715 ]
+
+Flow is kfreed on mlx5_fpga_tls_del_flow but kept in the idr data
+structure, this is risky and can cause use-after-free, since the
+idr_remove is delayed until tls_send_teardown_cmd completion.
+
+Instead of delaying idr_remove, in this patch we do it on
+mlx5_fpga_tls_del_flow, before actually kfree(flow).
+
+Added synchronize_rcu before kfree(flow)
+
+Fixes: ab412e1dd7db ("net/mlx5: Accel, add TLS rx offload routines")
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c |   43 +++++++--------------
+ 1 file changed, 15 insertions(+), 28 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c
+@@ -148,14 +148,16 @@ static int mlx5_fpga_tls_alloc_swid(stru
+       return ret;
+ }
+-static void mlx5_fpga_tls_release_swid(struct idr *idr,
+-                                     spinlock_t *idr_spinlock, u32 swid)
++static void *mlx5_fpga_tls_release_swid(struct idr *idr,
++                                      spinlock_t *idr_spinlock, u32 swid)
+ {
+       unsigned long flags;
++      void *ptr;
+       spin_lock_irqsave(idr_spinlock, flags);
+-      idr_remove(idr, swid);
++      ptr = idr_remove(idr, swid);
+       spin_unlock_irqrestore(idr_spinlock, flags);
++      return ptr;
+ }
+ static void mlx_tls_kfree_complete(struct mlx5_fpga_conn *conn,
+@@ -165,20 +167,12 @@ static void mlx_tls_kfree_complete(struc
+       kfree(buf);
+ }
+-struct mlx5_teardown_stream_context {
+-      struct mlx5_fpga_tls_command_context cmd;
+-      u32 swid;
+-};
+-
+ static void
+ mlx5_fpga_tls_teardown_completion(struct mlx5_fpga_conn *conn,
+                                 struct mlx5_fpga_device *fdev,
+                                 struct mlx5_fpga_tls_command_context *cmd,
+                                 struct mlx5_fpga_dma_buf *resp)
+ {
+-      struct mlx5_teardown_stream_context *ctx =
+-                  container_of(cmd, struct mlx5_teardown_stream_context, cmd);
+-
+       if (resp) {
+               u32 syndrome = MLX5_GET(tls_resp, resp->sg[0].data, syndrome);
+@@ -186,14 +180,6 @@ mlx5_fpga_tls_teardown_completion(struct
+                       mlx5_fpga_err(fdev,
+                                     "Teardown stream failed with syndrome = %d",
+                                     syndrome);
+-              else if (MLX5_GET(tls_cmd, cmd->buf.sg[0].data, direction_sx))
+-                      mlx5_fpga_tls_release_swid(&fdev->tls->tx_idr,
+-                                                 &fdev->tls->tx_idr_spinlock,
+-                                                 ctx->swid);
+-              else
+-                      mlx5_fpga_tls_release_swid(&fdev->tls->rx_idr,
+-                                                 &fdev->tls->rx_idr_spinlock,
+-                                                 ctx->swid);
+       }
+       mlx5_fpga_tls_put_command_ctx(cmd);
+ }
+@@ -253,7 +239,7 @@ int mlx5_fpga_tls_resync_rx(struct mlx5_
+ static void mlx5_fpga_tls_send_teardown_cmd(struct mlx5_core_dev *mdev,
+                                           void *flow, u32 swid, gfp_t flags)
+ {
+-      struct mlx5_teardown_stream_context *ctx;
++      struct mlx5_fpga_tls_command_context *ctx;
+       struct mlx5_fpga_dma_buf *buf;
+       void *cmd;
+@@ -261,7 +247,7 @@ static void mlx5_fpga_tls_send_teardown_
+       if (!ctx)
+               return;
+-      buf = &ctx->cmd.buf;
++      buf = &ctx->buf;
+       cmd = (ctx + 1);
+       MLX5_SET(tls_cmd, cmd, command_type, CMD_TEARDOWN_STREAM);
+       MLX5_SET(tls_cmd, cmd, swid, swid);
+@@ -272,8 +258,7 @@ static void mlx5_fpga_tls_send_teardown_
+       buf->sg[0].data = cmd;
+       buf->sg[0].size = MLX5_TLS_COMMAND_SIZE;
+-      ctx->swid = swid;
+-      mlx5_fpga_tls_cmd_send(mdev->fpga, &ctx->cmd,
++      mlx5_fpga_tls_cmd_send(mdev->fpga, ctx,
+                              mlx5_fpga_tls_teardown_completion);
+ }
+@@ -283,13 +268,14 @@ void mlx5_fpga_tls_del_flow(struct mlx5_
+       struct mlx5_fpga_tls *tls = mdev->fpga->tls;
+       void *flow;
+-      rcu_read_lock();
+       if (direction_sx)
+-              flow = idr_find(&tls->tx_idr, swid);
++              flow = mlx5_fpga_tls_release_swid(&tls->tx_idr,
++                                                &tls->tx_idr_spinlock,
++                                                swid);
+       else
+-              flow = idr_find(&tls->rx_idr, swid);
+-
+-      rcu_read_unlock();
++              flow = mlx5_fpga_tls_release_swid(&tls->rx_idr,
++                                                &tls->rx_idr_spinlock,
++                                                swid);
+       if (!flow) {
+               mlx5_fpga_err(mdev->fpga, "No flow information for swid %u\n",
+@@ -297,6 +283,7 @@ void mlx5_fpga_tls_del_flow(struct mlx5_
+               return;
+       }
++      synchronize_rcu(); /* before kfree(flow) */
+       mlx5_fpga_tls_send_teardown_cmd(mdev, flow, swid, flags);
+ }
diff --git a/queue-5.0/net-mlx5e-protect-against-non-uplink-representor-for-encap.patch b/queue-5.0/net-mlx5e-protect-against-non-uplink-representor-for-encap.patch
new file mode 100644 (file)
index 0000000..7f089fb
--- /dev/null
@@ -0,0 +1,36 @@
+From foo@baz Sat Apr 20 16:43:09 CEST 2019
+From: Dmytro Linkin <dmitrolin@mellanox.com>
+Date: Fri, 29 Mar 2019 12:50:37 +0000
+Subject: net/mlx5e: Protect against non-uplink representor for encap
+
+From: Dmytro Linkin <dmitrolin@mellanox.com>
+
+[ Upstream commit 5e0060b1491b299b1706414e61ede0b02265680e ]
+
+TC encap offload is supported only for the physical uplink
+representor. Fail for non-uplink representors.
+
+Fixes: 3e621b19b0bb ("net/mlx5e: Support TC encapsulation offloads with upper devices")
+Signed-off-by: Dmytro Linkin <dmitrolin@mellanox.com>
+Reviewed-by: Eli Britstein <elibr@mellanox.com>
+Reviewed-by: Vlad Buslov <vladbu@mellanox.com>
+Reviewed-by: Roi Dayan <roid@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
+@@ -39,6 +39,10 @@ static int get_route_and_out_devs(struct
+                       return -EOPNOTSUPP;
+       }
++      if (!(mlx5e_eswitch_rep(*out_dev) &&
++            mlx5e_is_uplink_rep(netdev_priv(*out_dev))))
++              return -EOPNOTSUPP;
++
+       return 0;
+ }
diff --git a/queue-5.0/net-mlx5e-rx-check-ip-headers-sanity.patch b/queue-5.0/net-mlx5e-rx-check-ip-headers-sanity.patch
new file mode 100644 (file)
index 0000000..6a920c6
--- /dev/null
@@ -0,0 +1,45 @@
+From foo@baz Sat Apr 20 16:43:09 CEST 2019
+From: Saeed Mahameed <saeedm@mellanox.com>
+Date: Mon, 25 Mar 2019 22:10:59 -0700
+Subject: net/mlx5e: Rx, Check ip headers sanity
+
+From: Saeed Mahameed <saeedm@mellanox.com>
+
+[ Upstream commit 0318a7b7fcad9765931146efa7ca3a034194737c ]
+
+In the two places where is_last_ethertype_ip() is called, the caller
+will be looking inside the IP header. To be safe, add an ip{4,6} header
+sanity check and return true only for valid IP headers, i.e. when the
+whole header is contained in the linear part of the skb.
+
+Note: such a situation is very rare and hard to reproduce, since mlx5e
+allocates a large enough headroom to contain the largest header one can
+imagine.
+
+Fixes: fe1dc069990c ("net/mlx5e: don't set CHECKSUM_COMPLETE on SCTP packets")
+Reported-by: Cong Wang <xiyou.wangcong@gmail.com>
+Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_rx.c |    9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+@@ -693,7 +693,14 @@ static inline bool is_last_ethertype_ip(
+ {
+       *proto = ((struct ethhdr *)skb->data)->h_proto;
+       *proto = __vlan_get_protocol(skb, *proto, network_depth);
+-      return (*proto == htons(ETH_P_IP) || *proto == htons(ETH_P_IPV6));
++
++      if (*proto == htons(ETH_P_IP))
++              return pskb_may_pull(skb, *network_depth + sizeof(struct iphdr));
++
++      if (*proto == htons(ETH_P_IPV6))
++              return pskb_may_pull(skb, *network_depth + sizeof(struct ipv6hdr));
++
++      return false;
+ }
+ static inline void mlx5e_enable_ecn(struct mlx5e_rq *rq, struct sk_buff *skb)
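
The fix above relies on only reading an IP header once the whole header is
known to sit inside the available linear data. A minimal user-space sketch of
that bounds check follows; the struct and helper are illustrative stand-ins
for struct iphdr and pskb_may_pull(), not the driver's code.

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

/* Illustrative stand-in for struct iphdr: 20 bytes without options. */
struct ipv4_hdr_min {
    uint8_t  ver_ihl, tos;
    uint16_t tot_len, id, frag_off;
    uint8_t  ttl, protocol;
    uint16_t check;
    uint32_t saddr, daddr;
};

/* Same idea as the pskb_may_pull() checks added above: only report a
 * valid IP header when all of it lies inside the available data.
 */
static int ip_header_complete(size_t linear_len, size_t network_depth)
{
    return linear_len >= network_depth + sizeof(struct ipv4_hdr_min);
}

int main(void)
{
    /* 14-byte Ethernet header followed by a 20-byte IPv4 header. */
    printf("full frame:      %d\n", ip_header_complete(34, 14));
    /* Truncated linear part: the header must not be dereferenced. */
    printf("truncated frame: %d\n", ip_header_complete(20, 14));
    return 0;
}
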
diff --git a/queue-5.0/net-mlx5e-rx-fixup-skb-checksum-for-packets-with-tail-padding.patch b/queue-5.0/net-mlx5e-rx-fixup-skb-checksum-for-packets-with-tail-padding.patch
new file mode 100644 (file)
index 0000000..d7692b7
--- /dev/null
@@ -0,0 +1,183 @@
+From foo@baz Sat Apr 20 16:43:09 CEST 2019
+From: Saeed Mahameed <saeedm@mellanox.com>
+Date: Tue, 12 Mar 2019 00:24:52 -0700
+Subject: net/mlx5e: Rx, Fixup skb checksum for packets with tail padding
+
+From: Saeed Mahameed <saeedm@mellanox.com>
+
+[ Upstream commit 0aa1d18615c163f92935b806dcaff9157645233a ]
+
+When an Ethernet frame with an IP payload is padded, the padding octets
+are not covered by the hardware checksum.
+
+Prior to the cited commit, the skb checksum was forced to CHECKSUM_NONE
+when padding was detected. After it, the kernel will try to trim the
+padding bytes and subtract their checksum from skb->csum.
+
+In this patch we fix up skb->csum for any IP packet with tail padding
+of any size, if any padding is found.
+The FCS case is just one special case of this general-purpose fix,
+hence it is removed.
+
+Fixes: 88078d98d1bb ("net: pskb_trim_rcsum() and CHECKSUM_COMPLETE are friends")
+Cc: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_rx.c    |   79 +++++++++++++++++----
+ drivers/net/ethernet/mellanox/mlx5/core/en_stats.c |    6 +
+ drivers/net/ethernet/mellanox/mlx5/core/en_stats.h |    4 +
+ 3 files changed, 74 insertions(+), 15 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+@@ -713,17 +713,6 @@ static inline void mlx5e_enable_ecn(stru
+       rq->stats->ecn_mark += !!rc;
+ }
+-static u32 mlx5e_get_fcs(const struct sk_buff *skb)
+-{
+-      const void *fcs_bytes;
+-      u32 _fcs_bytes;
+-
+-      fcs_bytes = skb_header_pointer(skb, skb->len - ETH_FCS_LEN,
+-                                     ETH_FCS_LEN, &_fcs_bytes);
+-
+-      return __get_unaligned_cpu32(fcs_bytes);
+-}
+-
+ static u8 get_ip_proto(struct sk_buff *skb, int network_depth, __be16 proto)
+ {
+       void *ip_p = skb->data + network_depth;
+@@ -734,6 +723,68 @@ static u8 get_ip_proto(struct sk_buff *s
+ #define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN)
++#define MAX_PADDING 8
++
++static void
++tail_padding_csum_slow(struct sk_buff *skb, int offset, int len,
++                     struct mlx5e_rq_stats *stats)
++{
++      stats->csum_complete_tail_slow++;
++      skb->csum = csum_block_add(skb->csum,
++                                 skb_checksum(skb, offset, len, 0),
++                                 offset);
++}
++
++static void
++tail_padding_csum(struct sk_buff *skb, int offset,
++                struct mlx5e_rq_stats *stats)
++{
++      u8 tail_padding[MAX_PADDING];
++      int len = skb->len - offset;
++      void *tail;
++
++      if (unlikely(len > MAX_PADDING)) {
++              tail_padding_csum_slow(skb, offset, len, stats);
++              return;
++      }
++
++      tail = skb_header_pointer(skb, offset, len, tail_padding);
++      if (unlikely(!tail)) {
++              tail_padding_csum_slow(skb, offset, len, stats);
++              return;
++      }
++
++      stats->csum_complete_tail++;
++      skb->csum = csum_block_add(skb->csum, csum_partial(tail, len, 0), offset);
++}
++
++static void
++mlx5e_skb_padding_csum(struct sk_buff *skb, int network_depth, __be16 proto,
++                     struct mlx5e_rq_stats *stats)
++{
++      struct ipv6hdr *ip6;
++      struct iphdr   *ip4;
++      int pkt_len;
++
++      switch (proto) {
++      case htons(ETH_P_IP):
++              ip4 = (struct iphdr *)(skb->data + network_depth);
++              pkt_len = network_depth + ntohs(ip4->tot_len);
++              break;
++      case htons(ETH_P_IPV6):
++              ip6 = (struct ipv6hdr *)(skb->data + network_depth);
++              pkt_len = network_depth + sizeof(*ip6) + ntohs(ip6->payload_len);
++              break;
++      default:
++              return;
++      }
++
++      if (likely(pkt_len >= skb->len))
++              return;
++
++      tail_padding_csum(skb, pkt_len, stats);
++}
++
+ static inline void mlx5e_handle_csum(struct net_device *netdev,
+                                    struct mlx5_cqe64 *cqe,
+                                    struct mlx5e_rq *rq,
+@@ -782,10 +833,8 @@ static inline void mlx5e_handle_csum(str
+                       skb->csum = csum_partial(skb->data + ETH_HLEN,
+                                                network_depth - ETH_HLEN,
+                                                skb->csum);
+-              if (unlikely(netdev->features & NETIF_F_RXFCS))
+-                      skb->csum = csum_block_add(skb->csum,
+-                                                 (__force __wsum)mlx5e_get_fcs(skb),
+-                                                 skb->len - ETH_FCS_LEN);
++
++              mlx5e_skb_padding_csum(skb, network_depth, proto, stats);
+               stats->csum_complete++;
+               return;
+       }
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+@@ -59,6 +59,8 @@ static const struct counter_desc sw_stat
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) },
++      { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete_tail) },
++      { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete_tail_slow) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary_inner) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_drop) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_redirect) },
+@@ -151,6 +153,8 @@ void mlx5e_grp_sw_update_stats(struct ml
+               s->rx_removed_vlan_packets += rq_stats->removed_vlan_packets;
+               s->rx_csum_none += rq_stats->csum_none;
+               s->rx_csum_complete += rq_stats->csum_complete;
++              s->rx_csum_complete_tail += rq_stats->csum_complete_tail;
++              s->rx_csum_complete_tail_slow += rq_stats->csum_complete_tail_slow;
+               s->rx_csum_unnecessary += rq_stats->csum_unnecessary;
+               s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner;
+               s->rx_xdp_drop     += rq_stats->xdp_drop;
+@@ -1192,6 +1196,8 @@ static const struct counter_desc rq_stat
+       { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, packets) },
+       { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, bytes) },
+       { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete) },
++      { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete_tail) },
++      { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete_tail_slow) },
+       { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary) },
+       { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) },
+       { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) },
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+@@ -71,6 +71,8 @@ struct mlx5e_sw_stats {
+       u64 rx_csum_unnecessary;
+       u64 rx_csum_none;
+       u64 rx_csum_complete;
++      u64 rx_csum_complete_tail;
++      u64 rx_csum_complete_tail_slow;
+       u64 rx_csum_unnecessary_inner;
+       u64 rx_xdp_drop;
+       u64 rx_xdp_redirect;
+@@ -181,6 +183,8 @@ struct mlx5e_rq_stats {
+       u64 packets;
+       u64 bytes;
+       u64 csum_complete;
++      u64 csum_complete_tail;
++      u64 csum_complete_tail_slow;
+       u64 csum_unnecessary;
+       u64 csum_unnecessary_inner;
+       u64 csum_none;
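
The fixup works because the Internet checksum is additive over blocks that
start at even offsets, so the padding's partial checksum can simply be folded
into the value the hardware already produced. Below is a small stand-alone
sketch of that property using a simplified ones'-complement sum that assumes
even lengths; it is not the kernel's csum_partial()/csum_block_add()
implementation.

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

/* Simplified 16-bit ones'-complement sum over an even number of bytes. */
static uint32_t csum16(uint32_t sum, const uint8_t *buf, size_t len)
{
    size_t i;

    for (i = 0; i + 1 < len; i += 2)
        sum += (uint32_t)buf[i] << 8 | buf[i + 1];
    while (sum >> 16)
        sum = (sum & 0xffff) + (sum >> 16);
    return sum;
}

int main(void)
{
    uint8_t payload[6] = { 0x45, 0x00, 0x00, 0x1c, 0xab, 0xcd };
    uint8_t padding[4] = { 0x01, 0x02, 0x03, 0x04 }; /* non-zero tail pad */
    uint8_t frame[10];
    size_t i;

    for (i = 0; i < 6; i++)
        frame[i] = payload[i];
    for (i = 0; i < 4; i++)
        frame[6 + i] = padding[i];

    /* Folding the padding's checksum into the payload checksum gives the
     * same result as checksumming the whole frame, which is why skb->csum
     * can be "fixed up" after the fact.
     */
    unsigned int csum_payload = csum16(0, payload, 6);
    unsigned int csum_fixed   = csum16(csum_payload, padding, 4);
    unsigned int csum_full    = csum16(0, frame, 10);

    printf("payload only: 0x%04x\n", csum_payload);
    printf("fixed up:     0x%04x\n", csum_fixed);
    printf("whole frame:  0x%04x\n", csum_full);
    return 0;
}
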
diff --git a/queue-5.0/net-mlx5e-switch-to-toeplitz-rss-hash-by-default.patch b/queue-5.0/net-mlx5e-switch-to-toeplitz-rss-hash-by-default.patch
new file mode 100644 (file)
index 0000000..9564aa6
--- /dev/null
@@ -0,0 +1,41 @@
+From foo@baz Sat Apr 20 16:43:09 CEST 2019
+From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
+Date: Fri, 31 Aug 2018 14:29:16 +0300
+Subject: net/mlx5e: Switch to Toeplitz RSS hash by default
+
+From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
+
+[ Upstream commit 7ee2ace9c544a0886e02b54b625e521df8692d20 ]
+
+Although the XOR hash function can perform very well in some special
+use cases, to align with all other drivers the mlx5 driver should use
+Toeplitz hash by default.
+Toeplitz is more stable for the general use case and it is more
+standard and reliable.
+
+On top of that, XOR (MLX5_RX_HASH_FN_INVERTED_XOR8) gives only a
+repeated 8-bit pattern. When used for UDP tunneling RSS source port
+manipulation it results in a fixed source port, which will cause bad
+RSS spread.
+
+Fixes: 2be6967cdbc9 ("net/mlx5e: Support ETH_RSS_HASH_XOR")
+Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
+Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
+Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_main.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+@@ -4574,7 +4574,7 @@ void mlx5e_build_rss_params(struct mlx5e
+ {
+       enum mlx5e_traffic_types tt;
+-      rss_params->hfunc = ETH_RSS_HASH_XOR;
++      rss_params->hfunc = ETH_RSS_HASH_TOP;
+       netdev_rss_key_fill(rss_params->toeplitz_hash_key,
+                           sizeof(rss_params->toeplitz_hash_key));
+       mlx5e_build_default_indir_rqt(rss_params->indirection_rqt,
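
For reference, here is a compact user-space sketch of the standard Toeplitz
hash used for RSS (a sliding 32-bit key window, input bits taken MSB first).
The key and input bytes below are arbitrary example values, not the driver's
defaults.

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

/* Standard Toeplitz hash: for every set bit of the input (MSB first),
 * XOR in the 32-bit window of the key aligned with that bit.  Requires
 * key_len >= in_len + 4.
 */
static uint32_t toeplitz_hash(const uint8_t *key, const uint8_t *in, size_t in_len)
{
    uint32_t hash = 0;
    uint32_t window = (uint32_t)key[0] << 24 | (uint32_t)key[1] << 16 |
                      (uint32_t)key[2] << 8  |  key[3];
    size_t i;
    int bit;

    for (i = 0; i < in_len; i++) {
        for (bit = 7; bit >= 0; bit--) {
            if (in[i] & (1u << bit))
                hash ^= window;
            /* Slide the key window one bit to the left. */
            window = window << 1 | ((key[i + 4] >> bit) & 1);
        }
    }
    return hash;
}

int main(void)
{
    /* Arbitrary 12-byte example tuple (src/dst IPv4 plus ports) and a
     * 16-byte example key, long enough for the window to slide.
     */
    uint8_t input[12] = { 192, 168, 0, 1, 10, 0, 0, 2, 0x1f, 0x90, 0xc3, 0x50 };
    uint8_t key[16]   = { 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
                          0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0 };
    unsigned int hash = toeplitz_hash(key, input, sizeof(input));

    printf("toeplitz hash: 0x%08x\n", hash);
    return 0;
}
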
diff --git a/queue-5.0/net-mlx5e-xdp-avoid-checksum-complete-when-xdp-prog-is-loaded.patch b/queue-5.0/net-mlx5e-xdp-avoid-checksum-complete-when-xdp-prog-is-loaded.patch
new file mode 100644 (file)
index 0000000..74b55e7
--- /dev/null
@@ -0,0 +1,64 @@
+From foo@baz Sat Apr 20 16:43:09 CEST 2019
+From: Saeed Mahameed <saeedm@mellanox.com>
+Date: Thu, 21 Mar 2019 19:07:20 -0700
+Subject: net/mlx5e: XDP, Avoid checksum complete when XDP prog is loaded
+
+From: Saeed Mahameed <saeedm@mellanox.com>
+
+[ Upstream commit 5d0bb3bac4b9f6c22280b04545626fdfd99edc6b ]
+
+XDP programs might change packet data contents, which will make the
+reported skb checksum (checksum complete) invalid.
+
+When XDP programs are loaded/unloaded, set/clear the RX RQs'
+MLX5E_RQ_STATE_NO_CSUM_COMPLETE flag.
+
+Fixes: 86994156c736 ("net/mlx5e: XDP fast RX drop bpf programs support")
+Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c |    3 ++-
+ drivers/net/ethernet/mellanox/mlx5/core/en_main.c    |    6 +++++-
+ drivers/net/ethernet/mellanox/mlx5/core/en_rx.c      |    3 ++-
+ 3 files changed, 9 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+@@ -1665,7 +1665,8 @@ static int set_pflag_rx_no_csum_complete
+       struct mlx5e_channel *c;
+       int i;
+-      if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
++      if (!test_bit(MLX5E_STATE_OPENED, &priv->state) ||
++          priv->channels.params.xdp_prog)
+               return 0;
+       for (i = 0; i < channels->num; i++) {
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+@@ -950,7 +950,11 @@ static int mlx5e_open_rq(struct mlx5e_ch
+       if (params->rx_dim_enabled)
+               __set_bit(MLX5E_RQ_STATE_AM, &c->rq.state);
+-      if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE))
++      /* We disable csum_complete when XDP is enabled since
++       * XDP programs might manipulate packets which will render
++       * skb->checksum incorrect.
++       */
++      if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) || c->xdp)
+               __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state);
+       return 0;
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+@@ -753,7 +753,8 @@ static inline void mlx5e_handle_csum(str
+               return;
+       }
+-      if (unlikely(test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state)))
++      /* True when explicitly set via priv flag, or XDP prog is loaded */
++      if (test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state))
+               goto csum_unnecessary;
+       /* CQE csum doesn't cover padding octets in short ethernet
diff --git a/queue-5.0/net-strparser-partially-revert-strparser-call-skb_unclone-conditionally.patch b/queue-5.0/net-strparser-partially-revert-strparser-call-skb_unclone-conditionally.patch
new file mode 100644 (file)
index 0000000..7b43b2a
--- /dev/null
@@ -0,0 +1,90 @@
+From foo@baz Sat Apr 20 16:43:09 CEST 2019
+From: Jakub Kicinski <jakub.kicinski@netronome.com>
+Date: Wed, 10 Apr 2019 11:04:32 -0700
+Subject: net: strparser: partially revert "strparser: Call skb_unclone conditionally"
+
+From: Jakub Kicinski <jakub.kicinski@netronome.com>
+
+[ Upstream commit 4a9c2e3746e6151fd5d077259d79ce9ca86d47d7 ]
+
+This reverts the first part of commit 4e485d06bb8c ("strparser: Call
+skb_unclone conditionally").  To build a message with multiple
+fragments we need our own root of frag_list.  We can't simply
+use the frag_list of orig_skb, because it will lead to linking
+all orig_skbs together, creating very long frag chains and causing a
+stack overflow in kfree_skb() (which is called recursively on
+the frag_lists).
+
+BUG: stack guard page was hit at 00000000d40fad41 (stack is 0000000029dde9f4..000000008cce03d5)
+kernel stack overflow (double-fault): 0000 [#1] PREEMPT SMP
+RIP: 0010:free_one_page+0x2b/0x490
+
+Call Trace:
+  __free_pages_ok+0x143/0x2c0
+  skb_release_data+0x8e/0x140
+  ? skb_release_data+0xad/0x140
+  kfree_skb+0x32/0xb0
+
+  [...]
+
+  skb_release_data+0xad/0x140
+  ? skb_release_data+0xad/0x140
+  kfree_skb+0x32/0xb0
+  skb_release_data+0xad/0x140
+  ? skb_release_data+0xad/0x140
+  kfree_skb+0x32/0xb0
+  skb_release_data+0xad/0x140
+  ? skb_release_data+0xad/0x140
+  kfree_skb+0x32/0xb0
+  skb_release_data+0xad/0x140
+  ? skb_release_data+0xad/0x140
+  kfree_skb+0x32/0xb0
+  skb_release_data+0xad/0x140
+  __kfree_skb+0xe/0x20
+  tcp_disconnect+0xd6/0x4d0
+  tcp_close+0xf4/0x430
+  ? tcp_check_oom+0xf0/0xf0
+  tls_sk_proto_close+0xe4/0x1e0 [tls]
+  inet_release+0x36/0x60
+  __sock_release+0x37/0xa0
+  sock_close+0x11/0x20
+  __fput+0xa2/0x1d0
+  task_work_run+0x89/0xb0
+  exit_to_usermode_loop+0x9a/0xa0
+  do_syscall_64+0xc0/0xf0
+  entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+Let's leave the second unclone conditional, as I'm not entirely
+sure what its purpose is :)
+
+Fixes: 4e485d06bb8c ("strparser: Call skb_unclone conditionally")
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Reviewed-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/strparser/strparser.c |   12 +++++-------
+ 1 file changed, 5 insertions(+), 7 deletions(-)
+
+--- a/net/strparser/strparser.c
++++ b/net/strparser/strparser.c
+@@ -140,13 +140,11 @@ static int __strp_recv(read_descriptor_t
+                       /* We are going to append to the frags_list of head.
+                        * Need to unshare the frag_list.
+                        */
+-                      if (skb_has_frag_list(head)) {
+-                              err = skb_unclone(head, GFP_ATOMIC);
+-                              if (err) {
+-                                      STRP_STATS_INCR(strp->stats.mem_fail);
+-                                      desc->error = err;
+-                                      return 0;
+-                              }
++                      err = skb_unclone(head, GFP_ATOMIC);
++                      if (err) {
++                              STRP_STATS_INCR(strp->stats.mem_fail);
++                              desc->error = err;
++                              return 0;
+                       }
+                       if (unlikely(skb_shinfo(head)->frag_list)) {
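
The stack overflow in the trace comes from kfree_skb() recursing once per
frag_list level, so chaining all orig_skbs together makes the recursion depth
unbounded. A rough user-space analogy with a plain linked list is sketched
below; it is illustrative only and not the skb code.

#include <stdio.h>
#include <stdlib.h>

struct node {
    struct node *next;      /* plays the role of the frag_list link */
};

/* Recursive teardown needs one stack frame per element, like the
 * kfree_skb() -> skb_release_data() -> kfree_skb() chain in the trace
 * above.  With a short list this is harmless; with the very long chains
 * the reverted code could build, the stack runs out.
 */
static void free_chain_recursive(struct node *n)
{
    if (!n)
        return;
    free_chain_recursive(n->next);
    free(n);
}

int main(void)
{
    struct node *head = NULL;
    int i;

    for (i = 0; i < 1000; i++) {
        struct node *n = malloc(sizeof(*n));

        if (!n)
            break;
        n->next = head;
        head = n;
    }
    free_chain_recursive(head);
    printf("chain freed\n");
    return 0;
}
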
diff --git a/queue-5.0/net-thunderx-don-t-allow-jumbo-frames-with-xdp.patch b/queue-5.0/net-thunderx-don-t-allow-jumbo-frames-with-xdp.patch
new file mode 100644 (file)
index 0000000..ef8ec8c
--- /dev/null
@@ -0,0 +1,41 @@
+From foo@baz Sat Apr 20 16:43:09 CEST 2019
+From: Matteo Croce <mcroce@redhat.com>
+Date: Thu, 11 Apr 2019 12:26:33 +0200
+Subject: net: thunderx: don't allow jumbo frames with XDP
+
+From: Matteo Croce <mcroce@redhat.com>
+
+[ Upstream commit 1f227d16083b2e280b7dde4ca78883d75593f2fd ]
+
+The thunderx driver forbids loading an eBPF program if the MTU is too
+high, but this can be circumvented by loading the eBPF program first and
+then raising the MTU.
+
+Fix this by limiting the MTU if an eBPF program is already loaded.
+
+Fixes: 05c773f52b96e ("net: thunderx: Add basic XDP support")
+Signed-off-by: Matteo Croce <mcroce@redhat.com>
+Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/cavium/thunder/nicvf_main.c |    9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
++++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+@@ -1589,6 +1589,15 @@ static int nicvf_change_mtu(struct net_d
+       struct nicvf *nic = netdev_priv(netdev);
+       int orig_mtu = netdev->mtu;
++      /* For now just support only the usual MTU sized frames,
++       * plus some headroom for VLAN, QinQ.
++       */
++      if (nic->xdp_prog && new_mtu > MAX_XDP_MTU) {
++              netdev_warn(netdev, "Jumbo frames not yet supported with XDP, current MTU %d.\n",
++                          netdev->mtu);
++              return -EINVAL;
++      }
++
+       netdev->mtu = new_mtu;
+       if (!netif_running(netdev))
diff --git a/queue-5.0/net-thunderx-raise-xdp-mtu-to-1508.patch b/queue-5.0/net-thunderx-raise-xdp-mtu-to-1508.patch
new file mode 100644 (file)
index 0000000..cdf25ee
--- /dev/null
@@ -0,0 +1,55 @@
+From foo@baz Sat Apr 20 16:43:09 CEST 2019
+From: Matteo Croce <mcroce@redhat.com>
+Date: Thu, 11 Apr 2019 12:26:32 +0200
+Subject: net: thunderx: raise XDP MTU to 1508
+
+From: Matteo Croce <mcroce@redhat.com>
+
+[ Upstream commit 5ee15c101f29e0093ffb5448773ccbc786eb313b ]
+
+The thunderx driver splits frames bigger than 1530 bytes across multiple
+pages, making it impossible to run an eBPF program on them.
+This leads to a maximum MTU of 1508 if QinQ is in use.
+
+The thunderx driver forbids loading an eBPF program if the MTU is higher
+than 1500 bytes. Raise the limit to 1508 so it is possible to use L2
+protocols which need some more headroom.
+
+Fixes: 05c773f52b96e ("net: thunderx: Add basic XDP support")
+Signed-off-by: Matteo Croce <mcroce@redhat.com>
+Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/cavium/thunder/nicvf_main.c |   13 +++++++++++--
+ 1 file changed, 11 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
++++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+@@ -32,6 +32,13 @@
+ #define DRV_NAME      "nicvf"
+ #define DRV_VERSION   "1.0"
++/* NOTE: Packets bigger than 1530 are split across multiple pages and XDP needs
++ * the buffer to be contiguous. Allow XDP to be set up only if we don't exceed
++ * this value, keeping headroom for the 14 byte Ethernet header and two
++ * VLAN tags (for QinQ)
++ */
++#define MAX_XDP_MTU   (1530 - ETH_HLEN - VLAN_HLEN * 2)
++
+ /* Supported devices */
+ static const struct pci_device_id nicvf_id_table[] = {
+       { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
+@@ -1830,8 +1837,10 @@ static int nicvf_xdp_setup(struct nicvf
+       bool bpf_attached = false;
+       int ret = 0;
+-      /* For now just support only the usual MTU sized frames */
+-      if (prog && (dev->mtu > 1500)) {
++      /* For now just support only the usual MTU sized frames,
++       * plus some headroom for VLAN, QinQ.
++       */
++      if (prog && dev->mtu > MAX_XDP_MTU) {
+               netdev_warn(dev, "Jumbo frames not yet supported with XDP, current MTU %d.\n",
+                           dev->mtu);
+               return -EOPNOTSUPP;
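
The new limit follows directly from the buffer math described above; the
snippet below is a quick stand-alone check of that arithmetic, using the
constant values defined in the kernel's if_ether.h and if_vlan.h headers.

#include <stdio.h>

#define ETH_HLEN  14    /* Ethernet header, as in <linux/if_ether.h> */
#define VLAN_HLEN 4     /* one 802.1Q tag, as in <linux/if_vlan.h>   */

/* Largest MTU that still fits, together with the Ethernet header and two
 * VLAN tags (QinQ), inside the 1530-byte contiguous buffer XDP requires.
 */
#define MAX_XDP_MTU (1530 - ETH_HLEN - VLAN_HLEN * 2)

int main(void)
{
    printf("MAX_XDP_MTU = %d\n", MAX_XDP_MTU);   /* 1530 - 14 - 8 = 1508 */
    return 0;
}
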
diff --git a/queue-5.0/net-tls-don-t-leak-partially-sent-record-in-device-mode.patch b/queue-5.0/net-tls-don-t-leak-partially-sent-record-in-device-mode.patch
new file mode 100644 (file)
index 0000000..5b28961
--- /dev/null
@@ -0,0 +1,132 @@
+From foo@baz Sat Apr 20 16:43:09 CEST 2019
+From: Jakub Kicinski <jakub.kicinski@netronome.com>
+Date: Wed, 10 Apr 2019 11:04:31 -0700
+Subject: net/tls: don't leak partially sent record in device mode
+
+From: Jakub Kicinski <jakub.kicinski@netronome.com>
+
+[ Upstream commit 35b71a34ada62c9573847a324bf06a133fe11b11 ]
+
+David reports that tls triggers warnings related to
+sk->sk_forward_alloc not being zero at destruction time:
+
+WARNING: CPU: 5 PID: 6831 at net/core/stream.c:206 sk_stream_kill_queues+0x103/0x110
+WARNING: CPU: 5 PID: 6831 at net/ipv4/af_inet.c:160 inet_sock_destruct+0x15b/0x170
+
+This happens when the sender fills up the write buffer and dies from
+SIGPIPE.  It is due to the device implementation not cleaning up the
+partially_sent_record.
+
+This is because commit a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance")
+moved the partial record cleanup to the SW-only path.
+
+Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance")
+Reported-by: David Beckett <david.beckett@netronome.com>
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Reviewed-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
+Reviewed-by: Simon Horman <simon.horman@netronome.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/tls.h    |    2 ++
+ net/tls/tls_device.c |    7 +++++++
+ net/tls/tls_main.c   |   22 ++++++++++++++++++++++
+ net/tls/tls_sw.c     |   15 +--------------
+ 4 files changed, 32 insertions(+), 14 deletions(-)
+
+--- a/include/net/tls.h
++++ b/include/net/tls.h
+@@ -289,6 +289,7 @@ int tls_device_sendmsg(struct sock *sk,
+ int tls_device_sendpage(struct sock *sk, struct page *page,
+                       int offset, size_t size, int flags);
+ void tls_device_sk_destruct(struct sock *sk);
++void tls_device_free_resources_tx(struct sock *sk);
+ void tls_device_init(void);
+ void tls_device_cleanup(void);
+ int tls_tx_records(struct sock *sk, int flags);
+@@ -312,6 +313,7 @@ int tls_push_sg(struct sock *sk, struct
+               int flags);
+ int tls_push_partial_record(struct sock *sk, struct tls_context *ctx,
+                           int flags);
++bool tls_free_partial_record(struct sock *sk, struct tls_context *ctx);
+ int tls_push_pending_closed_record(struct sock *sk, struct tls_context *ctx,
+                                  int flags, long *timeo);
+--- a/net/tls/tls_device.c
++++ b/net/tls/tls_device.c
+@@ -219,6 +219,13 @@ void tls_device_sk_destruct(struct sock
+ }
+ EXPORT_SYMBOL(tls_device_sk_destruct);
++void tls_device_free_resources_tx(struct sock *sk)
++{
++      struct tls_context *tls_ctx = tls_get_ctx(sk);
++
++      tls_free_partial_record(sk, tls_ctx);
++}
++
+ static void tls_append_frag(struct tls_record_info *record,
+                           struct page_frag *pfrag,
+                           int size)
+--- a/net/tls/tls_main.c
++++ b/net/tls/tls_main.c
+@@ -220,6 +220,26 @@ int tls_push_pending_closed_record(struc
+               return tls_ctx->push_pending_record(sk, flags);
+ }
++bool tls_free_partial_record(struct sock *sk, struct tls_context *ctx)
++{
++      struct scatterlist *sg;
++
++      sg = ctx->partially_sent_record;
++      if (!sg)
++              return false;
++
++      while (1) {
++              put_page(sg_page(sg));
++              sk_mem_uncharge(sk, sg->length);
++
++              if (sg_is_last(sg))
++                      break;
++              sg++;
++      }
++      ctx->partially_sent_record = NULL;
++      return true;
++}
++
+ static void tls_write_space(struct sock *sk)
+ {
+       struct tls_context *ctx = tls_get_ctx(sk);
+@@ -278,6 +298,8 @@ static void tls_sk_proto_close(struct so
+               kfree(ctx->tx.rec_seq);
+               kfree(ctx->tx.iv);
+               tls_sw_free_resources_tx(sk);
++      } else if (ctx->tx_conf == TLS_HW) {
++              tls_device_free_resources_tx(sk);
+       }
+       if (ctx->rx_conf == TLS_SW) {
+--- a/net/tls/tls_sw.c
++++ b/net/tls/tls_sw.c
+@@ -1804,20 +1804,7 @@ void tls_sw_free_resources_tx(struct soc
+       /* Free up un-sent records in tx_list. First, free
+        * the partially sent record if any at head of tx_list.
+        */
+-      if (tls_ctx->partially_sent_record) {
+-              struct scatterlist *sg = tls_ctx->partially_sent_record;
+-
+-              while (1) {
+-                      put_page(sg_page(sg));
+-                      sk_mem_uncharge(sk, sg->length);
+-
+-                      if (sg_is_last(sg))
+-                              break;
+-                      sg++;
+-              }
+-
+-              tls_ctx->partially_sent_record = NULL;
+-
++      if (tls_free_partial_record(sk, tls_ctx)) {
+               rec = list_first_entry(&ctx->tx_list,
+                                      struct tls_rec, list);
+               list_del(&rec->list);
diff --git a/queue-5.0/net-tls-fix-build-without-config_tls_device.patch b/queue-5.0/net-tls-fix-build-without-config_tls_device.patch
new file mode 100644 (file)
index 0000000..eba0ad9
--- /dev/null
@@ -0,0 +1,34 @@
+From foo@baz Sat Apr 20 16:43:09 CEST 2019
+From: Jakub Kicinski <jakub.kicinski@netronome.com>
+Date: Wed, 10 Apr 2019 16:23:39 -0700
+Subject: net/tls: fix build without CONFIG_TLS_DEVICE
+
+From: Jakub Kicinski <jakub.kicinski@netronome.com>
+
+[ Upstream commit 903f1a187776bb8d79b13618ec05b25f86318885 ]
+
+buildbot noticed that TLS_HW is not defined if CONFIG_TLS_DEVICE=n.
+Wrap the cleanup branch in an ifdef; tls_device_free_resources_tx()
+would not be compiled in this case either.
+
+Fixes: 35b71a34ada6 ("net/tls: don't leak partially sent record in device mode")
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/tls/tls_main.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/tls/tls_main.c
++++ b/net/tls/tls_main.c
+@@ -298,8 +298,10 @@ static void tls_sk_proto_close(struct so
+               kfree(ctx->tx.rec_seq);
+               kfree(ctx->tx.iv);
+               tls_sw_free_resources_tx(sk);
++#ifdef CONFIG_TLS_DEVICE
+       } else if (ctx->tx_conf == TLS_HW) {
+               tls_device_free_resources_tx(sk);
++#endif
+       }
+       if (ctx->rx_conf == TLS_SW) {
diff --git a/queue-5.0/net-tls-fix-the-iv-leaks.patch b/queue-5.0/net-tls-fix-the-iv-leaks.patch
new file mode 100644 (file)
index 0000000..08c0be0
--- /dev/null
@@ -0,0 +1,40 @@
+From foo@baz Sat Apr 20 16:43:09 CEST 2019
+From: Jakub Kicinski <jakub.kicinski@netronome.com>
+Date: Wed, 10 Apr 2019 11:04:30 -0700
+Subject: net/tls: fix the IV leaks
+
+From: Jakub Kicinski <jakub.kicinski@netronome.com>
+
+[ Upstream commit 5a03bc73abed6ae196c15e9950afde19d48be12c ]
+
+Commit f66de3ee2c16 ("net/tls: Split conf to rx + tx") made
+freeing of the IV and record sequence number conditional on the SW
+path only, but commit e8f69799810c ("net/tls: Add generic NIC
+offload infrastructure") also allocates that state for the
+device offload configuration.  Remember to free it.
+
+Fixes: e8f69799810c ("net/tls: Add generic NIC offload infrastructure")
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Reviewed-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
+Reviewed-by: Simon Horman <simon.horman@netronome.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/tls/tls_device.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/net/tls/tls_device.c
++++ b/net/tls/tls_device.c
+@@ -52,8 +52,11 @@ static DEFINE_SPINLOCK(tls_device_lock);
+ static void tls_device_free_ctx(struct tls_context *ctx)
+ {
+-      if (ctx->tx_conf == TLS_HW)
++      if (ctx->tx_conf == TLS_HW) {
+               kfree(tls_offload_ctx_tx(ctx));
++              kfree(ctx->tx.rec_seq);
++              kfree(ctx->tx.iv);
++      }
+       if (ctx->rx_conf == TLS_HW)
+               kfree(tls_offload_ctx_rx(ctx));
diff --git a/queue-5.0/net-tls-prevent-bad-memory-access-in-tls_is_sk_tx_device_offloaded.patch b/queue-5.0/net-tls-prevent-bad-memory-access-in-tls_is_sk_tx_device_offloaded.patch
new file mode 100644 (file)
index 0000000..abc3250
--- /dev/null
@@ -0,0 +1,36 @@
+From foo@baz Sat Apr 20 16:43:09 CEST 2019
+From: Jakub Kicinski <jakub.kicinski@netronome.com>
+Date: Mon, 8 Apr 2019 17:59:50 -0700
+Subject: net/tls: prevent bad memory access in tls_is_sk_tx_device_offloaded()
+
+From: Jakub Kicinski <jakub.kicinski@netronome.com>
+
+[ Upstream commit b4f47f3848eb70986f75d06112af7b48b7f5f462 ]
+
+Unlike the '&&' operator, '&' does not have short-circuit
+evaluation semantics.  IOW both sides of the operator always
+get evaluated.  Fix the wrong operator in
+tls_is_sk_tx_device_offloaded(), which would lead to an
+out-of-bounds access for non-full sockets.
+
+Fixes: 4799ac81e52a ("tls: Add rx inline crypto offload")
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Reviewed-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
+Reviewed-by: Simon Horman <simon.horman@netronome.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/tls.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/include/net/tls.h
++++ b/include/net/tls.h
+@@ -366,7 +366,7 @@ tls_validate_xmit_skb(struct sock *sk, s
+ static inline bool tls_is_sk_tx_device_offloaded(struct sock *sk)
+ {
+ #ifdef CONFIG_SOCK_VALIDATE_XMIT
+-      return sk_fullsock(sk) &
++      return sk_fullsock(sk) &&
+              (smp_load_acquire(&sk->sk_validate_xmit_skb) ==
+              &tls_validate_xmit_skb);
+ #else
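
The difference the one-character fix above makes can be seen in a few lines
of stand-alone C: with '&&' the second operand is skipped when the first is
false, while '&' always evaluates both. The helper names below are
illustrative, not the tls code.

#include <stdio.h>

static int first_check(void)  { printf("  first_check\n");  return 0; }
static int second_check(void) { printf("  second_check\n"); return 1; }

int main(void)
{
    /* '&&' short-circuits: second_check() never runs here. */
    printf("using &&:\n");
    if (first_check() && second_check())
        printf("  both true\n");

    /* '&' evaluates both operands even though the first is 0; in the
     * kernel bug this meant reading fields of a non-full socket after
     * sk_fullsock() had already returned false.
     */
    printf("using &:\n");
    if (first_check() & second_check())
        printf("  both true\n");

    return 0;
}
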
diff --git a/queue-5.0/nfc-nci-add-some-bounds-checking-in-nci_hci_cmd_received.patch b/queue-5.0/nfc-nci-add-some-bounds-checking-in-nci_hci_cmd_received.patch
new file mode 100644 (file)
index 0000000..7f8d7fe
--- /dev/null
@@ -0,0 +1,51 @@
+From foo@baz Sat Apr 20 16:43:09 CEST 2019
+From: Dan Carpenter <dan.carpenter@oracle.com>
+Date: Wed, 3 Apr 2019 10:12:48 +0300
+Subject: NFC: nci: Add some bounds checking in nci_hci_cmd_received()
+
+From: Dan Carpenter <dan.carpenter@oracle.com>
+
+[ Upstream commit d7ee81ad09f072eab1681877fc71ec05f9c1ae92 ]
+
+This is similar to commit 674d9de02aa7 ("NFC: Fix possible memory
+corruption when handling SHDLC I-Frame commands").
+
+I'm not totally sure, but I think that commit description may have
+overstated the danger.  I was under the impression that this data came
+from the firmware?  If you can't trust your networking firmware, then
+you're already in trouble.
+
+Anyway, these days we add bounds checking wherever we can and we call
+it kernel hardening.  Better safe than sorry.
+
+Fixes: 11f54f228643 ("NFC: nci: Add HCI over NCI protocol support")
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/nfc/nci/hci.c |    8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/net/nfc/nci/hci.c
++++ b/net/nfc/nci/hci.c
+@@ -312,6 +312,10 @@ static void nci_hci_cmd_received(struct
+               create_info = (struct nci_hci_create_pipe_resp *)skb->data;
+               dest_gate = create_info->dest_gate;
+               new_pipe = create_info->pipe;
++              if (new_pipe >= NCI_HCI_MAX_PIPES) {
++                      status = NCI_HCI_ANY_E_NOK;
++                      goto exit;
++              }
+               /* Save the new created pipe and bind with local gate,
+                * the description for skb->data[3] is destination gate id
+@@ -336,6 +340,10 @@ static void nci_hci_cmd_received(struct
+                       goto exit;
+               }
+               delete_info = (struct nci_hci_delete_pipe_noti *)skb->data;
++              if (delete_info->pipe >= NCI_HCI_MAX_PIPES) {
++                      status = NCI_HCI_ANY_E_NOK;
++                      goto exit;
++              }
+               ndev->hci_dev->pipes[delete_info->pipe].gate =
+                                               NCI_HCI_INVALID_GATE;
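
The added checks follow the usual pattern of validating an index received
from outside (here, the NFC firmware) before using it to address an array.
A minimal stand-alone sketch of the same pattern, with made-up names:

#include <stdio.h>
#include <stdint.h>

#define MAX_PIPES 128           /* valid pipe ids are 0..127 */

static int pipe_gate[MAX_PIPES];

/* Reject out-of-range ids before they are used as an array index, as the
 * patch does for the create-pipe and delete-pipe notifications.
 */
static int bind_pipe(uint8_t pipe, int gate)
{
    if (pipe >= MAX_PIPES)
        return -1;      /* the driver answers NCI_HCI_ANY_E_NOK */
    pipe_gate[pipe] = gate;
    return 0;
}

int main(void)
{
    printf("pipe   5: %d\n", bind_pipe(5, 1));
    printf("pipe 200: %d\n", bind_pipe(200, 1));
    return 0;
}
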
diff --git a/queue-5.0/nfc-nci-potential-off-by-one-in-pipes-array.patch b/queue-5.0/nfc-nci-potential-off-by-one-in-pipes-array.patch
new file mode 100644 (file)
index 0000000..3cff102
--- /dev/null
@@ -0,0 +1,38 @@
+From foo@baz Sat Apr 20 16:43:09 CEST 2019
+From: Dan Carpenter <dan.carpenter@oracle.com>
+Date: Wed, 3 Apr 2019 10:13:51 +0300
+Subject: nfc: nci: Potential off by one in ->pipes[] array
+
+From: Dan Carpenter <dan.carpenter@oracle.com>
+
+[ Upstream commit 6491d698396fd5da4941980a35ca7c162a672016 ]
+
+This is similar to commit e285d5bfb7e9 ("NFC: Fix the number of pipes")
+where we changed NFC_HCI_MAX_PIPES from 127 to 128.
+
+As the comment next to the define explains, the pipe identifier is 7
+bits long.  The highest possible pipe is 127, but the number of possible
+pipes is 128.  As the code is now, there is potential for an
+out-of-bounds array access:
+
+    net/nfc/nci/hci.c:297 nci_hci_cmd_received() warn: array off by one?
+    'ndev->hci_dev->pipes[pipe]' '0-127 == 127'
+
+Fixes: 11f54f228643 ("NFC: nci: Add HCI over NCI protocol support")
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/nfc/nci_core.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/include/net/nfc/nci_core.h
++++ b/include/net/nfc/nci_core.h
+@@ -166,7 +166,7 @@ struct nci_conn_info {
+  * According to specification 102 622 chapter 4.4 Pipes,
+  * the pipe identifier is 7 bits long.
+  */
+-#define NCI_HCI_MAX_PIPES          127
++#define NCI_HCI_MAX_PIPES          128
+ struct nci_hci_gate {
+       u8 gate;
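
The sizing follows from the width of the identifier: a 7-bit pipe id has 128
possible values, numbered 0 through 127, so an array indexed by it needs 128
entries rather than 127. A short check of that arithmetic:

#include <stdio.h>

int main(void)
{
    unsigned int bits = 7;
    unsigned int values = 1u << bits;         /* 128 possible pipe ids */
    unsigned int highest = (1u << bits) - 1;  /* ...numbered 0..127    */

    printf("%u-bit id: %u values, highest id %u\n", bits, values, highest);
    return 0;
}
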
diff --git a/queue-5.0/nfp-flower-remove-vlan-cfi-bit-from-push-vlan-action.patch b/queue-5.0/nfp-flower-remove-vlan-cfi-bit-from-push-vlan-action.patch
new file mode 100644 (file)
index 0000000..4df5516
--- /dev/null
@@ -0,0 +1,43 @@
+From foo@baz Sat Apr 20 16:43:09 CEST 2019
+From: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
+Date: Mon, 1 Apr 2019 19:36:34 -0700
+Subject: nfp: flower: remove vlan CFI bit from push vlan action
+
+From: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
+
+[ Upstream commit 42cd5484a22f1a1b947e21e2af65fa7dab09d017 ]
+
+We no longer set the CFI bit when pushing vlan tags; therefore, remove
+the CFI bit from the push vlan action.
+
+Fixes: 1a1e586f54bf ("nfp: add basic action capabilities to flower offloads")
+Signed-off-by: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
+Signed-off-by: Louis Peens <louis.peens@netronome.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/netronome/nfp/flower/action.c |    3 +--
+ drivers/net/ethernet/netronome/nfp/flower/cmsg.h   |    1 -
+ 2 files changed, 1 insertion(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/netronome/nfp/flower/action.c
++++ b/drivers/net/ethernet/netronome/nfp/flower/action.c
+@@ -49,8 +49,7 @@ nfp_fl_push_vlan(struct nfp_fl_push_vlan
+       tmp_push_vlan_tci =
+               FIELD_PREP(NFP_FL_PUSH_VLAN_PRIO, tcf_vlan_push_prio(action)) |
+-              FIELD_PREP(NFP_FL_PUSH_VLAN_VID, tcf_vlan_push_vid(action)) |
+-              NFP_FL_PUSH_VLAN_CFI;
++              FIELD_PREP(NFP_FL_PUSH_VLAN_VID, tcf_vlan_push_vid(action));
+       push_vlan->vlan_tci = cpu_to_be16(tmp_push_vlan_tci);
+ }
+--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
++++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
+@@ -82,7 +82,6 @@
+ #define NFP_FL_OUT_FLAGS_TYPE_IDX     GENMASK(2, 0)
+ #define NFP_FL_PUSH_VLAN_PRIO         GENMASK(15, 13)
+-#define NFP_FL_PUSH_VLAN_CFI          BIT(12)
+ #define NFP_FL_PUSH_VLAN_VID          GENMASK(11, 0)
+ #define IPV6_FLOW_LABEL_MASK          cpu_to_be32(0x000fffff)
diff --git a/queue-5.0/nfp-flower-replace-cfi-with-vlan-present.patch b/queue-5.0/nfp-flower-replace-cfi-with-vlan-present.patch
new file mode 100644 (file)
index 0000000..4b62ca8
--- /dev/null
@@ -0,0 +1,57 @@
+From foo@baz Sat Apr 20 16:43:09 CEST 2019
+From: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
+Date: Mon, 1 Apr 2019 19:36:33 -0700
+Subject: nfp: flower: replace CFI with vlan present
+
+From: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
+
+[ Upstream commit f7ee799a51ddbcc205ef615fe424fb5084e9e0aa ]
+
+Replace the vlan CFI bit with a vlan present bit that indicates the
+presence of a vlan tag. Previously the driver incorrectly assumed
+that a vlan id of 0 is not matchable; therefore, we now indicate vlan
+presence with a vlan present bit.
+
+Fixes: 5571e8c9f241 ("nfp: extend flower matching capabilities")
+Signed-off-by: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
+Signed-off-by: Louis Peens <louis.peens@netronome.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/netronome/nfp/flower/cmsg.h  |    2 +-
+ drivers/net/ethernet/netronome/nfp/flower/match.c |   14 ++++++--------
+ 2 files changed, 7 insertions(+), 9 deletions(-)
+
+--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
++++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
+@@ -26,7 +26,7 @@
+ #define NFP_FLOWER_LAYER2_GENEVE_OP   BIT(6)
+ #define NFP_FLOWER_MASK_VLAN_PRIO     GENMASK(15, 13)
+-#define NFP_FLOWER_MASK_VLAN_CFI      BIT(12)
++#define NFP_FLOWER_MASK_VLAN_PRESENT  BIT(12)
+ #define NFP_FLOWER_MASK_VLAN_VID      GENMASK(11, 0)
+ #define NFP_FLOWER_MASK_MPLS_LB               GENMASK(31, 12)
+--- a/drivers/net/ethernet/netronome/nfp/flower/match.c
++++ b/drivers/net/ethernet/netronome/nfp/flower/match.c
+@@ -26,14 +26,12 @@ nfp_flower_compile_meta_tci(struct nfp_f
+                                                     FLOW_DISSECTOR_KEY_VLAN,
+                                                     target);
+               /* Populate the tci field. */
+-              if (flow_vlan->vlan_id || flow_vlan->vlan_priority) {
+-                      tmp_tci = FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO,
+-                                           flow_vlan->vlan_priority) |
+-                                FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID,
+-                                           flow_vlan->vlan_id) |
+-                                NFP_FLOWER_MASK_VLAN_CFI;
+-                      frame->tci = cpu_to_be16(tmp_tci);
+-              }
++              tmp_tci = NFP_FLOWER_MASK_VLAN_PRESENT;
++              tmp_tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO,
++                                    flow_vlan->vlan_priority) |
++                         FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID,
++                                    flow_vlan->vlan_id);
++              frame->tci = cpu_to_be16(tmp_tci);
+       }
+ }
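
Both nfp changes hinge on the 802.1Q TCI layout: bits 15..13 carry the
priority, bit 12 the CFI/DEI bit (reused here as a "vlan present" flag), and
bits 11..0 the VLAN id. The sketch below packs such a TCI with generic masks
(not the driver's defines) and shows that a VID of 0 is still a valid,
matchable tag once the present bit is set.

#include <stdio.h>
#include <stdint.h>

#define VLAN_PRIO_SHIFT   13
#define VLAN_PRESENT_BIT  (1u << 12)   /* bit 12: CFI/DEI, reused as "present" */
#define VLAN_VID_MASK     0x0fff

int main(void)
{
    unsigned int prio = 5, vid = 0;   /* priority-tagged frame with VID 0 */
    unsigned int tci = (prio << VLAN_PRIO_SHIFT) | VLAN_PRESENT_BIT |
                       (vid & VLAN_VID_MASK);

    printf("tci = 0x%04x (prio %u, present %u, vid %u)\n",
           tci, tci >> VLAN_PRIO_SHIFT, !!(tci & VLAN_PRESENT_BIT),
           tci & VLAN_VID_MASK);
    return 0;
}
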
diff --git a/queue-5.0/revert-net-mlx5e-enable-reporting-checksum-unnecessary-also-for-l3-packets.patch b/queue-5.0/revert-net-mlx5e-enable-reporting-checksum-unnecessary-also-for-l3-packets.patch
new file mode 100644 (file)
index 0000000..694f53e
--- /dev/null
@@ -0,0 +1,46 @@
+From foo@baz Sat Apr 20 16:43:09 CEST 2019
+From: Or Gerlitz <ogerlitz@mellanox.com>
+Date: Sun, 31 Mar 2019 12:53:03 +0000
+Subject: Revert "net/mlx5e: Enable reporting checksum unnecessary also for L3 packets"
+
+From: Or Gerlitz <ogerlitz@mellanox.com>
+
+[ Upstream commit 8c8811d46d00d119ffbe039a6e52a0b504df1c2c ]
+
+This reverts commit b820e6fb0978f9c2ac438c199d2bb2f35950e9c9.
+
+Prior to the commit we are reverting, checksum unnecessary was only set
+when both the L3 OK and L4 OK bits were set on the CQE. This caused
+packets of IP protocols such as SCTP, which are not handled by the
+current HW L4 parser (hence the L4 OK bit is not set, but the L4 header
+type none bit is set), to go through the checksum none code, where we
+currently report checksum unnecessary for them by mistake, which is a
+regression. Fix this by reverting.
+
+Note that on our usual track we report checksum complete, so the revert
+isn't expected to have any notable performance impact. Also, when we are
+not on the checksum complete track, the L4 protocols for which we report
+checksum none are not high-performance ones; we will still report
+checksum unnecessary for UDP/TCP.
+
+Fixes: b820e6fb0978 ("net/mlx5e: Enable reporting checksum unnecessary also for L3 packets")
+Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
+Reported-by: Avi Urman <aviu@mellanox.com>
+Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_rx.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+@@ -848,8 +848,7 @@ static inline void mlx5e_handle_csum(str
+ csum_unnecessary:
+       if (likely((cqe->hds_ip_ext & CQE_L3_OK) &&
+-                 ((cqe->hds_ip_ext & CQE_L4_OK) ||
+-                  (get_cqe_l4_hdr_type(cqe) == CQE_L4_HDR_TYPE_NONE)))) {
++                 (cqe->hds_ip_ext & CQE_L4_OK))) {
+               skb->ip_summed = CHECKSUM_UNNECESSARY;
+               if (cqe_is_tunneled(cqe)) {
+                       skb->csum_level = 1;
diff --git a/queue-5.0/route-avoid-crash-from-dereferencing-null-rt-from.patch b/queue-5.0/route-avoid-crash-from-dereferencing-null-rt-from.patch
new file mode 100644 (file)
index 0000000..f6a4f6c
--- /dev/null
@@ -0,0 +1,54 @@
+From foo@baz Sat Apr 20 16:43:09 CEST 2019
+From: Jonathan Lemon <jonathan.lemon@gmail.com>
+Date: Sun, 14 Apr 2019 14:21:29 -0700
+Subject: route: Avoid crash from dereferencing NULL rt->from
+
+From: Jonathan Lemon <jonathan.lemon@gmail.com>
+
+[ Upstream commit 9c69a13205151c0d801de9f9d83a818e6e8f60ec ]
+
+When __ip6_rt_update_pmtu() is called, rt->from is RCU dereferenced, but is
+never checked for null - rt6_flush_exceptions() may have removed the entry.
+
+[ 1913.989004] RIP: 0010:ip6_rt_cache_alloc+0x13/0x170
+[ 1914.209410] Call Trace:
+[ 1914.214798]  <IRQ>
+[ 1914.219226]  __ip6_rt_update_pmtu+0xb0/0x190
+[ 1914.228649]  ip6_tnl_xmit+0x2c2/0x970 [ip6_tunnel]
+[ 1914.239223]  ? ip6_tnl_parse_tlv_enc_lim+0x32/0x1a0 [ip6_tunnel]
+[ 1914.252489]  ? __gre6_xmit+0x148/0x530 [ip6_gre]
+[ 1914.262678]  ip6gre_tunnel_xmit+0x17e/0x3c7 [ip6_gre]
+[ 1914.273831]  dev_hard_start_xmit+0x8d/0x1f0
+[ 1914.283061]  sch_direct_xmit+0xfa/0x230
+[ 1914.291521]  __qdisc_run+0x154/0x4b0
+[ 1914.299407]  net_tx_action+0x10e/0x1f0
+[ 1914.307678]  __do_softirq+0xca/0x297
+[ 1914.315567]  irq_exit+0x96/0xa0
+[ 1914.322494]  smp_apic_timer_interrupt+0x68/0x130
+[ 1914.332683]  apic_timer_interrupt+0xf/0x20
+[ 1914.341721]  </IRQ>
+
+Fixes: a68886a69180 ("net/ipv6: Make from in rt6_info rcu protected")
+Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: David Ahern <dsahern@gmail.com>
+Reviewed-by: Martin KaFai Lau <kafai@fb.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/route.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -2336,6 +2336,10 @@ static void __ip6_rt_update_pmtu(struct
+               rcu_read_lock();
+               from = rcu_dereference(rt6->from);
++              if (!from) {
++                      rcu_read_unlock();
++                      return;
++              }
+               nrt6 = ip6_rt_cache_alloc(from, daddr, saddr);
+               if (nrt6) {
+                       rt6_do_update_pmtu(nrt6, mtu);
diff --git a/queue-5.0/sch_cake-make-sure-we-can-write-the-ip-header-before-changing-dscp-bits.patch b/queue-5.0/sch_cake-make-sure-we-can-write-the-ip-header-before-changing-dscp-bits.patch
new file mode 100644 (file)
index 0000000..aa4c644
--- /dev/null
@@ -0,0 +1,50 @@
+From foo@baz Sat Apr 20 16:43:09 CEST 2019
+From: "Toke Høiland-Jørgensen" <toke@redhat.com>
+Date: Thu, 4 Apr 2019 15:01:33 +0200
+Subject: sch_cake: Make sure we can write the IP header before changing DSCP bits
+
+From: "Toke Høiland-Jørgensen" <toke@redhat.com>
+
+[ Upstream commit c87b4ecdbe8db27867a7b7f840291cd843406bd7 ]
+
+There is not actually any guarantee that the IP headers are valid before we
+access the DSCP bits of the packets. Fix this using the same approach taken
+in sch_dsmark.
+
+Reported-by: Kevin Darbyshire-Bryant <kevin@darbyshire-bryant.me.uk>
+Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_cake.c |   11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+--- a/net/sched/sch_cake.c
++++ b/net/sched/sch_cake.c
+@@ -1524,16 +1524,27 @@ static void cake_wash_diffserv(struct sk
+ static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash)
+ {
++      int wlen = skb_network_offset(skb);
+       u8 dscp;
+       switch (tc_skb_protocol(skb)) {
+       case htons(ETH_P_IP):
++              wlen += sizeof(struct iphdr);
++              if (!pskb_may_pull(skb, wlen) ||
++                  skb_try_make_writable(skb, wlen))
++                      return 0;
++
+               dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
+               if (wash && dscp)
+                       ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0);
+               return dscp;
+       case htons(ETH_P_IPV6):
++              wlen += sizeof(struct ipv6hdr);
++              if (!pskb_may_pull(skb, wlen) ||
++                  skb_try_make_writable(skb, wlen))
++                      return 0;
++
+               dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
+               if (wash && dscp)
+                       ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0);
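
For reference, the DSCP value the function extracts lives in the upper six
bits of the IPv4 TOS / IPv6 traffic-class byte, which is why the code shown
in the hunk shifts the dsfield right by two. A tiny stand-alone illustration
of that extraction:

#include <stdio.h>

int main(void)
{
    /* The dsfield byte holds DSCP in bits 7..2 and ECN in bits 1..0, so
     * DSCP is obtained with a right shift by two, matching the
     * "ipv4_get_dsfield(ip_hdr(skb)) >> 2" in cake_handle_diffserv().
     */
    unsigned int dsfield = 0xb8;   /* DSCP 46 (EF) with ECN bits clear */

    printf("dscp = %u, ecn = %u\n", dsfield >> 2, dsfield & 0x3);
    return 0;
}
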
diff --git a/queue-5.0/sch_cake-simplify-logic-in-cake_select_tin.patch b/queue-5.0/sch_cake-simplify-logic-in-cake_select_tin.patch
new file mode 100644 (file)
index 0000000..a1f7602
--- /dev/null
@@ -0,0 +1,86 @@
+From foo@baz Sat Apr 20 16:43:09 CEST 2019
+From: "Toke Høiland-Jørgensen" <toke@redhat.com>
+Date: Fri, 5 Apr 2019 15:01:59 +0200
+Subject: sch_cake: Simplify logic in cake_select_tin()
+
+From: "Toke Høiland-Jørgensen" <toke@redhat.com>
+
+[ Upstream commit 4976e3c683f328bc6f2edef555a4ffee6524486f ]
+
+The logic in cake_select_tin() was getting a bit hairy, and it turns out we
+can simplify it quite a bit. This also allows us to get rid of one of the
+two diffserv parsing functions, which has the added benefit that
+already-zeroed DSCP fields won't get re-written.
+
+Suggested-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
+Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_cake.c |   44 ++++++++++++++++----------------------------
+ 1 file changed, 16 insertions(+), 28 deletions(-)
+
+--- a/net/sched/sch_cake.c
++++ b/net/sched/sch_cake.c
+@@ -1508,20 +1508,6 @@ static unsigned int cake_drop(struct Qdi
+       return idx + (tin << 16);
+ }
+-static void cake_wash_diffserv(struct sk_buff *skb)
+-{
+-      switch (skb->protocol) {
+-      case htons(ETH_P_IP):
+-              ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0);
+-              break;
+-      case htons(ETH_P_IPV6):
+-              ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0);
+-              break;
+-      default:
+-              break;
+-      }
+-}
+-
+ static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash)
+ {
+       int wlen = skb_network_offset(skb);
+@@ -1564,25 +1550,27 @@ static struct cake_tin_data *cake_select
+ {
+       struct cake_sched_data *q = qdisc_priv(sch);
+       u32 tin;
++      u8 dscp;
++
++      /* Tin selection: Default to diffserv-based selection, allow overriding
++       * using firewall marks or skb->priority.
++       */
++      dscp = cake_handle_diffserv(skb,
++                                  q->rate_flags & CAKE_FLAG_WASH);
++
++      if (q->tin_mode == CAKE_DIFFSERV_BESTEFFORT)
++              tin = 0;
+-      if (TC_H_MAJ(skb->priority) == sch->handle &&
+-          TC_H_MIN(skb->priority) > 0 &&
+-          TC_H_MIN(skb->priority) <= q->tin_cnt) {
++      else if (TC_H_MAJ(skb->priority) == sch->handle &&
++               TC_H_MIN(skb->priority) > 0 &&
++               TC_H_MIN(skb->priority) <= q->tin_cnt)
+               tin = q->tin_order[TC_H_MIN(skb->priority) - 1];
+-              if (q->rate_flags & CAKE_FLAG_WASH)
+-                      cake_wash_diffserv(skb);
+-      } else if (q->tin_mode != CAKE_DIFFSERV_BESTEFFORT) {
+-              /* extract the Diffserv Precedence field, if it exists */
+-              /* and clear DSCP bits if washing */
+-              tin = q->tin_index[cake_handle_diffserv(skb,
+-                              q->rate_flags & CAKE_FLAG_WASH)];
++      else {
++              tin = q->tin_index[dscp];
++
+               if (unlikely(tin >= q->tin_cnt))
+                       tin = 0;
+-      } else {
+-              tin = 0;
+-              if (q->rate_flags & CAKE_FLAG_WASH)
+-                      cake_wash_diffserv(skb);
+       }
+       return &q->tins[tin];
diff --git a/queue-5.0/sch_cake-use-tc_skb_protocol-helper-for-getting-packet-protocol.patch b/queue-5.0/sch_cake-use-tc_skb_protocol-helper-for-getting-packet-protocol.patch
new file mode 100644 (file)
index 0000000..d25b3df
--- /dev/null
@@ -0,0 +1,31 @@
+From foo@baz Sat Apr 20 16:43:09 CEST 2019
+From: "Toke Høiland-Jørgensen" <toke@redhat.com>
+Date: Thu, 4 Apr 2019 15:01:33 +0200
+Subject: sch_cake: Use tc_skb_protocol() helper for getting packet protocol
+
+From: "Toke Høiland-Jørgensen" <toke@redhat.com>
+
+[ Upstream commit b2100cc56fca8c51d28aa42a9f1fbcb2cf351996 ]
+
+We shouldn't be using skb->protocol directly as that will miss cases with
+hardware-accelerated VLAN tags. Use the helper instead to get the right
+protocol number.
+
+Reported-by: Kevin Darbyshire-Bryant <kevin@darbyshire-bryant.me.uk>
+Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_cake.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/sched/sch_cake.c
++++ b/net/sched/sch_cake.c
+@@ -1526,7 +1526,7 @@ static u8 cake_handle_diffserv(struct sk
+ {
+       u8 dscp;
+-      switch (skb->protocol) {
++      switch (tc_skb_protocol(skb)) {
+       case htons(ETH_P_IP):
+               dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
+               if (wash && dscp)
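
For reference, the '>> 2' in the hunk above is the standard split of the IPv4 TOS / IPv6 traffic-class octet into a 6-bit DSCP field and a 2-bit ECN field; the wash path (ipv4_change_dsfield with INET_ECN_MASK) zeroes only the DSCP bits and leaves the ECN bits alone. A tiny standalone illustration of that arithmetic, with a made-up octet value:

#include <stdio.h>

int main(void)
{
        /* Made-up traffic-class octet: DSCP 46 (EF) with ECN ECT(0). */
        unsigned char tos    = (46 << 2) | 0x2;

        unsigned char dscp   = tos >> 2;    /* top 6 bits, as in the patch */
        unsigned char ecn    = tos & 0x3;   /* bottom 2 bits */
        unsigned char washed = tos & 0x3;   /* wash: clear DSCP, keep ECN */

        printf("tos=0x%02x dscp=%u ecn=%u washed=0x%02x\n", tos, dscp, ecn, washed);
        return 0;
}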
diff --git a/queue-5.0/series b/queue-5.0/series
new file mode 100644 (file)
index 0000000..1404258
--- /dev/null
@@ -0,0 +1,43 @@
+bonding-fix-event-handling-for-stacked-bonds.patch
+failover-allow-name-change-on-iff_up-slave-interfaces.patch
+net-atm-fix-potential-spectre-v1-vulnerabilities.patch
+net-bridge-fix-per-port-af_packet-sockets.patch
+net-bridge-multicast-use-rcu-to-access-port-list-from-br_multicast_start_querier.patch
+net-fec-manage-ahb-clock-in-runtime-pm.patch
+net-fix-missing-meta-data-in-skb-with-vlan-packet.patch
+net-fou-do-not-use-guehdr-after-iptunnel_pull_offloads-in-gue_udp_recv.patch
+tcp-tcp_grow_window-needs-to-respect-tcp_space.patch
+team-set-slave-to-promisc-if-team-is-already-in-promisc-mode.patch
+tipc-missing-entries-in-name-table-of-publications.patch
+vhost-reject-zero-size-iova-range.patch
+ipv4-recompile-ip-options-in-ipv4_link_failure.patch
+ipv4-ensure-rcu_read_lock-in-ipv4_link_failure.patch
+mlxsw-spectrum_switchdev-add-mdb-entries-in-prepare-phase.patch
+mlxsw-core-do-not-use-wq_mem_reclaim-for-emad-workqueue.patch
+mlxsw-core-do-not-use-wq_mem_reclaim-for-mlxsw-ordered-workqueue.patch
+mlxsw-core-do-not-use-wq_mem_reclaim-for-mlxsw-workqueue.patch
+mlxsw-spectrum_router-do-not-check-vrf-mac-address.patch
+net-thunderx-raise-xdp-mtu-to-1508.patch
+net-thunderx-don-t-allow-jumbo-frames-with-xdp.patch
+net-tls-fix-the-iv-leaks.patch
+net-tls-don-t-leak-partially-sent-record-in-device-mode.patch
+net-strparser-partially-revert-strparser-call-skb_unclone-conditionally.patch
+net-tls-fix-build-without-config_tls_device.patch
+net-bridge-fix-netlink-export-of-vlan_stats_per_port-option.patch
+net-mlx5e-xdp-avoid-checksum-complete-when-xdp-prog-is-loaded.patch
+net-mlx5e-protect-against-non-uplink-representor-for-encap.patch
+net-mlx5e-switch-to-toeplitz-rss-hash-by-default.patch
+net-mlx5e-rx-fixup-skb-checksum-for-packets-with-tail-padding.patch
+net-mlx5e-rx-check-ip-headers-sanity.patch
+revert-net-mlx5e-enable-reporting-checksum-unnecessary-also-for-l3-packets.patch
+net-mlx5-fpga-tls-hold-rcu-read-lock-a-bit-longer.patch
+net-tls-prevent-bad-memory-access-in-tls_is_sk_tx_device_offloaded.patch
+net-mlx5-fpga-tls-idr-remove-on-flow-delete.patch
+route-avoid-crash-from-dereferencing-null-rt-from.patch
+nfp-flower-replace-cfi-with-vlan-present.patch
+nfp-flower-remove-vlan-cfi-bit-from-push-vlan-action.patch
+sch_cake-use-tc_skb_protocol-helper-for-getting-packet-protocol.patch
+sch_cake-make-sure-we-can-write-the-ip-header-before-changing-dscp-bits.patch
+nfc-nci-add-some-bounds-checking-in-nci_hci_cmd_received.patch
+nfc-nci-potential-off-by-one-in-pipes-array.patch
+sch_cake-simplify-logic-in-cake_select_tin.patch
diff --git a/queue-5.0/tcp-tcp_grow_window-needs-to-respect-tcp_space.patch b/queue-5.0/tcp-tcp_grow_window-needs-to-respect-tcp_space.patch
new file mode 100644 (file)
index 0000000..bace19c
--- /dev/null
@@ -0,0 +1,63 @@
+From foo@baz Sat Apr 20 16:43:09 CEST 2019
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 16 Apr 2019 10:55:20 -0700
+Subject: tcp: tcp_grow_window() needs to respect tcp_space()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 50ce163a72d817a99e8974222dcf2886d5deb1ae ]
+
+For some reason, tcp_grow_window() correctly tests if enough room
+is present before attempting to increase tp->rcv_ssthresh,
+but does not prevent it from growing past tcp_space().
+
+This causes hard-to-debug issues, like failing
+the (__tcp_select_window(sk) >= tp->rcv_wnd) test
+in __tcp_ack_snd_check(), which leads to ACK delays and possibly
+slow flows.
+
+Depending on tcp_rmem[2], MTU, skb->len/skb->truesize ratio,
+we can see the problem happening on "netperf -t TCP_RR -- -r 2000,2000"
+after about 60 round trips, when the active side no longer sends
+immediate acks.
+
+This bug predates git history.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Acked-by: Wei Wang <weiwan@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_input.c |   10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -402,11 +402,12 @@ static int __tcp_grow_window(const struc
+ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
+ {
+       struct tcp_sock *tp = tcp_sk(sk);
++      int room;
++
++      room = min_t(int, tp->window_clamp, tcp_space(sk)) - tp->rcv_ssthresh;
+       /* Check #1 */
+-      if (tp->rcv_ssthresh < tp->window_clamp &&
+-          (int)tp->rcv_ssthresh < tcp_space(sk) &&
+-          !tcp_under_memory_pressure(sk)) {
++      if (room > 0 && !tcp_under_memory_pressure(sk)) {
+               int incr;
+               /* Check #2. Increase window, if skb with such overhead
+@@ -419,8 +420,7 @@ static void tcp_grow_window(struct sock
+               if (incr) {
+                       incr = max_t(int, incr, 2 * skb->len);
+-                      tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr,
+-                                             tp->window_clamp);
++                      tp->rcv_ssthresh += min(room, incr);
+                       inet_csk(sk)->icsk_ack.quick |= 1;
+               }
+       }
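
The arithmetic change above is small but worth spelling out: by computing a single 'room' value up front and then adding at most that much, tp->rcv_ssthresh can no longer drift above min(window_clamp, tcp_space()). A minimal standalone model of just that clamping, using plain integers rather than the TCP code:

#include <stdio.h>

static int min_int(int a, int b) { return a < b ? a : b; }

/* Models only the patched clamp: grow ssthresh by at most 'room'. */
static int grow(int rcv_ssthresh, int window_clamp, int space, int incr)
{
        int room = min_int(window_clamp, space) - rcv_ssthresh;

        if (room > 0 && incr > 0)
                rcv_ssthresh += min_int(room, incr);
        return rcv_ssthresh;
}

int main(void)
{
        int ssthresh = 60000;

        /* Even with a large increment, ssthresh stops at min(clamp, space). */
        ssthresh = grow(ssthresh, 100000, 65536, 20000);
        printf("after grow: %d (capped at 65536)\n", ssthresh);  /* 65536 */
        ssthresh = grow(ssthresh, 100000, 65536, 20000);
        printf("after grow: %d (no further growth)\n", ssthresh); /* 65536 */
        return 0;
}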
diff --git a/queue-5.0/team-set-slave-to-promisc-if-team-is-already-in-promisc-mode.patch b/queue-5.0/team-set-slave-to-promisc-if-team-is-already-in-promisc-mode.patch
new file mode 100644 (file)
index 0000000..80bc104
--- /dev/null
@@ -0,0 +1,73 @@
+From foo@baz Sat Apr 20 16:43:09 CEST 2019
+From: Hangbin Liu <liuhangbin@gmail.com>
+Date: Mon, 8 Apr 2019 16:45:17 +0800
+Subject: team: set slave to promisc if team is already in promisc mode
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit 43c2adb9df7ddd6560fd3546d925b42cef92daa0 ]
+
+After adding a team interface to a bridge, the team interface enters
+promisc mode. If we then add a new slave to team0, the slave stays with
+promisc off. Fix it by setting the slave to promisc on if the team
+master is already in promisc mode, and do the same for allmulti.
+
+v2: add promisc and allmulti checking when deleting ports
+
+Fixes: 3d249d4ca7d0 ("net: introduce ethernet teaming device")
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/team/team.c |   26 ++++++++++++++++++++++++++
+ 1 file changed, 26 insertions(+)
+
+--- a/drivers/net/team/team.c
++++ b/drivers/net/team/team.c
+@@ -1247,6 +1247,23 @@ static int team_port_add(struct team *te
+               goto err_option_port_add;
+       }
++      /* set promiscuity level to new slave */
++      if (dev->flags & IFF_PROMISC) {
++              err = dev_set_promiscuity(port_dev, 1);
++              if (err)
++                      goto err_set_slave_promisc;
++      }
++
++      /* set allmulti level to new slave */
++      if (dev->flags & IFF_ALLMULTI) {
++              err = dev_set_allmulti(port_dev, 1);
++              if (err) {
++                      if (dev->flags & IFF_PROMISC)
++                              dev_set_promiscuity(port_dev, -1);
++                      goto err_set_slave_promisc;
++              }
++      }
++
+       netif_addr_lock_bh(dev);
+       dev_uc_sync_multiple(port_dev, dev);
+       dev_mc_sync_multiple(port_dev, dev);
+@@ -1263,6 +1280,9 @@ static int team_port_add(struct team *te
+       return 0;
++err_set_slave_promisc:
++      __team_option_inst_del_port(team, port);
++
+ err_option_port_add:
+       team_upper_dev_unlink(team, port);
+@@ -1308,6 +1328,12 @@ static int team_port_del(struct team *te
+       team_port_disable(team, port);
+       list_del_rcu(&port->list);
++
++      if (dev->flags & IFF_PROMISC)
++              dev_set_promiscuity(port_dev, -1);
++      if (dev->flags & IFF_ALLMULTI)
++              dev_set_allmulti(port_dev, -1);
++
+       team_upper_dev_unlink(team, port);
+       netdev_rx_handler_unregister(port_dev);
+       team_port_disable_netpoll(port);
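
The interesting structural point in the hunk above is the rollback ordering: promisc is bumped first, allmulti second, and a failure in the second step has to undo the first before falling through to the existing error labels. A compact, self-contained sketch of that goto-unwind pattern; the stub functions and names are invented and this is not the driver code:

#include <stdio.h>

/* Invented stubs standing in for dev_set_promiscuity()/dev_set_allmulti(). */
static int promisc_refs, allmulti_refs;

static int set_promiscuity(int inc) { promisc_refs += inc; return 0; }
static int set_allmulti(int inc)
{
        if (inc > 0)
                return -1;              /* simulate a failure on enable */
        allmulti_refs += inc;
        return 0;
}

static int port_add(int master_promisc, int master_allmulti)
{
        int err = 0;

        if (master_promisc) {
                err = set_promiscuity(1);
                if (err)
                        goto err_unwind;
        }
        if (master_allmulti) {
                err = set_allmulti(1);
                if (err) {
                        /* roll back the promisc bump taken above */
                        if (master_promisc)
                                set_promiscuity(-1);
                        goto err_unwind;
                }
        }
        return 0;

err_unwind:
        /* further unwinding of earlier setup would continue here */
        return err;
}

int main(void)
{
        int err = port_add(1, 1);

        printf("port_add -> %d, promisc_refs=%d allmulti_refs=%d\n",
               err, promisc_refs, allmulti_refs);  /* -1, 0, 0 */
        return 0;
}

The same symmetry shows up in the team_port_del() hunk, which drops both counts again when a port leaves.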
diff --git a/queue-5.0/tipc-missing-entries-in-name-table-of-publications.patch b/queue-5.0/tipc-missing-entries-in-name-table-of-publications.patch
new file mode 100644 (file)
index 0000000..b017f09
--- /dev/null
@@ -0,0 +1,43 @@
+From foo@baz Sat Apr 20 16:43:09 CEST 2019
+From: Hoang Le <hoang.h.le@dektech.com.au>
+Date: Tue, 9 Apr 2019 14:59:24 +0700
+Subject: tipc: missing entries in name table of publications
+
+From: Hoang Le <hoang.h.le@dektech.com.au>
+
+[ Upstream commit d1841533e54876f152a30ac398a34f47ad6590b1 ]
+
+When binding multiple services with specific types such as 1Ki, 2Ki, ...,
+some entries in the name table of publications go missing when they are
+listed via 'tipc name show'.
+
+The problem lies in how a zero last_type value provided via netlink is
+identified. A zero type can mean either the initial 'type' used when
+name table dumping starts, or a genuine zero service type (the node
+state service type). In the latter case the lookup fails to find the
+node state service type on the next iteration.
+
+To solve this, add a further condition that distinguishes the two cases
+and looks up the correct service type for the next iteration, instead of
+treating it as the initial 'type' zero and selecting the first service.
+
+Acked-by: Jon Maloy <jon.maloy@ericsson.com>
+Signed-off-by: Hoang Le <hoang.h.le@dektech.com.au>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/tipc/name_table.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/tipc/name_table.c
++++ b/net/tipc/name_table.c
+@@ -909,7 +909,8 @@ static int tipc_nl_service_list(struct n
+       for (; i < TIPC_NAMETBL_SIZE; i++) {
+               head = &tn->nametbl->services[i];
+-              if (*last_type) {
++              if (*last_type ||
++                  (!i && *last_key && (*last_lower == *last_key))) {
+                       service = tipc_service_find(net, *last_type);
+                       if (!service)
+                               return -EPIPE;
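
The underlying issue is a classic one for resumable (netlink-style) dumps: when 0 is used both as the "start from the beginning" sentinel and as a legitimate key, the resume logic cannot tell the two apart. In the TIPC case the ambiguity made entries go missing; in the toy model below it shows up as a repeated key, but the root cause is the same overloaded zero. This sketch only models the idea of adding a second cursor field, not the TIPC name table itself:

#include <stdio.h>

/* Resumable dump over a tiny key set where 0 is a valid key. */
static const unsigned int keys[] = { 0, 16, 32 };
#define NKEYS (sizeof(keys) / sizeof(keys[0]))

/* Naive resume: "last == 0" is overloaded as "start from scratch",
 * so a dump interrupted right after key 0 restarts at key 0.
 */
static unsigned int dump_one_naive(unsigned int last)
{
        unsigned int i = 0;

        if (last)                          /* ambiguous when the last key was 0 */
                while (i < NKEYS && keys[i] <= last)
                        i++;
        return i < NKEYS ? keys[i] : ~0u;
}

/* Disambiguated resume: a separate flag records "a dump is in progress",
 * in the spirit of the extra condition added by the patch.
 */
static unsigned int dump_one_fixed(unsigned int last, int in_progress)
{
        unsigned int i = 0;

        if (in_progress)
                while (i < NKEYS && keys[i] <= last)
                        i++;
        return i < NKEYS ? keys[i] : ~0u;
}

int main(void)
{
        /* Resume after emitting key 0. */
        printf("naive resume after 0 -> %u (key 0 emitted again)\n",
               dump_one_naive(0));
        printf("fixed resume after 0 -> %u\n", dump_one_fixed(0, 1));
        return 0;
}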
diff --git a/queue-5.0/vhost-reject-zero-size-iova-range.patch b/queue-5.0/vhost-reject-zero-size-iova-range.patch
new file mode 100644 (file)
index 0000000..71758ad
--- /dev/null
@@ -0,0 +1,38 @@
+From foo@baz Sat Apr 20 16:43:09 CEST 2019
+From: Jason Wang <jasowang@redhat.com>
+Date: Tue, 9 Apr 2019 12:10:25 +0800
+Subject: vhost: reject zero size iova range
+
+From: Jason Wang <jasowang@redhat.com>
+
+[ Upstream commit 813dbeb656d6c90266f251d8bd2b02d445afa63f ]
+
+We used to accept a zero-size iova range, which leads to an infinite loop
+in translate_desc(). Fix this by failing the request in this case.
+
+Reported-by: syzbot+d21e6e297322a900c128@syzkaller.appspotmail.com
+Fixes: 6b1e6cc7 ("vhost: new device IOTLB API")
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/vhost/vhost.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/drivers/vhost/vhost.c
++++ b/drivers/vhost/vhost.c
+@@ -911,8 +911,12 @@ static int vhost_new_umem_range(struct v
+                               u64 start, u64 size, u64 end,
+                               u64 userspace_addr, int perm)
+ {
+-      struct vhost_umem_node *tmp, *node = kmalloc(sizeof(*node), GFP_ATOMIC);
++      struct vhost_umem_node *tmp, *node;
++      if (!size)
++              return -EFAULT;
++
++      node = kmalloc(sizeof(*node), GFP_ATOMIC);
+       if (!node)
+               return -ENOMEM;
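
To see why a zero-size range is dangerous, consider a schematic translation walk. This is only an illustration of the failure mode, not the actual translate_desc() code: each pass is meant to consume min(remaining, bytes left in the matched region), so a region whose size is 0 consumes nothing and the loop never advances. The added check simply refuses to insert such a region in the first place.

#include <stdio.h>
#include <stdint.h>

struct region {
        uint64_t start;
        uint64_t size;    /* a size of 0 is what the patch now rejects */
};

/* Schematic walk, capped at a few passes so the demo terminates even
 * though with size 0 it would otherwise spin forever.
 */
static void translate(const struct region *r, uint64_t addr, uint64_t len)
{
        int guard = 3;

        while (len && guard--) {
                uint64_t avail = r->size - (addr - r->start);
                uint64_t chunk = len < avail ? len : avail;

                printf("consumed %llu bytes, %llu left\n",
                       (unsigned long long)chunk,
                       (unsigned long long)(len - chunk));
                addr += chunk;
                len  -= chunk;
        }
        if (len)
                printf("made no progress: %llu bytes will never be translated\n",
                       (unsigned long long)len);
}

int main(void)
{
        struct region zero = { .start = 0, .size = 0 };

        translate(&zero, 0, 4096);
        return 0;
}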