From: Greg Kroah-Hartman Date: Sat, 20 Apr 2019 14:44:19 +0000 (+0200) Subject: 5.0-stable patches X-Git-Tag: v3.18.139~50 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=0ee3da53d0551b3e52e991ccd07d957ffcc399bf;p=thirdparty%2Fkernel%2Fstable-queue.git 5.0-stable patches added patches: bonding-fix-event-handling-for-stacked-bonds.patch failover-allow-name-change-on-iff_up-slave-interfaces.patch ipv4-ensure-rcu_read_lock-in-ipv4_link_failure.patch ipv4-recompile-ip-options-in-ipv4_link_failure.patch mlxsw-core-do-not-use-wq_mem_reclaim-for-emad-workqueue.patch mlxsw-core-do-not-use-wq_mem_reclaim-for-mlxsw-ordered-workqueue.patch mlxsw-core-do-not-use-wq_mem_reclaim-for-mlxsw-workqueue.patch mlxsw-spectrum_router-do-not-check-vrf-mac-address.patch mlxsw-spectrum_switchdev-add-mdb-entries-in-prepare-phase.patch net-atm-fix-potential-spectre-v1-vulnerabilities.patch net-bridge-fix-netlink-export-of-vlan_stats_per_port-option.patch net-bridge-fix-per-port-af_packet-sockets.patch net-bridge-multicast-use-rcu-to-access-port-list-from-br_multicast_start_querier.patch net-fec-manage-ahb-clock-in-runtime-pm.patch net-fix-missing-meta-data-in-skb-with-vlan-packet.patch net-fou-do-not-use-guehdr-after-iptunnel_pull_offloads-in-gue_udp_recv.patch net-mlx5-fpga-tls-hold-rcu-read-lock-a-bit-longer.patch net-mlx5-fpga-tls-idr-remove-on-flow-delete.patch net-mlx5e-protect-against-non-uplink-representor-for-encap.patch net-mlx5e-rx-check-ip-headers-sanity.patch net-mlx5e-rx-fixup-skb-checksum-for-packets-with-tail-padding.patch net-mlx5e-switch-to-toeplitz-rss-hash-by-default.patch net-mlx5e-xdp-avoid-checksum-complete-when-xdp-prog-is-loaded.patch net-strparser-partially-revert-strparser-call-skb_unclone-conditionally.patch net-thunderx-don-t-allow-jumbo-frames-with-xdp.patch net-thunderx-raise-xdp-mtu-to-1508.patch net-tls-don-t-leak-partially-sent-record-in-device-mode.patch net-tls-fix-build-without-config_tls_device.patch net-tls-fix-the-iv-leaks.patch 
net-tls-prevent-bad-memory-access-in-tls_is_sk_tx_device_offloaded.patch nfc-nci-add-some-bounds-checking-in-nci_hci_cmd_received.patch nfc-nci-potential-off-by-one-in-pipes-array.patch nfp-flower-remove-vlan-cfi-bit-from-push-vlan-action.patch nfp-flower-replace-cfi-with-vlan-present.patch revert-net-mlx5e-enable-reporting-checksum-unnecessary-also-for-l3-packets.patch route-avoid-crash-from-dereferencing-null-rt-from.patch sch_cake-make-sure-we-can-write-the-ip-header-before-changing-dscp-bits.patch sch_cake-simplify-logic-in-cake_select_tin.patch sch_cake-use-tc_skb_protocol-helper-for-getting-packet-protocol.patch tcp-tcp_grow_window-needs-to-respect-tcp_space.patch team-set-slave-to-promisc-if-team-is-already-in-promisc-mode.patch tipc-missing-entries-in-name-table-of-publications.patch vhost-reject-zero-size-iova-range.patch --- diff --git a/queue-5.0/bonding-fix-event-handling-for-stacked-bonds.patch b/queue-5.0/bonding-fix-event-handling-for-stacked-bonds.patch new file mode 100644 index 00000000000..42d74599586 --- /dev/null +++ b/queue-5.0/bonding-fix-event-handling-for-stacked-bonds.patch @@ -0,0 +1,47 @@ +From foo@baz Sat Apr 20 16:43:09 CEST 2019 +From: Sabrina Dubroca +Date: Fri, 12 Apr 2019 15:04:10 +0200 +Subject: bonding: fix event handling for stacked bonds + +From: Sabrina Dubroca + +[ Upstream commit 92480b3977fd3884649d404cbbaf839b70035699 ] + +When a bond is enslaved to another bond, bond_netdev_event() only +handles the event as if the bond is a master, and skips treating the +bond as a slave. + +This leads to a refcount leak on the slave, since we don't remove the +adjacency to its master and the master holds a reference on the slave. + +Reproducer: + ip link add bondL type bond + ip link add bondU type bond + ip link set bondL master bondU + ip link del bondL + +No "Fixes:" tag, this code is older than git history. + +Signed-off-by: Sabrina Dubroca +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/bonding/bond_main.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -3214,8 +3214,12 @@ static int bond_netdev_event(struct noti + return NOTIFY_DONE; + + if (event_dev->flags & IFF_MASTER) { ++ int ret; ++ + netdev_dbg(event_dev, "IFF_MASTER\n"); +- return bond_master_netdev_event(event, event_dev); ++ ret = bond_master_netdev_event(event, event_dev); ++ if (ret != NOTIFY_DONE) ++ return ret; + } + + if (event_dev->flags & IFF_SLAVE) { diff --git a/queue-5.0/failover-allow-name-change-on-iff_up-slave-interfaces.patch b/queue-5.0/failover-allow-name-change-on-iff_up-slave-interfaces.patch new file mode 100644 index 00000000000..89440f7f039 --- /dev/null +++ b/queue-5.0/failover-allow-name-change-on-iff_up-slave-interfaces.patch @@ -0,0 +1,145 @@ +From foo@baz Sat Apr 20 16:43:09 CEST 2019 +From: Si-Wei Liu +Date: Mon, 8 Apr 2019 19:45:27 -0400 +Subject: failover: allow name change on IFF_UP slave interfaces + +From: Si-Wei Liu + +[ Upstream commit 8065a779f17e94536a1c4dcee4f9d88011672f97 ] + +When a netdev appears through hot plug then gets enslaved by a failover +master that is already up and running, the slave will be opened +right away after getting enslaved. Today there's a race that userspace +(udev) may fail to rename the slave if the kernel (net_failover) +opens the slave earlier than when the userspace rename happens. +Unlike bond or team, the primary slave of failover can't be renamed by +userspace ahead of time, since the kernel initiated auto-enslavement is +unable to, or rather, is never meant to be synchronized with the rename +request from userspace. + +As the failover slave interfaces are not designed to be operated +directly by userspace apps: IP configuration, filter rules with +regard to network traffic passing and etc., should all be done on master +interface. 
In general, userspace apps only care about the +name of master interface, while slave names are less important as long +as admin users can see reliable names that may carry +other information describing the netdev. For e.g., they can infer that +"ens3nsby" is a standby slave of "ens3", while for a +name like "eth0" they can't tell which master it belongs to. + +Historically the name of IFF_UP interface can't be changed because +there might be admin script or management software that is already +relying on such behavior and assumes that the slave name can't be +changed once UP. But failover is special: with the in-kernel +auto-enslavement mechanism, the userspace expectation for device +enumeration and bring-up order is already broken. Previously initramfs +and various userspace config tools were modified to bypass failover +slaves because of auto-enslavement and duplicate MAC address. Similarly, +in case that users care about seeing reliable slave name, the new type +of failover slaves needs to be taken care of specifically in userspace +anyway. + +It's less risky to lift up the rename restriction on failover slave +which is already UP. Although it's possible this change may potentially +break userspace component (most likely configuration scripts or +management software) that assumes slave name can't be changed while +UP, it's relatively a limited and controllable set among all userspace +components, which can be fixed specifically to listen for the rename +events on failover slaves. Userspace component interacting with slaves +is expected to be changed to operate on failover master interface +instead, as the failover slave is dynamic in nature which may come and +go at any point. The goal is to make the role of failover slaves less +relevant, and userspace components should only deal with failover master +in the long run. 
+ +Fixes: 30c8bd5aa8b2 ("net: Introduce generic failover module") +Signed-off-by: Si-Wei Liu +Reviewed-by: Liran Alon +Acked-by: Sridhar Samudrala +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/netdevice.h | 3 +++ + net/core/dev.c | 16 +++++++++++++++- + net/core/failover.c | 6 +++--- + 3 files changed, 21 insertions(+), 4 deletions(-) + +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -1484,6 +1484,7 @@ struct net_device_ops { + * @IFF_FAILOVER: device is a failover master device + * @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device + * @IFF_L3MDEV_RX_HANDLER: only invoke the rx handler of L3 master device ++ * @IFF_LIVE_RENAME_OK: rename is allowed while device is up and running + */ + enum netdev_priv_flags { + IFF_802_1Q_VLAN = 1<<0, +@@ -1516,6 +1517,7 @@ enum netdev_priv_flags { + IFF_FAILOVER = 1<<27, + IFF_FAILOVER_SLAVE = 1<<28, + IFF_L3MDEV_RX_HANDLER = 1<<29, ++ IFF_LIVE_RENAME_OK = 1<<30, + }; + + #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN +@@ -1547,6 +1549,7 @@ enum netdev_priv_flags { + #define IFF_FAILOVER IFF_FAILOVER + #define IFF_FAILOVER_SLAVE IFF_FAILOVER_SLAVE + #define IFF_L3MDEV_RX_HANDLER IFF_L3MDEV_RX_HANDLER ++#define IFF_LIVE_RENAME_OK IFF_LIVE_RENAME_OK + + /** + * struct net_device - The DEVICE structure. +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -1184,7 +1184,21 @@ int dev_change_name(struct net_device *d + BUG_ON(!dev_net(dev)); + + net = dev_net(dev); +- if (dev->flags & IFF_UP) ++ ++ /* Some auto-enslaved devices e.g. failover slaves are ++ * special, as userspace might rename the device after ++ * the interface had been brought up and running since ++ * the point kernel initiated auto-enslavement. Allow ++ * live name change even when these slave devices are ++ * up and running. 
++ * ++ * Typically, users of these auto-enslaving devices ++ * don't actually care about slave name change, as ++ * they are supposed to operate on master interface ++ * directly. ++ */ ++ if (dev->flags & IFF_UP && ++ likely(!(dev->priv_flags & IFF_LIVE_RENAME_OK))) + return -EBUSY; + + write_seqcount_begin(&devnet_rename_seq); +--- a/net/core/failover.c ++++ b/net/core/failover.c +@@ -80,14 +80,14 @@ static int failover_slave_register(struc + goto err_upper_link; + } + +- slave_dev->priv_flags |= IFF_FAILOVER_SLAVE; ++ slave_dev->priv_flags |= (IFF_FAILOVER_SLAVE | IFF_LIVE_RENAME_OK); + + if (fops && fops->slave_register && + !fops->slave_register(slave_dev, failover_dev)) + return NOTIFY_OK; + + netdev_upper_dev_unlink(slave_dev, failover_dev); +- slave_dev->priv_flags &= ~IFF_FAILOVER_SLAVE; ++ slave_dev->priv_flags &= ~(IFF_FAILOVER_SLAVE | IFF_LIVE_RENAME_OK); + err_upper_link: + netdev_rx_handler_unregister(slave_dev); + done: +@@ -121,7 +121,7 @@ int failover_slave_unregister(struct net + + netdev_rx_handler_unregister(slave_dev); + netdev_upper_dev_unlink(slave_dev, failover_dev); +- slave_dev->priv_flags &= ~IFF_FAILOVER_SLAVE; ++ slave_dev->priv_flags &= ~(IFF_FAILOVER_SLAVE | IFF_LIVE_RENAME_OK); + + if (fops && fops->slave_unregister && + !fops->slave_unregister(slave_dev, failover_dev)) diff --git a/queue-5.0/ipv4-ensure-rcu_read_lock-in-ipv4_link_failure.patch b/queue-5.0/ipv4-ensure-rcu_read_lock-in-ipv4_link_failure.patch new file mode 100644 index 00000000000..b5da459f59e --- /dev/null +++ b/queue-5.0/ipv4-ensure-rcu_read_lock-in-ipv4_link_failure.patch @@ -0,0 +1,88 @@ +From foo@baz Sat Apr 20 16:43:09 CEST 2019 +From: Eric Dumazet +Date: Sat, 13 Apr 2019 17:32:21 -0700 +Subject: ipv4: ensure rcu_read_lock() in ipv4_link_failure() + +From: Eric Dumazet + +[ Upstream commit c543cb4a5f07e09237ec0fc2c60c9f131b2c79ad ] + +fib_compute_spec_dst() needs to be called under rcu protection. 
+ +syzbot reported : + +WARNING: suspicious RCU usage +5.1.0-rc4+ #165 Not tainted +include/linux/inetdevice.h:220 suspicious rcu_dereference_check() usage! + +other info that might help us debug this: + +rcu_scheduler_active = 2, debug_locks = 1 +1 lock held by swapper/0/0: + #0: 0000000051b67925 ((&n->timer)){+.-.}, at: lockdep_copy_map include/linux/lockdep.h:170 [inline] + #0: 0000000051b67925 ((&n->timer)){+.-.}, at: call_timer_fn+0xda/0x720 kernel/time/timer.c:1315 + +stack backtrace: +CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.1.0-rc4+ #165 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + + __dump_stack lib/dump_stack.c:77 [inline] + dump_stack+0x172/0x1f0 lib/dump_stack.c:113 + lockdep_rcu_suspicious+0x153/0x15d kernel/locking/lockdep.c:5162 + __in_dev_get_rcu include/linux/inetdevice.h:220 [inline] + fib_compute_spec_dst+0xbbd/0x1030 net/ipv4/fib_frontend.c:294 + spec_dst_fill net/ipv4/ip_options.c:245 [inline] + __ip_options_compile+0x15a7/0x1a10 net/ipv4/ip_options.c:343 + ipv4_link_failure+0x172/0x400 net/ipv4/route.c:1195 + dst_link_failure include/net/dst.h:427 [inline] + arp_error_report+0xd1/0x1c0 net/ipv4/arp.c:297 + neigh_invalidate+0x24b/0x570 net/core/neighbour.c:995 + neigh_timer_handler+0xc35/0xf30 net/core/neighbour.c:1081 + call_timer_fn+0x190/0x720 kernel/time/timer.c:1325 + expire_timers kernel/time/timer.c:1362 [inline] + __run_timers kernel/time/timer.c:1681 [inline] + __run_timers kernel/time/timer.c:1649 [inline] + run_timer_softirq+0x652/0x1700 kernel/time/timer.c:1694 + __do_softirq+0x266/0x95a kernel/softirq.c:293 + invoke_softirq kernel/softirq.c:374 [inline] + irq_exit+0x180/0x1d0 kernel/softirq.c:414 + exiting_irq arch/x86/include/asm/apic.h:536 [inline] + smp_apic_timer_interrupt+0x14a/0x570 arch/x86/kernel/apic/apic.c:1062 + apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:807 + +Fixes: ed0de45a1008 ("ipv4: recompile ip options in ipv4_link_failure") 
+Signed-off-by: Eric Dumazet +Reported-by: syzbot +Cc: Stephen Suryaputra +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/route.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -1185,14 +1185,20 @@ static struct dst_entry *ipv4_dst_check( + + static void ipv4_link_failure(struct sk_buff *skb) + { +- struct rtable *rt; + struct ip_options opt; ++ struct rtable *rt; ++ int res; + + /* Recompile ip options since IPCB may not be valid anymore. + */ + memset(&opt, 0, sizeof(opt)); + opt.optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr); +- if (__ip_options_compile(dev_net(skb->dev), &opt, skb, NULL)) ++ ++ rcu_read_lock(); ++ res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL); ++ rcu_read_unlock(); ++ ++ if (res) + return; + + __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, &opt); diff --git a/queue-5.0/ipv4-recompile-ip-options-in-ipv4_link_failure.patch b/queue-5.0/ipv4-recompile-ip-options-in-ipv4_link_failure.patch new file mode 100644 index 00000000000..ddb7fb5d88c --- /dev/null +++ b/queue-5.0/ipv4-recompile-ip-options-in-ipv4_link_failure.patch @@ -0,0 +1,42 @@ +From foo@baz Sat Apr 20 16:43:09 CEST 2019 +From: Stephen Suryaputra +Date: Fri, 12 Apr 2019 16:19:27 -0400 +Subject: ipv4: recompile ip options in ipv4_link_failure + +From: Stephen Suryaputra + +[ Upstream commit ed0de45a1008991fdaa27a0152befcb74d126a8b ] + +Recompile IP options since IPCB may not be valid anymore when +ipv4_link_failure is called from arp_error_report. + +Refer to the commit 3da1ed7ac398 ("net: avoid use IPCB in cipso_v4_error") +and the commit before that (9ef6b42ad6fd) for a similar issue. + +Signed-off-by: Stephen Suryaputra +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/route.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -1186,8 +1186,16 @@ static struct dst_entry *ipv4_dst_check( + static void ipv4_link_failure(struct sk_buff *skb) + { + struct rtable *rt; ++ struct ip_options opt; + +- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); ++ /* Recompile ip options since IPCB may not be valid anymore. ++ */ ++ memset(&opt, 0, sizeof(opt)); ++ opt.optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr); ++ if (__ip_options_compile(dev_net(skb->dev), &opt, skb, NULL)) ++ return; ++ ++ __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, &opt); + + rt = skb_rtable(skb); + if (rt) diff --git a/queue-5.0/mlxsw-core-do-not-use-wq_mem_reclaim-for-emad-workqueue.patch b/queue-5.0/mlxsw-core-do-not-use-wq_mem_reclaim-for-emad-workqueue.patch new file mode 100644 index 00000000000..e1c11f2bc3f --- /dev/null +++ b/queue-5.0/mlxsw-core-do-not-use-wq_mem_reclaim-for-emad-workqueue.patch @@ -0,0 +1,36 @@ +From foo@baz Sat Apr 20 16:43:09 CEST 2019 +From: Ido Schimmel +Date: Wed, 10 Apr 2019 06:58:13 +0000 +Subject: mlxsw: core: Do not use WQ_MEM_RECLAIM for EMAD workqueue + +From: Ido Schimmel + +[ Upstream commit a8c133b06183c529c51cd0d54eb57d6b7078370c ] + +The EMAD workqueue is used to handle retransmission of EMAD packets that +contain configuration data for the device's firmware. + +Given the workers need to allocate these packets and that the code is +not called as part of memory reclaim path, remove the WQ_MEM_RECLAIM +flag. + +Fixes: d965465b60ba ("mlxsw: core: Fix possible deadlock") +Signed-off-by: Ido Schimmel +Acked-by: Jiri Pirko +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlxsw/core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlxsw/core.c ++++ b/drivers/net/ethernet/mellanox/mlxsw/core.c +@@ -568,7 +568,7 @@ static int mlxsw_emad_init(struct mlxsw_ + if (!(mlxsw_core->bus->features & MLXSW_BUS_F_TXRX)) + return 0; + +- emad_wq = alloc_workqueue("mlxsw_core_emad", WQ_MEM_RECLAIM, 0); ++ emad_wq = alloc_workqueue("mlxsw_core_emad", 0, 0); + if (!emad_wq) + return -ENOMEM; + mlxsw_core->emad_wq = emad_wq; diff --git a/queue-5.0/mlxsw-core-do-not-use-wq_mem_reclaim-for-mlxsw-ordered-workqueue.patch b/queue-5.0/mlxsw-core-do-not-use-wq_mem_reclaim-for-mlxsw-ordered-workqueue.patch new file mode 100644 index 00000000000..1ab4286523c --- /dev/null +++ b/queue-5.0/mlxsw-core-do-not-use-wq_mem_reclaim-for-mlxsw-ordered-workqueue.patch @@ -0,0 +1,77 @@ +From foo@baz Sat Apr 20 16:43:09 CEST 2019 +From: Ido Schimmel +Date: Wed, 10 Apr 2019 06:58:14 +0000 +Subject: mlxsw: core: Do not use WQ_MEM_RECLAIM for mlxsw ordered workqueue + +From: Ido Schimmel + +[ Upstream commit 4af0699782e2cc7d0d89db9eb6f8844dd3df82dc ] + +The ordered workqueue is used to offload various objects such as routes +and neighbours in the order they are notified. + +It should not be called as part of memory reclaim path, so remove the +WQ_MEM_RECLAIM flag. This can also result in a warning [1], if a worker +tries to flush a non-WQ_MEM_RECLAIM workqueue. + +[1] +[97703.542861] workqueue: WQ_MEM_RECLAIM mlxsw_core_ordered:mlxsw_sp_router_fib6_event_work [mlxsw_spectrum] is flushing !WQ_MEM_RECLAIM events:rht_deferred_worker +[97703.542884] WARNING: CPU: 1 PID: 32492 at kernel/workqueue.c:2605 check_flush_dependency+0xb5/0x130 +... +[97703.542988] Hardware name: Mellanox Technologies Ltd. 
MSN3700C/VMOD0008, BIOS 5.11 10/10/2018 +[97703.543049] Workqueue: mlxsw_core_ordered mlxsw_sp_router_fib6_event_work [mlxsw_spectrum] +[97703.543061] RIP: 0010:check_flush_dependency+0xb5/0x130 +... +[97703.543071] RSP: 0018:ffffb3f08137bc00 EFLAGS: 00010086 +[97703.543076] RAX: 0000000000000000 RBX: ffff96e07740ae00 RCX: 0000000000000000 +[97703.543080] RDX: 0000000000000094 RSI: ffffffff82dc1934 RDI: 0000000000000046 +[97703.543084] RBP: ffffb3f08137bc20 R08: ffffffff82dc18a0 R09: 00000000000225c0 +[97703.543087] R10: 0000000000000000 R11: 0000000000007eec R12: ffffffff816e4ee0 +[97703.543091] R13: ffff96e06f6a5c00 R14: ffff96e077ba7700 R15: ffffffff812ab0c0 +[97703.543097] FS: 0000000000000000(0000) GS:ffff96e077a80000(0000) knlGS:0000000000000000 +[97703.543101] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[97703.543104] CR2: 00007f8cd135b280 CR3: 00000001e860e003 CR4: 00000000003606e0 +[97703.543109] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +[97703.543112] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +[97703.543115] Call Trace: +[97703.543129] __flush_work+0xbd/0x1e0 +[97703.543137] ? __cancel_work_timer+0x136/0x1b0 +[97703.543145] ? pwq_dec_nr_in_flight+0x49/0xa0 +[97703.543154] __cancel_work_timer+0x136/0x1b0 +[97703.543175] ? mlxsw_reg_trans_bulk_wait+0x145/0x400 [mlxsw_core] +[97703.543184] cancel_work_sync+0x10/0x20 +[97703.543191] rhashtable_free_and_destroy+0x23/0x140 +[97703.543198] rhashtable_destroy+0xd/0x10 +[97703.543254] mlxsw_sp_fib_destroy+0xb1/0xf0 [mlxsw_spectrum] +[97703.543310] mlxsw_sp_vr_put+0xa8/0xc0 [mlxsw_spectrum] +[97703.543364] mlxsw_sp_fib_node_put+0xbf/0x140 [mlxsw_spectrum] +[97703.543418] ? mlxsw_sp_fib6_entry_destroy+0xe8/0x110 [mlxsw_spectrum] +[97703.543475] mlxsw_sp_router_fib6_event_work+0x6cd/0x7f0 [mlxsw_spectrum] +[97703.543484] process_one_work+0x1fd/0x400 +[97703.543493] worker_thread+0x34/0x410 +[97703.543500] kthread+0x121/0x140 +[97703.543507] ? 
process_one_work+0x400/0x400 +[97703.543512] ? kthread_park+0x90/0x90 +[97703.543523] ret_from_fork+0x35/0x40 + +Fixes: a3832b31898f ("mlxsw: core: Create an ordered workqueue for FIB offload") +Signed-off-by: Ido Schimmel +Reported-by: Semion Lisyansky +Acked-by: Jiri Pirko +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlxsw/core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlxsw/core.c ++++ b/drivers/net/ethernet/mellanox/mlxsw/core.c +@@ -1915,7 +1915,7 @@ static int __init mlxsw_core_module_init + mlxsw_wq = alloc_workqueue(mlxsw_core_driver_name, WQ_MEM_RECLAIM, 0); + if (!mlxsw_wq) + return -ENOMEM; +- mlxsw_owq = alloc_ordered_workqueue("%s_ordered", WQ_MEM_RECLAIM, ++ mlxsw_owq = alloc_ordered_workqueue("%s_ordered", 0, + mlxsw_core_driver_name); + if (!mlxsw_owq) { + err = -ENOMEM; diff --git a/queue-5.0/mlxsw-core-do-not-use-wq_mem_reclaim-for-mlxsw-workqueue.patch b/queue-5.0/mlxsw-core-do-not-use-wq_mem_reclaim-for-mlxsw-workqueue.patch new file mode 100644 index 00000000000..22a90198499 --- /dev/null +++ b/queue-5.0/mlxsw-core-do-not-use-wq_mem_reclaim-for-mlxsw-workqueue.patch @@ -0,0 +1,36 @@ +From foo@baz Sat Apr 20 16:43:09 CEST 2019 +From: Ido Schimmel +Date: Wed, 10 Apr 2019 06:58:15 +0000 +Subject: mlxsw: core: Do not use WQ_MEM_RECLAIM for mlxsw workqueue + +From: Ido Schimmel + +[ Upstream commit b442fed1b724af0de087912a5718ddde1b87acbb ] + +The workqueue is used to periodically update the networking stack about +activity / statistics of various objects such as neighbours and TC +actions. + +It should not be called as part of memory reclaim path, so remove the +WQ_MEM_RECLAIM flag. + +Fixes: 3d5479e92087 ("mlxsw: core: Remove deprecated create_workqueue") +Signed-off-by: Ido Schimmel +Acked-by: Jiri Pirko +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlxsw/core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlxsw/core.c ++++ b/drivers/net/ethernet/mellanox/mlxsw/core.c +@@ -1912,7 +1912,7 @@ static int __init mlxsw_core_module_init + { + int err; + +- mlxsw_wq = alloc_workqueue(mlxsw_core_driver_name, WQ_MEM_RECLAIM, 0); ++ mlxsw_wq = alloc_workqueue(mlxsw_core_driver_name, 0, 0); + if (!mlxsw_wq) + return -ENOMEM; + mlxsw_owq = alloc_ordered_workqueue("%s_ordered", 0, diff --git a/queue-5.0/mlxsw-spectrum_router-do-not-check-vrf-mac-address.patch b/queue-5.0/mlxsw-spectrum_router-do-not-check-vrf-mac-address.patch new file mode 100644 index 00000000000..e9383d5e226 --- /dev/null +++ b/queue-5.0/mlxsw-spectrum_router-do-not-check-vrf-mac-address.patch @@ -0,0 +1,41 @@ +From foo@baz Sat Apr 20 16:43:09 CEST 2019 +From: Ido Schimmel +Date: Wed, 10 Apr 2019 06:58:15 +0000 +Subject: mlxsw: spectrum_router: Do not check VRF MAC address + +From: Ido Schimmel + +[ Upstream commit 972fae683cbad5cf348268e76abc6d55cfb3ba87 ] + +Commit 74bc99397438 ("mlxsw: spectrum_router: Veto unsupported RIF MAC +addresses") enabled the driver to veto router interface (RIF) MAC +addresses that it cannot support. + +This check should only be performed for interfaces for which the driver +actually configures a RIF. A VRF upper is not one of them, so ignore it. + +Without this patch it is not possible to set an IP address on the VRF +device and use it as a loopback. + +Fixes: 74bc99397438 ("mlxsw: spectrum_router: Veto unsupported RIF MAC addresses") +Signed-off-by: Ido Schimmel +Reported-by: Alexander Petrovskiy +Tested-by: Alexander Petrovskiy +Acked-by: Jiri Pirko +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c ++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +@@ -6745,7 +6745,7 @@ static int mlxsw_sp_router_port_check_ri + /* A RIF is not created for macvlan netdevs. Their MAC is used to + * populate the FDB + */ +- if (netif_is_macvlan(dev)) ++ if (netif_is_macvlan(dev) || netif_is_l3_master(dev)) + return 0; + + for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) { diff --git a/queue-5.0/mlxsw-spectrum_switchdev-add-mdb-entries-in-prepare-phase.patch b/queue-5.0/mlxsw-spectrum_switchdev-add-mdb-entries-in-prepare-phase.patch new file mode 100644 index 00000000000..dbcb818476d --- /dev/null +++ b/queue-5.0/mlxsw-spectrum_switchdev-add-mdb-entries-in-prepare-phase.patch @@ -0,0 +1,66 @@ +From foo@baz Sat Apr 20 16:43:09 CEST 2019 +From: Ido Schimmel +Date: Wed, 10 Apr 2019 06:58:12 +0000 +Subject: mlxsw: spectrum_switchdev: Add MDB entries in prepare phase + +From: Ido Schimmel + +[ Upstream commit d4d0e40977ac450f32f2db5e4d8e23c9d2578899 ] + +The driver cannot guarantee in the prepare phase that it will be able to +write an MDB entry to the device. In case the driver returned success +during the prepare phase, but then failed to add the entry in the commit +phase, a WARNING [1] will be generated by the switchdev core. + +Fix this by doing the work in the prepare phase instead. + +[1] +[ 358.544486] swp12s0: Commit of object (id=2) failed. +[ 358.550061] WARNING: CPU: 0 PID: 30 at net/switchdev/switchdev.c:281 switchdev_port_obj_add_now+0x9b/0xe0 +[ 358.560754] CPU: 0 PID: 30 Comm: kworker/0:1 Not tainted 5.0.0-custom-13382-gf2449babf221 #1350 +[ 358.570472] Hardware name: Mellanox Technologies Ltd. 
MSN2100-CB2FO/SA001017, BIOS 5.6.5 06/07/2016 +[ 358.580582] Workqueue: events switchdev_deferred_process_work +[ 358.587001] RIP: 0010:switchdev_port_obj_add_now+0x9b/0xe0 +... +[ 358.614109] RSP: 0018:ffffa6b900d6fe18 EFLAGS: 00010286 +[ 358.619943] RAX: 0000000000000000 RBX: ffff8b00797ff000 RCX: 0000000000000000 +[ 358.627912] RDX: ffff8b00b7a1d4c0 RSI: ffff8b00b7a152e8 RDI: ffff8b00b7a152e8 +[ 358.635881] RBP: ffff8b005c3f5bc0 R08: 000000000000022b R09: 0000000000000000 +[ 358.643850] R10: 0000000000000000 R11: ffffa6b900d6fcc8 R12: 0000000000000000 +[ 358.651819] R13: dead000000000100 R14: ffff8b00b65a23c0 R15: 0ffff8b00b7a2200 +[ 358.659790] FS: 0000000000000000(0000) GS:ffff8b00b7a00000(0000) knlGS:0000000000000000 +[ 358.668820] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 358.675228] CR2: 00007f00aad90de0 CR3: 00000001ca80d000 CR4: 00000000001006f0 +[ 358.683188] Call Trace: +[ 358.685918] switchdev_port_obj_add_deferred+0x13/0x60 +[ 358.691655] switchdev_deferred_process+0x6b/0xf0 +[ 358.696907] switchdev_deferred_process_work+0xa/0x10 +[ 358.702548] process_one_work+0x1f5/0x3f0 +[ 358.707022] worker_thread+0x28/0x3c0 +[ 358.711099] ? process_one_work+0x3f0/0x3f0 +[ 358.715768] kthread+0x10d/0x130 +[ 358.719369] ? __kthread_create_on_node+0x180/0x180 +[ 358.724815] ret_from_fork+0x35/0x40 + +Fixes: 3a49b4fde2a1 ("mlxsw: Adding layer 2 multicast support") +Signed-off-by: Ido Schimmel +Reported-by: Alex Kushnarov +Tested-by: Alex Kushnarov +Acked-by: Jiri Pirko +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c ++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +@@ -1654,7 +1654,7 @@ static int mlxsw_sp_port_mdb_add(struct + u16 fid_index; + int err = 0; + +- if (switchdev_trans_ph_prepare(trans)) ++ if (switchdev_trans_ph_commit(trans)) + return 0; + + bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp->bridge, orig_dev); diff --git a/queue-5.0/net-atm-fix-potential-spectre-v1-vulnerabilities.patch b/queue-5.0/net-atm-fix-potential-spectre-v1-vulnerabilities.patch new file mode 100644 index 00000000000..0e1835e8bc1 --- /dev/null +++ b/queue-5.0/net-atm-fix-potential-spectre-v1-vulnerabilities.patch @@ -0,0 +1,53 @@ +From foo@baz Sat Apr 20 16:43:09 CEST 2019 +From: "Gustavo A. R. Silva" +Date: Mon, 15 Apr 2019 15:57:23 -0500 +Subject: net: atm: Fix potential Spectre v1 vulnerabilities + +From: "Gustavo A. R. Silva" + +[ Upstream commit 899537b73557aafbdd11050b501cf54b4f5c45af ] + +arg is controlled by user-space, hence leading to a potential +exploitation of the Spectre variant 1 vulnerability. + +This issue was detected with the help of Smatch: + +net/atm/lec.c:715 lec_mcast_attach() warn: potential spectre issue 'dev_lec' [r] (local cap) + +Fix this by sanitizing arg before using it to index dev_lec. + +Notice that given that speculation windows are large, the policy is +to kill the speculation on the first load and not worry if it can be +completed with a dependent load/store [1]. + +[1] https://lore.kernel.org/lkml/20180423164740.GY17484@dhcp22.suse.cz/ + +Signed-off-by: Gustavo A. R. Silva +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/atm/lec.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/net/atm/lec.c ++++ b/net/atm/lec.c +@@ -710,7 +710,10 @@ static int lec_vcc_attach(struct atm_vcc + + static int lec_mcast_attach(struct atm_vcc *vcc, int arg) + { +- if (arg < 0 || arg >= MAX_LEC_ITF || !dev_lec[arg]) ++ if (arg < 0 || arg >= MAX_LEC_ITF) ++ return -EINVAL; ++ arg = array_index_nospec(arg, MAX_LEC_ITF); ++ if (!dev_lec[arg]) + return -EINVAL; + vcc->proto_data = dev_lec[arg]; + return lec_mcast_make(netdev_priv(dev_lec[arg]), vcc); +@@ -728,6 +731,7 @@ static int lecd_attach(struct atm_vcc *v + i = arg; + if (arg >= MAX_LEC_ITF) + return -EINVAL; ++ i = array_index_nospec(arg, MAX_LEC_ITF); + if (!dev_lec[i]) { + int size; + diff --git a/queue-5.0/net-bridge-fix-netlink-export-of-vlan_stats_per_port-option.patch b/queue-5.0/net-bridge-fix-netlink-export-of-vlan_stats_per_port-option.patch new file mode 100644 index 00000000000..8e9e859814b --- /dev/null +++ b/queue-5.0/net-bridge-fix-netlink-export-of-vlan_stats_per_port-option.patch @@ -0,0 +1,33 @@ +From foo@baz Sat Apr 20 16:43:09 CEST 2019 +From: Nikolay Aleksandrov +Date: Tue, 16 Apr 2019 16:15:56 +0300 +Subject: net: bridge: fix netlink export of vlan_stats_per_port option + +From: Nikolay Aleksandrov + +[ Upstream commit 600bea7dba1a72874ae0cd9bc66bf2abfe43b49d ] + +Since the introduction of the vlan_stats_per_port option the netlink +export of it has been broken since I made a typo and used the ifla +attribute instead of the bridge option to retrieve its state. +Sysfs export is fine, only netlink export has been affected. + +Fixes: 9163a0fc1f0c0 ("net: bridge: add support for per-port vlan stats") +Signed-off-by: Nikolay Aleksandrov +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/br_netlink.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/bridge/br_netlink.c ++++ b/net/bridge/br_netlink.c +@@ -1441,7 +1441,7 @@ static int br_fill_info(struct sk_buff * + nla_put_u8(skb, IFLA_BR_VLAN_STATS_ENABLED, + br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) || + nla_put_u8(skb, IFLA_BR_VLAN_STATS_PER_PORT, +- br_opt_get(br, IFLA_BR_VLAN_STATS_PER_PORT))) ++ br_opt_get(br, BROPT_VLAN_STATS_PER_PORT))) + return -EMSGSIZE; + #endif + #ifdef CONFIG_BRIDGE_IGMP_SNOOPING diff --git a/queue-5.0/net-bridge-fix-per-port-af_packet-sockets.patch b/queue-5.0/net-bridge-fix-per-port-af_packet-sockets.patch new file mode 100644 index 00000000000..da5b5d291be --- /dev/null +++ b/queue-5.0/net-bridge-fix-per-port-af_packet-sockets.patch @@ -0,0 +1,77 @@ +From foo@baz Sat Apr 20 16:43:09 CEST 2019 +From: Nikolay Aleksandrov +Date: Thu, 11 Apr 2019 13:56:39 +0300 +Subject: net: bridge: fix per-port af_packet sockets + +From: Nikolay Aleksandrov + +[ Upstream commit 3b2e2904deb314cc77a2192f506f2fd44e3d10d0 ] + +When the commit below was introduced it changed two visible things: + - the skb was no longer passed through the protocol handlers with the + original device + - the skb was passed up the stack with skb->dev = bridge + +The first change broke af_packet sockets on bridge ports. For example we +use them for hostapd which listens for ETH_P_PAE packets on the ports. 
+We discussed two possible fixes: + - create a clone and pass it through NF_HOOK(), act on the original skb + based on the result + - somehow signal to the caller from the okfn() that it was called, + meaning the skb is ok to be passed, which this patch is trying to + implement via returning 1 from the bridge link-local okfn() + +Note that we rely on the fact that NF_QUEUE/STOLEN would return 0 and +drop/error would return < 0 thus the okfn() is called only when the +return was 1, so we signal to the caller that it was called by preserving +the return value from nf_hook(). + +Fixes: 8626c56c8279 ("bridge: fix potential use-after-free when hook returns QUEUE or STOLEN verdict") +Signed-off-by: Nikolay Aleksandrov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/br_input.c | 23 ++++++++++++++--------- + 1 file changed, 14 insertions(+), 9 deletions(-) + +--- a/net/bridge/br_input.c ++++ b/net/bridge/br_input.c +@@ -197,13 +197,10 @@ static void __br_handle_local_finish(str + /* note: already called with rcu_read_lock */ + static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb) + { +- struct net_bridge_port *p = br_port_get_rcu(skb->dev); +- + __br_handle_local_finish(skb); + +- BR_INPUT_SKB_CB(skb)->brdev = p->br->dev; +- br_pass_frame_up(skb); +- return 0; ++ /* return 1 to signal the okfn() was called so it's ok to use the skb */ ++ return 1; + } + + /* +@@ -280,10 +277,18 @@ rx_handler_result_t br_handle_frame(stru + goto forward; + } + +- /* Deliver packet to local host only */ +- NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, dev_net(skb->dev), +- NULL, skb, skb->dev, NULL, br_handle_local_finish); +- return RX_HANDLER_CONSUMED; ++ /* The else clause should be hit when nf_hook(): ++ * - returns < 0 (drop/error) ++ * - returns = 0 (stolen/nf_queue) ++ * Thus return 1 from the okfn() to signal the skb is ok to pass ++ */ ++ if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, ++ dev_net(skb->dev), NULL, skb, 
skb->dev, NULL, ++ br_handle_local_finish) == 1) { ++ return RX_HANDLER_PASS; ++ } else { ++ return RX_HANDLER_CONSUMED; ++ } + } + + forward: diff --git a/queue-5.0/net-bridge-multicast-use-rcu-to-access-port-list-from-br_multicast_start_querier.patch b/queue-5.0/net-bridge-multicast-use-rcu-to-access-port-list-from-br_multicast_start_querier.patch new file mode 100644 index 00000000000..447f802ce03 --- /dev/null +++ b/queue-5.0/net-bridge-multicast-use-rcu-to-access-port-list-from-br_multicast_start_querier.patch @@ -0,0 +1,41 @@ +From foo@baz Sat Apr 20 16:43:09 CEST 2019 +From: Nikolay Aleksandrov +Date: Thu, 11 Apr 2019 15:08:25 +0300 +Subject: net: bridge: multicast: use rcu to access port list from br_multicast_start_querier + +From: Nikolay Aleksandrov + +[ Upstream commit c5b493ce192bd7a4e7bd073b5685aad121eeef82 ] + +br_multicast_start_querier() walks over the port list but it can be +called from a timer with only multicast_lock held which doesn't protect +the port list, so use RCU to walk over it. + +Fixes: c83b8fab06fc ("bridge: Restart queries when last querier expires") +Signed-off-by: Nikolay Aleksandrov +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/br_multicast.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/net/bridge/br_multicast.c ++++ b/net/bridge/br_multicast.c +@@ -1916,7 +1916,8 @@ static void br_multicast_start_querier(s + + __br_multicast_open(br, query); + +- list_for_each_entry(port, &br->port_list, list) { ++ rcu_read_lock(); ++ list_for_each_entry_rcu(port, &br->port_list, list) { + if (port->state == BR_STATE_DISABLED || + port->state == BR_STATE_BLOCKING) + continue; +@@ -1928,6 +1929,7 @@ static void br_multicast_start_querier(s + br_multicast_enable(&port->ip6_own_query); + #endif + } ++ rcu_read_unlock(); + } + + int br_multicast_toggle(struct net_bridge *br, unsigned long val) diff --git a/queue-5.0/net-fec-manage-ahb-clock-in-runtime-pm.patch b/queue-5.0/net-fec-manage-ahb-clock-in-runtime-pm.patch new file mode 100644 index 00000000000..031bc5264fb --- /dev/null +++ b/queue-5.0/net-fec-manage-ahb-clock-in-runtime-pm.patch @@ -0,0 +1,106 @@ +From foo@baz Sat Apr 20 16:43:09 CEST 2019 +From: Andy Duan +Date: Tue, 9 Apr 2019 03:40:56 +0000 +Subject: net: fec: manage ahb clock in runtime pm + +From: Andy Duan + +[ Upstream commit d7c3a206e6338e4ccdf030719dec028e26a521d5 ] + +Some SoCs, like i.MX6SX, have clock limitations: +- ahb clock should be disabled before ipg. +- ahb and ipg clocks are required for MAC MII bus. +So, move the ahb clock to runtime management together with +ipg clock. + +Signed-off-by: Fugang Duan +Signed-off-by: David S.
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/freescale/fec_main.c | 30 +++++++++++++++++++++--------- + 1 file changed, 21 insertions(+), 9 deletions(-) + +--- a/drivers/net/ethernet/freescale/fec_main.c ++++ b/drivers/net/ethernet/freescale/fec_main.c +@@ -1840,13 +1840,9 @@ static int fec_enet_clk_enable(struct ne + int ret; + + if (enable) { +- ret = clk_prepare_enable(fep->clk_ahb); +- if (ret) +- return ret; +- + ret = clk_prepare_enable(fep->clk_enet_out); + if (ret) +- goto failed_clk_enet_out; ++ return ret; + + if (fep->clk_ptp) { + mutex_lock(&fep->ptp_clk_mutex); +@@ -1866,7 +1862,6 @@ static int fec_enet_clk_enable(struct ne + + phy_reset_after_clk_enable(ndev->phydev); + } else { +- clk_disable_unprepare(fep->clk_ahb); + clk_disable_unprepare(fep->clk_enet_out); + if (fep->clk_ptp) { + mutex_lock(&fep->ptp_clk_mutex); +@@ -1885,8 +1880,6 @@ failed_clk_ref: + failed_clk_ptp: + if (fep->clk_enet_out) + clk_disable_unprepare(fep->clk_enet_out); +-failed_clk_enet_out: +- clk_disable_unprepare(fep->clk_ahb); + + return ret; + } +@@ -3470,6 +3463,9 @@ fec_probe(struct platform_device *pdev) + ret = clk_prepare_enable(fep->clk_ipg); + if (ret) + goto failed_clk_ipg; ++ ret = clk_prepare_enable(fep->clk_ahb); ++ if (ret) ++ goto failed_clk_ahb; + + fep->reg_phy = devm_regulator_get_optional(&pdev->dev, "phy"); + if (!IS_ERR(fep->reg_phy)) { +@@ -3563,6 +3559,9 @@ failed_reset: + pm_runtime_put(&pdev->dev); + pm_runtime_disable(&pdev->dev); + failed_regulator: ++ clk_disable_unprepare(fep->clk_ahb); ++failed_clk_ahb: ++ clk_disable_unprepare(fep->clk_ipg); + failed_clk_ipg: + fec_enet_clk_enable(ndev, false); + failed_clk: +@@ -3686,6 +3685,7 @@ static int __maybe_unused fec_runtime_su + struct net_device *ndev = dev_get_drvdata(dev); + struct fec_enet_private *fep = netdev_priv(ndev); + ++ clk_disable_unprepare(fep->clk_ahb); + clk_disable_unprepare(fep->clk_ipg); + + return 0; +@@ -3695,8 +3695,20 @@ static int __maybe_unused 
fec_runtime_re + { + struct net_device *ndev = dev_get_drvdata(dev); + struct fec_enet_private *fep = netdev_priv(ndev); ++ int ret; + +- return clk_prepare_enable(fep->clk_ipg); ++ ret = clk_prepare_enable(fep->clk_ahb); ++ if (ret) ++ return ret; ++ ret = clk_prepare_enable(fep->clk_ipg); ++ if (ret) ++ goto failed_clk_ipg; ++ ++ return 0; ++ ++failed_clk_ipg: ++ clk_disable_unprepare(fep->clk_ahb); ++ return ret; + } + + static const struct dev_pm_ops fec_pm_ops = { diff --git a/queue-5.0/net-fix-missing-meta-data-in-skb-with-vlan-packet.patch b/queue-5.0/net-fix-missing-meta-data-in-skb-with-vlan-packet.patch new file mode 100644 index 00000000000..5afb8d54454 --- /dev/null +++ b/queue-5.0/net-fix-missing-meta-data-in-skb-with-vlan-packet.patch @@ -0,0 +1,49 @@ +From foo@baz Sat Apr 20 16:43:09 CEST 2019 +From: Yuya Kusakabe +Date: Tue, 16 Apr 2019 10:22:28 +0900 +Subject: net: Fix missing meta data in skb with vlan packet + +From: Yuya Kusakabe + +[ Upstream commit d85e8be2a5a02869f815dd0ac2d743deb4cd7957 ] + +skb_reorder_vlan_header() should move XDP meta data with ethernet header +if XDP meta data exists. + +Fixes: de8f3a83b0a0 ("bpf: add meta pointer for direct access") +Signed-off-by: Yuya Kusakabe +Signed-off-by: Takeru Hayasaka +Co-developed-by: Takeru Hayasaka +Reviewed-by: Toshiaki Makita +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/skbuff.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -5083,7 +5083,8 @@ EXPORT_SYMBOL_GPL(skb_gso_validate_mac_l + + static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb) + { +- int mac_len; ++ int mac_len, meta_len; ++ void *meta; + + if (skb_cow(skb, skb_headroom(skb)) < 0) { + kfree_skb(skb); +@@ -5095,6 +5096,13 @@ static struct sk_buff *skb_reorder_vlan_ + memmove(skb_mac_header(skb) + VLAN_HLEN, skb_mac_header(skb), + mac_len - VLAN_HLEN - ETH_TLEN); + } ++ ++ meta_len = skb_metadata_len(skb); ++ if (meta_len) { ++ meta = skb_metadata_end(skb) - meta_len; ++ memmove(meta + VLAN_HLEN, meta, meta_len); ++ } ++ + skb->mac_header += VLAN_HLEN; + return skb; + } diff --git a/queue-5.0/net-fou-do-not-use-guehdr-after-iptunnel_pull_offloads-in-gue_udp_recv.patch b/queue-5.0/net-fou-do-not-use-guehdr-after-iptunnel_pull_offloads-in-gue_udp_recv.patch new file mode 100644 index 00000000000..cdafe873579 --- /dev/null +++ b/queue-5.0/net-fou-do-not-use-guehdr-after-iptunnel_pull_offloads-in-gue_udp_recv.patch @@ -0,0 +1,48 @@ +From foo@baz Sat Apr 20 16:43:09 CEST 2019 +From: Lorenzo Bianconi +Date: Tue, 9 Apr 2019 11:47:20 +0200 +Subject: net: fou: do not use guehdr after iptunnel_pull_offloads in gue_udp_recv + +From: Lorenzo Bianconi + +[ Upstream commit 988dc4a9a3b66be75b30405a5494faf0dc7cffb6 ] + +gue tunnels run iptunnel_pull_offloads on received skbs. This can +determine a possible use-after-free accessing guehdr pointer since +the packet will be 'uncloned' running pskb_expand_head if it is a +cloned gso skb (e.g. if the packet has been sent through a veth device) + +Fixes: a09a4c8dd1ec ("tunnels: Remove encapsulation offloads on decap") +Signed-off-by: Lorenzo Bianconi +Signed-off-by: David S.
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/fou.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/net/ipv4/fou.c ++++ b/net/ipv4/fou.c +@@ -121,6 +121,7 @@ static int gue_udp_recv(struct sock *sk, + struct guehdr *guehdr; + void *data; + u16 doffset = 0; ++ u8 proto_ctype; + + if (!fou) + return 1; +@@ -212,13 +213,14 @@ static int gue_udp_recv(struct sock *sk, + if (unlikely(guehdr->control)) + return gue_control_message(skb, guehdr); + ++ proto_ctype = guehdr->proto_ctype; + __skb_pull(skb, sizeof(struct udphdr) + hdrlen); + skb_reset_transport_header(skb); + + if (iptunnel_pull_offloads(skb)) + goto drop; + +- return -guehdr->proto_ctype; ++ return -proto_ctype; + + drop: + kfree_skb(skb); diff --git a/queue-5.0/net-mlx5-fpga-tls-hold-rcu-read-lock-a-bit-longer.patch b/queue-5.0/net-mlx5-fpga-tls-hold-rcu-read-lock-a-bit-longer.patch new file mode 100644 index 00000000000..b17bda4570f --- /dev/null +++ b/queue-5.0/net-mlx5-fpga-tls-hold-rcu-read-lock-a-bit-longer.patch @@ -0,0 +1,54 @@ +From foo@baz Sat Apr 20 16:43:09 CEST 2019 +From: Saeed Mahameed +Date: Tue, 19 Mar 2019 22:09:05 -0700 +Subject: net/mlx5: FPGA, tls, hold rcu read lock a bit longer + +From: Saeed Mahameed + +[ Upstream commit 31634bf5dcc418b5b2cacd954394c0c4620db6a2 ] + +To avoid use-after-free, hold the rcu read lock until we are done copying +flow data into the command buffer. 
+ +Fixes: ab412e1dd7db ("net/mlx5: Accel, add TLS rx offload routines") +Reported-by: Eric Dumazet +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c | 18 +++++++++--------- + 1 file changed, 9 insertions(+), 9 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c +@@ -217,22 +217,22 @@ int mlx5_fpga_tls_resync_rx(struct mlx5_ + void *cmd; + int ret; + +- rcu_read_lock(); +- flow = idr_find(&mdev->fpga->tls->rx_idr, ntohl(handle)); +- rcu_read_unlock(); +- +- if (!flow) { +- WARN_ONCE(1, "Received NULL pointer for handle\n"); +- return -EINVAL; +- } +- + buf = kzalloc(size, GFP_ATOMIC); + if (!buf) + return -ENOMEM; + + cmd = (buf + 1); + ++ rcu_read_lock(); ++ flow = idr_find(&mdev->fpga->tls->rx_idr, ntohl(handle)); ++ if (unlikely(!flow)) { ++ rcu_read_unlock(); ++ WARN_ONCE(1, "Received NULL pointer for handle\n"); ++ kfree(buf); ++ return -EINVAL; ++ } + mlx5_fpga_tls_flow_to_cmd(flow, cmd); ++ rcu_read_unlock(); + + MLX5_SET(tls_cmd, cmd, swid, ntohl(handle)); + MLX5_SET64(tls_cmd, cmd, tls_rcd_sn, be64_to_cpu(rcd_sn)); diff --git a/queue-5.0/net-mlx5-fpga-tls-idr-remove-on-flow-delete.patch b/queue-5.0/net-mlx5-fpga-tls-idr-remove-on-flow-delete.patch new file mode 100644 index 00000000000..ca47af0729d --- /dev/null +++ b/queue-5.0/net-mlx5-fpga-tls-idr-remove-on-flow-delete.patch @@ -0,0 +1,139 @@ +From foo@baz Sat Apr 20 16:43:09 CEST 2019 +From: Saeed Mahameed +Date: Tue, 19 Mar 2019 01:05:41 -0700 +Subject: net/mlx5: FPGA, tls, idr remove on flow delete + +From: Saeed Mahameed + +[ Upstream commit df3a8344d404a810b4aadbf19b08c8232fbaa715 ] + +Flow is kfreed on mlx5_fpga_tls_del_flow but kept in the idr data +structure, this is risky and can cause use-after-free, since the +idr_remove is delayed until tls_send_teardown_cmd completion. 
+ +Instead of delaying idr_remove, in this patch we do it on +mlx5_fpga_tls_del_flow, before actually kfree(flow). + +Added synchronize_rcu before kfree(flow) + +Fixes: ab412e1dd7db ("net/mlx5: Accel, add TLS rx offload routines") +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c | 43 +++++++-------------- + 1 file changed, 15 insertions(+), 28 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c +@@ -148,14 +148,16 @@ static int mlx5_fpga_tls_alloc_swid(stru + return ret; + } + +-static void mlx5_fpga_tls_release_swid(struct idr *idr, +- spinlock_t *idr_spinlock, u32 swid) ++static void *mlx5_fpga_tls_release_swid(struct idr *idr, ++ spinlock_t *idr_spinlock, u32 swid) + { + unsigned long flags; ++ void *ptr; + + spin_lock_irqsave(idr_spinlock, flags); +- idr_remove(idr, swid); ++ ptr = idr_remove(idr, swid); + spin_unlock_irqrestore(idr_spinlock, flags); ++ return ptr; + } + + static void mlx_tls_kfree_complete(struct mlx5_fpga_conn *conn, +@@ -165,20 +167,12 @@ static void mlx_tls_kfree_complete(struc + kfree(buf); + } + +-struct mlx5_teardown_stream_context { +- struct mlx5_fpga_tls_command_context cmd; +- u32 swid; +-}; +- + static void + mlx5_fpga_tls_teardown_completion(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_device *fdev, + struct mlx5_fpga_tls_command_context *cmd, + struct mlx5_fpga_dma_buf *resp) + { +- struct mlx5_teardown_stream_context *ctx = +- container_of(cmd, struct mlx5_teardown_stream_context, cmd); +- + if (resp) { + u32 syndrome = MLX5_GET(tls_resp, resp->sg[0].data, syndrome); + +@@ -186,14 +180,6 @@ mlx5_fpga_tls_teardown_completion(struct + mlx5_fpga_err(fdev, + "Teardown stream failed with syndrome = %d", + syndrome); +- else if (MLX5_GET(tls_cmd, cmd->buf.sg[0].data, direction_sx)) +- mlx5_fpga_tls_release_swid(&fdev->tls->tx_idr, +- &fdev->tls->tx_idr_spinlock, +- ctx->swid); +- 
else +- mlx5_fpga_tls_release_swid(&fdev->tls->rx_idr, +- &fdev->tls->rx_idr_spinlock, +- ctx->swid); + } + mlx5_fpga_tls_put_command_ctx(cmd); + } +@@ -253,7 +239,7 @@ int mlx5_fpga_tls_resync_rx(struct mlx5_ + static void mlx5_fpga_tls_send_teardown_cmd(struct mlx5_core_dev *mdev, + void *flow, u32 swid, gfp_t flags) + { +- struct mlx5_teardown_stream_context *ctx; ++ struct mlx5_fpga_tls_command_context *ctx; + struct mlx5_fpga_dma_buf *buf; + void *cmd; + +@@ -261,7 +247,7 @@ static void mlx5_fpga_tls_send_teardown_ + if (!ctx) + return; + +- buf = &ctx->cmd.buf; ++ buf = &ctx->buf; + cmd = (ctx + 1); + MLX5_SET(tls_cmd, cmd, command_type, CMD_TEARDOWN_STREAM); + MLX5_SET(tls_cmd, cmd, swid, swid); +@@ -272,8 +258,7 @@ static void mlx5_fpga_tls_send_teardown_ + buf->sg[0].data = cmd; + buf->sg[0].size = MLX5_TLS_COMMAND_SIZE; + +- ctx->swid = swid; +- mlx5_fpga_tls_cmd_send(mdev->fpga, &ctx->cmd, ++ mlx5_fpga_tls_cmd_send(mdev->fpga, ctx, + mlx5_fpga_tls_teardown_completion); + } + +@@ -283,13 +268,14 @@ void mlx5_fpga_tls_del_flow(struct mlx5_ + struct mlx5_fpga_tls *tls = mdev->fpga->tls; + void *flow; + +- rcu_read_lock(); + if (direction_sx) +- flow = idr_find(&tls->tx_idr, swid); ++ flow = mlx5_fpga_tls_release_swid(&tls->tx_idr, ++ &tls->tx_idr_spinlock, ++ swid); + else +- flow = idr_find(&tls->rx_idr, swid); +- +- rcu_read_unlock(); ++ flow = mlx5_fpga_tls_release_swid(&tls->rx_idr, ++ &tls->rx_idr_spinlock, ++ swid); + + if (!flow) { + mlx5_fpga_err(mdev->fpga, "No flow information for swid %u\n", +@@ -297,6 +283,7 @@ void mlx5_fpga_tls_del_flow(struct mlx5_ + return; + } + ++ synchronize_rcu(); /* before kfree(flow) */ + mlx5_fpga_tls_send_teardown_cmd(mdev, flow, swid, flags); + } + diff --git a/queue-5.0/net-mlx5e-protect-against-non-uplink-representor-for-encap.patch b/queue-5.0/net-mlx5e-protect-against-non-uplink-representor-for-encap.patch new file mode 100644 index 00000000000..7f089fb05af --- /dev/null +++ 
b/queue-5.0/net-mlx5e-protect-against-non-uplink-representor-for-encap.patch @@ -0,0 +1,36 @@ +From foo@baz Sat Apr 20 16:43:09 CEST 2019 +From: Dmytro Linkin +Date: Fri, 29 Mar 2019 12:50:37 +0000 +Subject: net/mlx5e: Protect against non-uplink representor for encap + +From: Dmytro Linkin + +[ Upstream commit 5e0060b1491b299b1706414e61ede0b02265680e ] + +TC encap offload is supported only for the physical uplink +representor. Fail for non uplink representor. + +Fixes: 3e621b19b0bb ("net/mlx5e: Support TC encapsulation offloads with upper devices") +Signed-off-by: Dmytro Linkin +Reviewed-by: Eli Britstein +Reviewed-by: Vlad Buslov +Reviewed-by: Roi Dayan +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +@@ -39,6 +39,10 @@ static int get_route_and_out_devs(struct + return -EOPNOTSUPP; + } + ++ if (!(mlx5e_eswitch_rep(*out_dev) && ++ mlx5e_is_uplink_rep(netdev_priv(*out_dev)))) ++ return -EOPNOTSUPP; ++ + return 0; + } + diff --git a/queue-5.0/net-mlx5e-rx-check-ip-headers-sanity.patch b/queue-5.0/net-mlx5e-rx-check-ip-headers-sanity.patch new file mode 100644 index 00000000000..6a920c61d2b --- /dev/null +++ b/queue-5.0/net-mlx5e-rx-check-ip-headers-sanity.patch @@ -0,0 +1,45 @@ +From foo@baz Sat Apr 20 16:43:09 CEST 2019 +From: Saeed Mahameed +Date: Mon, 25 Mar 2019 22:10:59 -0700 +Subject: net/mlx5e: Rx, Check ip headers sanity + +From: Saeed Mahameed + +[ Upstream commit 0318a7b7fcad9765931146efa7ca3a034194737c ] + +In the two places is_last_ethertype_ip is being called, the caller will +be looking inside the ip header, to be safe, add ip{4,6} header sanity +check. And return true only on valid ip headers, i.e: the whole header +is contained in the linear part of the skb. 
+ +Note: Such situation is very rare and hard to reproduce, since mlx5e +allocates a large enough headroom to contain the largest header one can +imagine. + +Fixes: fe1dc069990c ("net/mlx5e: don't set CHECKSUM_COMPLETE on SCTP packets") +Reported-by: Cong Wang +Reviewed-by: Tariq Toukan +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +@@ -693,7 +693,14 @@ static inline bool is_last_ethertype_ip( + { + *proto = ((struct ethhdr *)skb->data)->h_proto; + *proto = __vlan_get_protocol(skb, *proto, network_depth); +- return (*proto == htons(ETH_P_IP) || *proto == htons(ETH_P_IPV6)); ++ ++ if (*proto == htons(ETH_P_IP)) ++ return pskb_may_pull(skb, *network_depth + sizeof(struct iphdr)); ++ ++ if (*proto == htons(ETH_P_IPV6)) ++ return pskb_may_pull(skb, *network_depth + sizeof(struct ipv6hdr)); ++ ++ return false; + } + + static inline void mlx5e_enable_ecn(struct mlx5e_rq *rq, struct sk_buff *skb) diff --git a/queue-5.0/net-mlx5e-rx-fixup-skb-checksum-for-packets-with-tail-padding.patch b/queue-5.0/net-mlx5e-rx-fixup-skb-checksum-for-packets-with-tail-padding.patch new file mode 100644 index 00000000000..d7692b7a549 --- /dev/null +++ b/queue-5.0/net-mlx5e-rx-fixup-skb-checksum-for-packets-with-tail-padding.patch @@ -0,0 +1,183 @@ +From foo@baz Sat Apr 20 16:43:09 CEST 2019 +From: Saeed Mahameed +Date: Tue, 12 Mar 2019 00:24:52 -0700 +Subject: net/mlx5e: Rx, Fixup skb checksum for packets with tail padding + +From: Saeed Mahameed + +[ Upstream commit 0aa1d18615c163f92935b806dcaff9157645233a ] + +When an ethernet frame with ip payload is padded, the padding octets are +not covered by the hardware checksum. + +Prior to the cited commit, skb checksum was forced to be CHECKSUM_NONE +when padding is detected. 
After it, the kernel will try to trim the +padding bytes and subtract their checksum from skb->csum. + +In this patch we fixup skb->csum for any ip packet with tail padding of +any size, if any padding found. +FCS case is just one special case of this general purpose patch, hence, +it is removed. + +Fixes: 88078d98d1bb ("net: pskb_trim_rcsum() and CHECKSUM_COMPLETE are friends"), +Cc: Eric Dumazet +Reviewed-by: Tariq Toukan +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 79 +++++++++++++++++---- + drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 6 + + drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 4 + + 3 files changed, 74 insertions(+), 15 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +@@ -713,17 +713,6 @@ static inline void mlx5e_enable_ecn(stru + rq->stats->ecn_mark += !!rc; + } + +-static u32 mlx5e_get_fcs(const struct sk_buff *skb) +-{ +- const void *fcs_bytes; +- u32 _fcs_bytes; +- +- fcs_bytes = skb_header_pointer(skb, skb->len - ETH_FCS_LEN, +- ETH_FCS_LEN, &_fcs_bytes); +- +- return __get_unaligned_cpu32(fcs_bytes); +-} +- + static u8 get_ip_proto(struct sk_buff *skb, int network_depth, __be16 proto) + { + void *ip_p = skb->data + network_depth; +@@ -734,6 +723,68 @@ static u8 get_ip_proto(struct sk_buff *s + + #define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN) + ++#define MAX_PADDING 8 ++ ++static void ++tail_padding_csum_slow(struct sk_buff *skb, int offset, int len, ++ struct mlx5e_rq_stats *stats) ++{ ++ stats->csum_complete_tail_slow++; ++ skb->csum = csum_block_add(skb->csum, ++ skb_checksum(skb, offset, len, 0), ++ offset); ++} ++ ++static void ++tail_padding_csum(struct sk_buff *skb, int offset, ++ struct mlx5e_rq_stats *stats) ++{ ++ u8 tail_padding[MAX_PADDING]; ++ int len = skb->len - offset; ++ void *tail; ++ ++ if (unlikely(len > MAX_PADDING)) { ++ 
tail_padding_csum_slow(skb, offset, len, stats); ++ return; ++ } ++ ++ tail = skb_header_pointer(skb, offset, len, tail_padding); ++ if (unlikely(!tail)) { ++ tail_padding_csum_slow(skb, offset, len, stats); ++ return; ++ } ++ ++ stats->csum_complete_tail++; ++ skb->csum = csum_block_add(skb->csum, csum_partial(tail, len, 0), offset); ++} ++ ++static void ++mlx5e_skb_padding_csum(struct sk_buff *skb, int network_depth, __be16 proto, ++ struct mlx5e_rq_stats *stats) ++{ ++ struct ipv6hdr *ip6; ++ struct iphdr *ip4; ++ int pkt_len; ++ ++ switch (proto) { ++ case htons(ETH_P_IP): ++ ip4 = (struct iphdr *)(skb->data + network_depth); ++ pkt_len = network_depth + ntohs(ip4->tot_len); ++ break; ++ case htons(ETH_P_IPV6): ++ ip6 = (struct ipv6hdr *)(skb->data + network_depth); ++ pkt_len = network_depth + sizeof(*ip6) + ntohs(ip6->payload_len); ++ break; ++ default: ++ return; ++ } ++ ++ if (likely(pkt_len >= skb->len)) ++ return; ++ ++ tail_padding_csum(skb, pkt_len, stats); ++} ++ + static inline void mlx5e_handle_csum(struct net_device *netdev, + struct mlx5_cqe64 *cqe, + struct mlx5e_rq *rq, +@@ -782,10 +833,8 @@ static inline void mlx5e_handle_csum(str + skb->csum = csum_partial(skb->data + ETH_HLEN, + network_depth - ETH_HLEN, + skb->csum); +- if (unlikely(netdev->features & NETIF_F_RXFCS)) +- skb->csum = csum_block_add(skb->csum, +- (__force __wsum)mlx5e_get_fcs(skb), +- skb->len - ETH_FCS_LEN); ++ ++ mlx5e_skb_padding_csum(skb, network_depth, proto, stats); + stats->csum_complete++; + return; + } +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +@@ -59,6 +59,8 @@ static const struct counter_desc sw_stat + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) }, ++ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete_tail) }, ++ { 
MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete_tail_slow) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary_inner) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_drop) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_redirect) }, +@@ -151,6 +153,8 @@ void mlx5e_grp_sw_update_stats(struct ml + s->rx_removed_vlan_packets += rq_stats->removed_vlan_packets; + s->rx_csum_none += rq_stats->csum_none; + s->rx_csum_complete += rq_stats->csum_complete; ++ s->rx_csum_complete_tail += rq_stats->csum_complete_tail; ++ s->rx_csum_complete_tail_slow += rq_stats->csum_complete_tail_slow; + s->rx_csum_unnecessary += rq_stats->csum_unnecessary; + s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner; + s->rx_xdp_drop += rq_stats->xdp_drop; +@@ -1192,6 +1196,8 @@ static const struct counter_desc rq_stat + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, packets) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, bytes) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete) }, ++ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete_tail) }, ++ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete_tail_slow) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) }, +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +@@ -71,6 +71,8 @@ struct mlx5e_sw_stats { + u64 rx_csum_unnecessary; + u64 rx_csum_none; + u64 rx_csum_complete; ++ u64 rx_csum_complete_tail; ++ u64 rx_csum_complete_tail_slow; + u64 rx_csum_unnecessary_inner; + u64 rx_xdp_drop; + u64 rx_xdp_redirect; +@@ -181,6 +183,8 @@ struct mlx5e_rq_stats { + u64 packets; + u64 bytes; + u64 csum_complete; ++ u64 csum_complete_tail; ++ u64 csum_complete_tail_slow; + u64 csum_unnecessary; + u64 csum_unnecessary_inner; + u64 csum_none; 
diff --git a/queue-5.0/net-mlx5e-switch-to-toeplitz-rss-hash-by-default.patch b/queue-5.0/net-mlx5e-switch-to-toeplitz-rss-hash-by-default.patch new file mode 100644 index 00000000000..9564aa637a5 --- /dev/null +++ b/queue-5.0/net-mlx5e-switch-to-toeplitz-rss-hash-by-default.patch @@ -0,0 +1,41 @@ +From foo@baz Sat Apr 20 16:43:09 CEST 2019 +From: Konstantin Khlebnikov +Date: Fri, 31 Aug 2018 14:29:16 +0300 +Subject: net/mlx5e: Switch to Toeplitz RSS hash by default + +From: Konstantin Khlebnikov + +[ Upstream commit 7ee2ace9c544a0886e02b54b625e521df8692d20 ] + +Although XOR hash function can perform very well on some special use +cases, to align with all drivers, mlx5 driver should use Toeplitz hash +by default. +Toeplitz is more stable for the general use case and it is more standard +and reliable. + +On top of that, since XOR (MLX5_RX_HASH_FN_INVERTED_XOR8) gives only a +repeated 8 bits pattern. When used for udp tunneling RSS source port +manipulation it results in fixed source port, which will cause bad RSS +spread. 
+ +Fixes: 2be6967cdbc9 ("net/mlx5e: Support ETH_RSS_HASH_XOR") +Signed-off-by: Konstantin Khlebnikov +Reviewed-by: Tariq Toukan +Signed-off-by: Moshe Shemesh +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +@@ -4574,7 +4574,7 @@ void mlx5e_build_rss_params(struct mlx5e + { + enum mlx5e_traffic_types tt; + +- rss_params->hfunc = ETH_RSS_HASH_XOR; ++ rss_params->hfunc = ETH_RSS_HASH_TOP; + netdev_rss_key_fill(rss_params->toeplitz_hash_key, + sizeof(rss_params->toeplitz_hash_key)); + mlx5e_build_default_indir_rqt(rss_params->indirection_rqt, diff --git a/queue-5.0/net-mlx5e-xdp-avoid-checksum-complete-when-xdp-prog-is-loaded.patch b/queue-5.0/net-mlx5e-xdp-avoid-checksum-complete-when-xdp-prog-is-loaded.patch new file mode 100644 index 00000000000..74b55e71bba --- /dev/null +++ b/queue-5.0/net-mlx5e-xdp-avoid-checksum-complete-when-xdp-prog-is-loaded.patch @@ -0,0 +1,64 @@ +From foo@baz Sat Apr 20 16:43:09 CEST 2019 +From: Saeed Mahameed +Date: Thu, 21 Mar 2019 19:07:20 -0700 +Subject: net/mlx5e: XDP, Avoid checksum complete when XDP prog is loaded + +From: Saeed Mahameed + +[ Upstream commit 5d0bb3bac4b9f6c22280b04545626fdfd99edc6b ] + +XDP programs might change packets data contents which will make the +reported skb checksum (checksum complete) invalid. + +When XDP programs are loaded/unloaded set/clear rx RQs +MLX5E_RQ_STATE_NO_CSUM_COMPLETE flag. 
+ +Fixes: 86994156c736 ("net/mlx5e: XDP fast RX drop bpf programs support") +Reviewed-by: Tariq Toukan +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 3 ++- + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 6 +++++- + drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 3 ++- + 3 files changed, 9 insertions(+), 3 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +@@ -1665,7 +1665,8 @@ static int set_pflag_rx_no_csum_complete + struct mlx5e_channel *c; + int i; + +- if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) ++ if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || ++ priv->channels.params.xdp_prog) + return 0; + + for (i = 0; i < channels->num; i++) { +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +@@ -950,7 +950,11 @@ static int mlx5e_open_rq(struct mlx5e_ch + if (params->rx_dim_enabled) + __set_bit(MLX5E_RQ_STATE_AM, &c->rq.state); + +- if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE)) ++ /* We disable csum_complete when XDP is enabled since ++ * XDP programs might manipulate packets which will render ++ * skb->checksum incorrect. 
++ */ ++ if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) || c->xdp) + __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state); + + return 0; +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +@@ -753,7 +753,8 @@ static inline void mlx5e_handle_csum(str + return; + } + +- if (unlikely(test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state))) ++ /* True when explicitly set via priv flag, or XDP prog is loaded */ ++ if (test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state)) + goto csum_unnecessary; + + /* CQE csum doesn't cover padding octets in short ethernet diff --git a/queue-5.0/net-strparser-partially-revert-strparser-call-skb_unclone-conditionally.patch b/queue-5.0/net-strparser-partially-revert-strparser-call-skb_unclone-conditionally.patch new file mode 100644 index 00000000000..7b43b2a4cc2 --- /dev/null +++ b/queue-5.0/net-strparser-partially-revert-strparser-call-skb_unclone-conditionally.patch @@ -0,0 +1,90 @@ +From foo@baz Sat Apr 20 16:43:09 CEST 2019 +From: Jakub Kicinski +Date: Wed, 10 Apr 2019 11:04:32 -0700 +Subject: net: strparser: partially revert "strparser: Call skb_unclone conditionally" + +From: Jakub Kicinski + +[ Upstream commit 4a9c2e3746e6151fd5d077259d79ce9ca86d47d7 ] + +This reverts the first part of commit 4e485d06bb8c ("strparser: Call +skb_unclone conditionally"). To build a message with multiple +fragments we need our own root of frag_list. We can't simply +use the frag_list of orig_skb, because it will lead to linking +all orig_skbs together creating very long frag chains, and causing +stack overflow on kfree_skb() (which is called recursively on +the frag_lists). + +BUG: stack guard page was hit at 00000000d40fad41 (stack is 0000000029dde9f4..000000008cce03d5) +kernel stack overflow (double-fault): 0000 [#1] PREEMPT SMP +RIP: 0010:free_one_page+0x2b/0x490 + +Call Trace: + __free_pages_ok+0x143/0x2c0 + skb_release_data+0x8e/0x140 + ? 
skb_release_data+0xad/0x140 + kfree_skb+0x32/0xb0 + + [...] + + skb_release_data+0xad/0x140 + ? skb_release_data+0xad/0x140 + kfree_skb+0x32/0xb0 + skb_release_data+0xad/0x140 + ? skb_release_data+0xad/0x140 + kfree_skb+0x32/0xb0 + skb_release_data+0xad/0x140 + ? skb_release_data+0xad/0x140 + kfree_skb+0x32/0xb0 + skb_release_data+0xad/0x140 + ? skb_release_data+0xad/0x140 + kfree_skb+0x32/0xb0 + skb_release_data+0xad/0x140 + __kfree_skb+0xe/0x20 + tcp_disconnect+0xd6/0x4d0 + tcp_close+0xf4/0x430 + ? tcp_check_oom+0xf0/0xf0 + tls_sk_proto_close+0xe4/0x1e0 [tls] + inet_release+0x36/0x60 + __sock_release+0x37/0xa0 + sock_close+0x11/0x20 + __fput+0xa2/0x1d0 + task_work_run+0x89/0xb0 + exit_to_usermode_loop+0x9a/0xa0 + do_syscall_64+0xc0/0xf0 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + +Let's leave the second unclone conditional, as I'm not entirely +sure what is its purpose :) + +Fixes: 4e485d06bb8c ("strparser: Call skb_unclone conditionally") +Signed-off-by: Jakub Kicinski +Reviewed-by: Dirk van der Merwe +Reviewed-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/strparser/strparser.c | 12 +++++------- + 1 file changed, 5 insertions(+), 7 deletions(-) + +--- a/net/strparser/strparser.c ++++ b/net/strparser/strparser.c +@@ -140,13 +140,11 @@ static int __strp_recv(read_descriptor_t + /* We are going to append to the frags_list of head. + * Need to unshare the frag_list. 
+ */ +- if (skb_has_frag_list(head)) { +- err = skb_unclone(head, GFP_ATOMIC); +- if (err) { +- STRP_STATS_INCR(strp->stats.mem_fail); +- desc->error = err; +- return 0; +- } ++ err = skb_unclone(head, GFP_ATOMIC); ++ if (err) { ++ STRP_STATS_INCR(strp->stats.mem_fail); ++ desc->error = err; ++ return 0; + } + + if (unlikely(skb_shinfo(head)->frag_list)) { diff --git a/queue-5.0/net-thunderx-don-t-allow-jumbo-frames-with-xdp.patch b/queue-5.0/net-thunderx-don-t-allow-jumbo-frames-with-xdp.patch new file mode 100644 index 00000000000..ef8ec8cf1d6 --- /dev/null +++ b/queue-5.0/net-thunderx-don-t-allow-jumbo-frames-with-xdp.patch @@ -0,0 +1,41 @@ +From foo@baz Sat Apr 20 16:43:09 CEST 2019 +From: Matteo Croce +Date: Thu, 11 Apr 2019 12:26:33 +0200 +Subject: net: thunderx: don't allow jumbo frames with XDP + +From: Matteo Croce + +[ Upstream commit 1f227d16083b2e280b7dde4ca78883d75593f2fd ] + +The thunderx driver forbids to load an eBPF program if the MTU is too high, +but this can be circumvented by loading the eBPF, then raising the MTU. + +Fix this by limiting the MTU if an eBPF program is already loaded. + +Fixes: 05c773f52b96e ("net: thunderx: Add basic XDP support") +Signed-off-by: Matteo Croce +Acked-by: Jesper Dangaard Brouer +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/cavium/thunder/nicvf_main.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c ++++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c +@@ -1589,6 +1589,15 @@ static int nicvf_change_mtu(struct net_d + struct nicvf *nic = netdev_priv(netdev); + int orig_mtu = netdev->mtu; + ++ /* For now just support only the usual MTU sized frames, ++ * plus some headroom for VLAN, QinQ. 
++ */ ++ if (nic->xdp_prog && new_mtu > MAX_XDP_MTU) { ++ netdev_warn(netdev, "Jumbo frames not yet supported with XDP, current MTU %d.\n", ++ netdev->mtu); ++ return -EINVAL; ++ } ++ + netdev->mtu = new_mtu; + + if (!netif_running(netdev)) diff --git a/queue-5.0/net-thunderx-raise-xdp-mtu-to-1508.patch b/queue-5.0/net-thunderx-raise-xdp-mtu-to-1508.patch new file mode 100644 index 00000000000..cdf25eea92b --- /dev/null +++ b/queue-5.0/net-thunderx-raise-xdp-mtu-to-1508.patch @@ -0,0 +1,55 @@ +From foo@baz Sat Apr 20 16:43:09 CEST 2019 +From: Matteo Croce +Date: Thu, 11 Apr 2019 12:26:32 +0200 +Subject: net: thunderx: raise XDP MTU to 1508 + +From: Matteo Croce + +[ Upstream commit 5ee15c101f29e0093ffb5448773ccbc786eb313b ] + +The thunderx driver splits frames bigger than 1530 bytes to multiple +pages, making it impossible to run an eBPF program on it. +This leads to a maximum MTU of 1508 if QinQ is in use. + +The thunderx driver refuses to load an eBPF program if the MTU is higher +than 1500 bytes. Raise the limit to 1508 so it is possible to use L2 +protocols which need some more headroom. + +Fixes: 05c773f52b96e ("net: thunderx: Add basic XDP support") +Signed-off-by: Matteo Croce +Acked-by: Jesper Dangaard Brouer +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/cavium/thunder/nicvf_main.c | 13 +++++++++++-- + 1 file changed, 11 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c ++++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c +@@ -32,6 +32,13 @@ + #define DRV_NAME "nicvf" + #define DRV_VERSION "1.0" + ++/* NOTE: Packets bigger than 1530 are split across multiple pages and XDP needs ++ * the buffer to be contiguous. 
Allow XDP to be set up only if we don't exceed ++ * this value, keeping headroom for the 14 byte Ethernet header and two ++ * VLAN tags (for QinQ) ++ */ ++#define MAX_XDP_MTU (1530 - ETH_HLEN - VLAN_HLEN * 2) ++ + /* Supported devices */ + static const struct pci_device_id nicvf_id_table[] = { + { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, +@@ -1830,8 +1837,10 @@ static int nicvf_xdp_setup(struct nicvf + bool bpf_attached = false; + int ret = 0; + +- /* For now just support only the usual MTU sized frames */ +- if (prog && (dev->mtu > 1500)) { ++ /* For now just support only the usual MTU sized frames, ++ * plus some headroom for VLAN, QinQ. ++ */ ++ if (prog && dev->mtu > MAX_XDP_MTU) { + netdev_warn(dev, "Jumbo frames not yet supported with XDP, current MTU %d.\n", + dev->mtu); + return -EOPNOTSUPP; diff --git a/queue-5.0/net-tls-don-t-leak-partially-sent-record-in-device-mode.patch b/queue-5.0/net-tls-don-t-leak-partially-sent-record-in-device-mode.patch new file mode 100644 index 00000000000..5b28961e34d --- /dev/null +++ b/queue-5.0/net-tls-don-t-leak-partially-sent-record-in-device-mode.patch @@ -0,0 +1,132 @@ +From foo@baz Sat Apr 20 16:43:09 CEST 2019 +From: Jakub Kicinski +Date: Wed, 10 Apr 2019 11:04:31 -0700 +Subject: net/tls: don't leak partially sent record in device mode + +From: Jakub Kicinski + +[ Upstream commit 35b71a34ada62c9573847a324bf06a133fe11b11 ] + +David reports that tls triggers warnings related to +sk->sk_forward_alloc not being zero at destruction time: + +WARNING: CPU: 5 PID: 6831 at net/core/stream.c:206 sk_stream_kill_queues+0x103/0x110 +WARNING: CPU: 5 PID: 6831 at net/ipv4/af_inet.c:160 inet_sock_destruct+0x15b/0x170 + +When sender fills up the write buffer and dies from +SIGPIPE. This is due to the device implementation +not cleaning up the partially_sent_record. + +This is because commit a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance") +moved the partial record cleanup to the SW-only path. 
+ +Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance") +Reported-by: David Beckett +Signed-off-by: Jakub Kicinski +Reviewed-by: Dirk van der Merwe +Reviewed-by: Simon Horman +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/tls.h | 2 ++ + net/tls/tls_device.c | 7 +++++++ + net/tls/tls_main.c | 22 ++++++++++++++++++++++ + net/tls/tls_sw.c | 15 +-------------- + 4 files changed, 32 insertions(+), 14 deletions(-) + +--- a/include/net/tls.h ++++ b/include/net/tls.h +@@ -289,6 +289,7 @@ int tls_device_sendmsg(struct sock *sk, + int tls_device_sendpage(struct sock *sk, struct page *page, + int offset, size_t size, int flags); + void tls_device_sk_destruct(struct sock *sk); ++void tls_device_free_resources_tx(struct sock *sk); + void tls_device_init(void); + void tls_device_cleanup(void); + int tls_tx_records(struct sock *sk, int flags); +@@ -312,6 +313,7 @@ int tls_push_sg(struct sock *sk, struct + int flags); + int tls_push_partial_record(struct sock *sk, struct tls_context *ctx, + int flags); ++bool tls_free_partial_record(struct sock *sk, struct tls_context *ctx); + + int tls_push_pending_closed_record(struct sock *sk, struct tls_context *ctx, + int flags, long *timeo); +--- a/net/tls/tls_device.c ++++ b/net/tls/tls_device.c +@@ -219,6 +219,13 @@ void tls_device_sk_destruct(struct sock + } + EXPORT_SYMBOL(tls_device_sk_destruct); + ++void tls_device_free_resources_tx(struct sock *sk) ++{ ++ struct tls_context *tls_ctx = tls_get_ctx(sk); ++ ++ tls_free_partial_record(sk, tls_ctx); ++} ++ + static void tls_append_frag(struct tls_record_info *record, + struct page_frag *pfrag, + int size) +--- a/net/tls/tls_main.c ++++ b/net/tls/tls_main.c +@@ -220,6 +220,26 @@ int tls_push_pending_closed_record(struc + return tls_ctx->push_pending_record(sk, flags); + } + ++bool tls_free_partial_record(struct sock *sk, struct tls_context *ctx) ++{ ++ struct scatterlist *sg; ++ ++ sg = 
ctx->partially_sent_record; ++ if (!sg) ++ return false; ++ ++ while (1) { ++ put_page(sg_page(sg)); ++ sk_mem_uncharge(sk, sg->length); ++ ++ if (sg_is_last(sg)) ++ break; ++ sg++; ++ } ++ ctx->partially_sent_record = NULL; ++ return true; ++} ++ + static void tls_write_space(struct sock *sk) + { + struct tls_context *ctx = tls_get_ctx(sk); +@@ -278,6 +298,8 @@ static void tls_sk_proto_close(struct so + kfree(ctx->tx.rec_seq); + kfree(ctx->tx.iv); + tls_sw_free_resources_tx(sk); ++ } else if (ctx->tx_conf == TLS_HW) { ++ tls_device_free_resources_tx(sk); + } + + if (ctx->rx_conf == TLS_SW) { +--- a/net/tls/tls_sw.c ++++ b/net/tls/tls_sw.c +@@ -1804,20 +1804,7 @@ void tls_sw_free_resources_tx(struct soc + /* Free up un-sent records in tx_list. First, free + * the partially sent record if any at head of tx_list. + */ +- if (tls_ctx->partially_sent_record) { +- struct scatterlist *sg = tls_ctx->partially_sent_record; +- +- while (1) { +- put_page(sg_page(sg)); +- sk_mem_uncharge(sk, sg->length); +- +- if (sg_is_last(sg)) +- break; +- sg++; +- } +- +- tls_ctx->partially_sent_record = NULL; +- ++ if (tls_free_partial_record(sk, tls_ctx)) { + rec = list_first_entry(&ctx->tx_list, + struct tls_rec, list); + list_del(&rec->list); diff --git a/queue-5.0/net-tls-fix-build-without-config_tls_device.patch b/queue-5.0/net-tls-fix-build-without-config_tls_device.patch new file mode 100644 index 00000000000..eba0ad964c9 --- /dev/null +++ b/queue-5.0/net-tls-fix-build-without-config_tls_device.patch @@ -0,0 +1,34 @@ +From foo@baz Sat Apr 20 16:43:09 CEST 2019 +From: Jakub Kicinski +Date: Wed, 10 Apr 2019 16:23:39 -0700 +Subject: net/tls: fix build without CONFIG_TLS_DEVICE + +From: Jakub Kicinski + +[ Upstream commit 903f1a187776bb8d79b13618ec05b25f86318885 ] + +buildbot noticed that TLS_HW is not defined if CONFIG_TLS_DEVICE=n. +Wrap the cleanup branch into an ifdef, tls_device_free_resources_tx() +wouldn't be compiled either in this case. 
+ +Fixes: 35b71a34ada6 ("net/tls: don't leak partially sent record in device mode") +Signed-off-by: Jakub Kicinski +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/tls/tls_main.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/tls/tls_main.c ++++ b/net/tls/tls_main.c +@@ -298,8 +298,10 @@ static void tls_sk_proto_close(struct so + kfree(ctx->tx.rec_seq); + kfree(ctx->tx.iv); + tls_sw_free_resources_tx(sk); ++#ifdef CONFIG_TLS_DEVICE + } else if (ctx->tx_conf == TLS_HW) { + tls_device_free_resources_tx(sk); ++#endif + } + + if (ctx->rx_conf == TLS_SW) { diff --git a/queue-5.0/net-tls-fix-the-iv-leaks.patch b/queue-5.0/net-tls-fix-the-iv-leaks.patch new file mode 100644 index 00000000000..08c0be0ce93 --- /dev/null +++ b/queue-5.0/net-tls-fix-the-iv-leaks.patch @@ -0,0 +1,40 @@ +From foo@baz Sat Apr 20 16:43:09 CEST 2019 +From: Jakub Kicinski +Date: Wed, 10 Apr 2019 11:04:30 -0700 +Subject: net/tls: fix the IV leaks + +From: Jakub Kicinski + +[ Upstream commit 5a03bc73abed6ae196c15e9950afde19d48be12c ] + +Commit f66de3ee2c16 ("net/tls: Split conf to rx + tx") made +freeing of IV and record sequence number conditional to SW +path only, but commit e8f69799810c ("net/tls: Add generic NIC +offload infrastructure") also allocates that state for the +device offload configuration. Remember to free it. + +Fixes: e8f69799810c ("net/tls: Add generic NIC offload infrastructure") +Signed-off-by: Jakub Kicinski +Reviewed-by: Dirk van der Merwe +Reviewed-by: Simon Horman +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/tls/tls_device.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/net/tls/tls_device.c ++++ b/net/tls/tls_device.c +@@ -52,8 +52,11 @@ static DEFINE_SPINLOCK(tls_device_lock); + + static void tls_device_free_ctx(struct tls_context *ctx) + { +- if (ctx->tx_conf == TLS_HW) ++ if (ctx->tx_conf == TLS_HW) { + kfree(tls_offload_ctx_tx(ctx)); ++ kfree(ctx->tx.rec_seq); ++ kfree(ctx->tx.iv); ++ } + + if (ctx->rx_conf == TLS_HW) + kfree(tls_offload_ctx_rx(ctx)); diff --git a/queue-5.0/net-tls-prevent-bad-memory-access-in-tls_is_sk_tx_device_offloaded.patch b/queue-5.0/net-tls-prevent-bad-memory-access-in-tls_is_sk_tx_device_offloaded.patch new file mode 100644 index 00000000000..abc3250e6ce --- /dev/null +++ b/queue-5.0/net-tls-prevent-bad-memory-access-in-tls_is_sk_tx_device_offloaded.patch @@ -0,0 +1,36 @@ +From foo@baz Sat Apr 20 16:43:09 CEST 2019 +From: Jakub Kicinski +Date: Mon, 8 Apr 2019 17:59:50 -0700 +Subject: net/tls: prevent bad memory access in tls_is_sk_tx_device_offloaded() + +From: Jakub Kicinski + +[ Upstream commit b4f47f3848eb70986f75d06112af7b48b7f5f462 ] + +Unlike '&&' operator, the '&' does not have short-circuit +evaluation semantics. IOW both sides of the operator always +get evaluated. Fix the wrong operator in +tls_is_sk_tx_device_offloaded(), which would lead to +out-of-bounds access for non-full sockets. + +Fixes: 4799ac81e52a ("tls: Add rx inline crypto offload") +Signed-off-by: Jakub Kicinski +Reviewed-by: Dirk van der Merwe +Reviewed-by: Simon Horman +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/tls.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/include/net/tls.h ++++ b/include/net/tls.h +@@ -366,7 +366,7 @@ tls_validate_xmit_skb(struct sock *sk, s + static inline bool tls_is_sk_tx_device_offloaded(struct sock *sk) + { + #ifdef CONFIG_SOCK_VALIDATE_XMIT +- return sk_fullsock(sk) & ++ return sk_fullsock(sk) && + (smp_load_acquire(&sk->sk_validate_xmit_skb) == + &tls_validate_xmit_skb); + #else diff --git a/queue-5.0/nfc-nci-add-some-bounds-checking-in-nci_hci_cmd_received.patch b/queue-5.0/nfc-nci-add-some-bounds-checking-in-nci_hci_cmd_received.patch new file mode 100644 index 00000000000..7f8d7fe99fd --- /dev/null +++ b/queue-5.0/nfc-nci-add-some-bounds-checking-in-nci_hci_cmd_received.patch @@ -0,0 +1,51 @@ +From foo@baz Sat Apr 20 16:43:09 CEST 2019 +From: Dan Carpenter +Date: Wed, 3 Apr 2019 10:12:48 +0300 +Subject: NFC: nci: Add some bounds checking in nci_hci_cmd_received() + +From: Dan Carpenter + +[ Upstream commit d7ee81ad09f072eab1681877fc71ec05f9c1ae92 ] + +This is similar to commit 674d9de02aa7 ("NFC: Fix possible memory +corruption when handling SHDLC I-Frame commands"). + +I'm not totally sure, but I think that commit description may have +overstated the danger. I was under the impression that this data came +from the firmware? If you can't trust your networking firmware, then +you're already in trouble. + +Anyway, these days we add bounds checking where ever we can and we call +it kernel hardening. Better safe than sorry. 
+ +Fixes: 11f54f228643 ("NFC: nci: Add HCI over NCI protocol support") +Signed-off-by: Dan Carpenter +Signed-off-by: Greg Kroah-Hartman +--- + net/nfc/nci/hci.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/net/nfc/nci/hci.c ++++ b/net/nfc/nci/hci.c +@@ -312,6 +312,10 @@ static void nci_hci_cmd_received(struct + create_info = (struct nci_hci_create_pipe_resp *)skb->data; + dest_gate = create_info->dest_gate; + new_pipe = create_info->pipe; ++ if (new_pipe >= NCI_HCI_MAX_PIPES) { ++ status = NCI_HCI_ANY_E_NOK; ++ goto exit; ++ } + + /* Save the new created pipe and bind with local gate, + * the description for skb->data[3] is destination gate id +@@ -336,6 +340,10 @@ static void nci_hci_cmd_received(struct + goto exit; + } + delete_info = (struct nci_hci_delete_pipe_noti *)skb->data; ++ if (delete_info->pipe >= NCI_HCI_MAX_PIPES) { ++ status = NCI_HCI_ANY_E_NOK; ++ goto exit; ++ } + + ndev->hci_dev->pipes[delete_info->pipe].gate = + NCI_HCI_INVALID_GATE; diff --git a/queue-5.0/nfc-nci-potential-off-by-one-in-pipes-array.patch b/queue-5.0/nfc-nci-potential-off-by-one-in-pipes-array.patch new file mode 100644 index 00000000000..3cff1020ce5 --- /dev/null +++ b/queue-5.0/nfc-nci-potential-off-by-one-in-pipes-array.patch @@ -0,0 +1,38 @@ +From foo@baz Sat Apr 20 16:43:09 CEST 2019 +From: Dan Carpenter +Date: Wed, 3 Apr 2019 10:13:51 +0300 +Subject: nfc: nci: Potential off by one in ->pipes[] array + +From: Dan Carpenter + +[ Upstream commit 6491d698396fd5da4941980a35ca7c162a672016 ] + +This is similar to commit e285d5bfb7e9 ("NFC: Fix the number of pipes") +where we changed NFC_HCI_MAX_PIPES from 127 to 128. + +As the comment next to the define explains, the pipe identifier is 7 +bits long. The highest possible pipe is 127, but the number of possible +pipes is 128. As the code is now, then there is potential for an +out of bounds array access: + + net/nfc/nci/hci.c:297 nci_hci_cmd_received() warn: array off by one? 
+ 'ndev->hci_dev->pipes[pipe]' '0-127 == 127' + +Fixes: 11f54f228643 ("NFC: nci: Add HCI over NCI protocol support") +Signed-off-by: Dan Carpenter +Signed-off-by: Greg Kroah-Hartman +--- + include/net/nfc/nci_core.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/include/net/nfc/nci_core.h ++++ b/include/net/nfc/nci_core.h +@@ -166,7 +166,7 @@ struct nci_conn_info { + * According to specification 102 622 chapter 4.4 Pipes, + * the pipe identifier is 7 bits long. + */ +-#define NCI_HCI_MAX_PIPES 127 ++#define NCI_HCI_MAX_PIPES 128 + + struct nci_hci_gate { + u8 gate; diff --git a/queue-5.0/nfp-flower-remove-vlan-cfi-bit-from-push-vlan-action.patch b/queue-5.0/nfp-flower-remove-vlan-cfi-bit-from-push-vlan-action.patch new file mode 100644 index 00000000000..4df5516cc08 --- /dev/null +++ b/queue-5.0/nfp-flower-remove-vlan-cfi-bit-from-push-vlan-action.patch @@ -0,0 +1,43 @@ +From foo@baz Sat Apr 20 16:43:09 CEST 2019 +From: Pieter Jansen van Vuuren +Date: Mon, 1 Apr 2019 19:36:34 -0700 +Subject: nfp: flower: remove vlan CFI bit from push vlan action + +From: Pieter Jansen van Vuuren + +[ Upstream commit 42cd5484a22f1a1b947e21e2af65fa7dab09d017 ] + +We no longer set CFI when pushing vlan tags, therefore we remove +the CFI bit from push vlan. 
+ +Fixes: 1a1e586f54bf ("nfp: add basic action capabilities to flower offloads") +Signed-off-by: Pieter Jansen van Vuuren +Signed-off-by: Louis Peens +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/netronome/nfp/flower/action.c | 3 +-- + drivers/net/ethernet/netronome/nfp/flower/cmsg.h | 1 - + 2 files changed, 1 insertion(+), 3 deletions(-) + +--- a/drivers/net/ethernet/netronome/nfp/flower/action.c ++++ b/drivers/net/ethernet/netronome/nfp/flower/action.c +@@ -49,8 +49,7 @@ nfp_fl_push_vlan(struct nfp_fl_push_vlan + + tmp_push_vlan_tci = + FIELD_PREP(NFP_FL_PUSH_VLAN_PRIO, tcf_vlan_push_prio(action)) | +- FIELD_PREP(NFP_FL_PUSH_VLAN_VID, tcf_vlan_push_vid(action)) | +- NFP_FL_PUSH_VLAN_CFI; ++ FIELD_PREP(NFP_FL_PUSH_VLAN_VID, tcf_vlan_push_vid(action)); + push_vlan->vlan_tci = cpu_to_be16(tmp_push_vlan_tci); + } + +--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h ++++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +@@ -82,7 +82,6 @@ + #define NFP_FL_OUT_FLAGS_TYPE_IDX GENMASK(2, 0) + + #define NFP_FL_PUSH_VLAN_PRIO GENMASK(15, 13) +-#define NFP_FL_PUSH_VLAN_CFI BIT(12) + #define NFP_FL_PUSH_VLAN_VID GENMASK(11, 0) + + #define IPV6_FLOW_LABEL_MASK cpu_to_be32(0x000fffff) diff --git a/queue-5.0/nfp-flower-replace-cfi-with-vlan-present.patch b/queue-5.0/nfp-flower-replace-cfi-with-vlan-present.patch new file mode 100644 index 00000000000..4b62ca83492 --- /dev/null +++ b/queue-5.0/nfp-flower-replace-cfi-with-vlan-present.patch @@ -0,0 +1,57 @@ +From foo@baz Sat Apr 20 16:43:09 CEST 2019 +From: Pieter Jansen van Vuuren +Date: Mon, 1 Apr 2019 19:36:33 -0700 +Subject: nfp: flower: replace CFI with vlan present + +From: Pieter Jansen van Vuuren + +[ Upstream commit f7ee799a51ddbcc205ef615fe424fb5084e9e0aa ] + +Replace vlan CFI bit with a vlan present bit that indicates the +presence of a vlan tag. Previously the driver incorrectly assumed +that a vlan id of 0 is not matchable, therefore we indicate vlan +presence with a vlan present bit. 
+ +Fixes: 5571e8c9f241 ("nfp: extend flower matching capabilities") +Signed-off-by: Pieter Jansen van Vuuren +Signed-off-by: Louis Peens +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/netronome/nfp/flower/cmsg.h | 2 +- + drivers/net/ethernet/netronome/nfp/flower/match.c | 14 ++++++-------- + 2 files changed, 7 insertions(+), 9 deletions(-) + +--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h ++++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +@@ -26,7 +26,7 @@ + #define NFP_FLOWER_LAYER2_GENEVE_OP BIT(6) + + #define NFP_FLOWER_MASK_VLAN_PRIO GENMASK(15, 13) +-#define NFP_FLOWER_MASK_VLAN_CFI BIT(12) ++#define NFP_FLOWER_MASK_VLAN_PRESENT BIT(12) + #define NFP_FLOWER_MASK_VLAN_VID GENMASK(11, 0) + + #define NFP_FLOWER_MASK_MPLS_LB GENMASK(31, 12) +--- a/drivers/net/ethernet/netronome/nfp/flower/match.c ++++ b/drivers/net/ethernet/netronome/nfp/flower/match.c +@@ -26,14 +26,12 @@ nfp_flower_compile_meta_tci(struct nfp_f + FLOW_DISSECTOR_KEY_VLAN, + target); + /* Populate the tci field. 
*/ +- if (flow_vlan->vlan_id || flow_vlan->vlan_priority) { +- tmp_tci = FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO, +- flow_vlan->vlan_priority) | +- FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID, +- flow_vlan->vlan_id) | +- NFP_FLOWER_MASK_VLAN_CFI; +- frame->tci = cpu_to_be16(tmp_tci); +- } ++ tmp_tci = NFP_FLOWER_MASK_VLAN_PRESENT; ++ tmp_tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO, ++ flow_vlan->vlan_priority) | ++ FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID, ++ flow_vlan->vlan_id); ++ frame->tci = cpu_to_be16(tmp_tci); + } + } + diff --git a/queue-5.0/revert-net-mlx5e-enable-reporting-checksum-unnecessary-also-for-l3-packets.patch b/queue-5.0/revert-net-mlx5e-enable-reporting-checksum-unnecessary-also-for-l3-packets.patch new file mode 100644 index 00000000000..694f53eb58d --- /dev/null +++ b/queue-5.0/revert-net-mlx5e-enable-reporting-checksum-unnecessary-also-for-l3-packets.patch @@ -0,0 +1,46 @@ +From foo@baz Sat Apr 20 16:43:09 CEST 2019 +From: Or Gerlitz +Date: Sun, 31 Mar 2019 12:53:03 +0000 +Subject: Revert "net/mlx5e: Enable reporting checksum unnecessary also for L3 packets" + +From: Or Gerlitz + +[ Upstream commit 8c8811d46d00d119ffbe039a6e52a0b504df1c2c ] + +This reverts commit b820e6fb0978f9c2ac438c199d2bb2f35950e9c9. + +Prior to the commit we are reverting, checksum unnecessary was only set when +both the L3 OK and L4 OK bits are set on the CQE. This caused packets +of IP protocols such as SCTP which are not dealt with by the current HW L4 +parser (hence the L4 OK bit is not set, but the L4 header type none bit +is set) to go through the checksum none code, where currently we wrongly +report checksum unnecessary for them, a regression. Fix this by a revert. + +Note that on our usual track we report checksum complete, so the revert +isn't expected to have any notable performance impact. Also, when we are +not on the checksum complete track, the L4 protocols for which we report +checksum none are not high performance ones, we will still report +checksum unnecessary for UDP/TCP. 
+ +Fixes: b820e6fb0978 ("net/mlx5e: Enable reporting checksum unnecessary also for L3 packets") +Signed-off-by: Or Gerlitz +Reported-by: Avi Urman +Reviewed-by: Tariq Toukan +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +@@ -848,8 +848,7 @@ static inline void mlx5e_handle_csum(str + + csum_unnecessary: + if (likely((cqe->hds_ip_ext & CQE_L3_OK) && +- ((cqe->hds_ip_ext & CQE_L4_OK) || +- (get_cqe_l4_hdr_type(cqe) == CQE_L4_HDR_TYPE_NONE)))) { ++ (cqe->hds_ip_ext & CQE_L4_OK))) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + if (cqe_is_tunneled(cqe)) { + skb->csum_level = 1; diff --git a/queue-5.0/route-avoid-crash-from-dereferencing-null-rt-from.patch b/queue-5.0/route-avoid-crash-from-dereferencing-null-rt-from.patch new file mode 100644 index 00000000000..f6a4f6c326a --- /dev/null +++ b/queue-5.0/route-avoid-crash-from-dereferencing-null-rt-from.patch @@ -0,0 +1,54 @@ +From foo@baz Sat Apr 20 16:43:09 CEST 2019 +From: Jonathan Lemon +Date: Sun, 14 Apr 2019 14:21:29 -0700 +Subject: route: Avoid crash from dereferencing NULL rt->from + +From: Jonathan Lemon + +[ Upstream commit 9c69a13205151c0d801de9f9d83a818e6e8f60ec ] + +When __ip6_rt_update_pmtu() is called, rt->from is RCU dereferenced, but is +never checked for null - rt6_flush_exceptions() may have removed the entry. + +[ 1913.989004] RIP: 0010:ip6_rt_cache_alloc+0x13/0x170 +[ 1914.209410] Call Trace: +[ 1914.214798] +[ 1914.219226] __ip6_rt_update_pmtu+0xb0/0x190 +[ 1914.228649] ip6_tnl_xmit+0x2c2/0x970 [ip6_tunnel] +[ 1914.239223] ? ip6_tnl_parse_tlv_enc_lim+0x32/0x1a0 [ip6_tunnel] +[ 1914.252489] ? 
__gre6_xmit+0x148/0x530 [ip6_gre] +[ 1914.262678] ip6gre_tunnel_xmit+0x17e/0x3c7 [ip6_gre] +[ 1914.273831] dev_hard_start_xmit+0x8d/0x1f0 +[ 1914.283061] sch_direct_xmit+0xfa/0x230 +[ 1914.291521] __qdisc_run+0x154/0x4b0 +[ 1914.299407] net_tx_action+0x10e/0x1f0 +[ 1914.307678] __do_softirq+0xca/0x297 +[ 1914.315567] irq_exit+0x96/0xa0 +[ 1914.322494] smp_apic_timer_interrupt+0x68/0x130 +[ 1914.332683] apic_timer_interrupt+0xf/0x20 +[ 1914.341721] + +Fixes: a68886a69180 ("net/ipv6: Make from in rt6_info rcu protected") +Signed-off-by: Jonathan Lemon +Reviewed-by: Eric Dumazet +Reviewed-by: David Ahern +Reviewed-by: Martin KaFai Lau +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/route.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -2336,6 +2336,10 @@ static void __ip6_rt_update_pmtu(struct + + rcu_read_lock(); + from = rcu_dereference(rt6->from); ++ if (!from) { ++ rcu_read_unlock(); ++ return; ++ } + nrt6 = ip6_rt_cache_alloc(from, daddr, saddr); + if (nrt6) { + rt6_do_update_pmtu(nrt6, mtu); diff --git a/queue-5.0/sch_cake-make-sure-we-can-write-the-ip-header-before-changing-dscp-bits.patch b/queue-5.0/sch_cake-make-sure-we-can-write-the-ip-header-before-changing-dscp-bits.patch new file mode 100644 index 00000000000..aa4c64460f8 --- /dev/null +++ b/queue-5.0/sch_cake-make-sure-we-can-write-the-ip-header-before-changing-dscp-bits.patch @@ -0,0 +1,50 @@ +From foo@baz Sat Apr 20 16:43:09 CEST 2019 +From: "Toke Høiland-Jørgensen" +Date: Thu, 4 Apr 2019 15:01:33 +0200 +Subject: sch_cake: Make sure we can write the IP header before changing DSCP bits + +From: "Toke Høiland-Jørgensen" + +[ Upstream commit c87b4ecdbe8db27867a7b7f840291cd843406bd7 ] + +There is not actually any guarantee that the IP headers are valid before we +access the DSCP bits of the packets. Fix this using the same approach taken +in sch_dsmark. 
+ +Reported-by: Kevin Darbyshire-Bryant +Signed-off-by: Toke Høiland-Jørgensen +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_cake.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +--- a/net/sched/sch_cake.c ++++ b/net/sched/sch_cake.c +@@ -1524,16 +1524,27 @@ static void cake_wash_diffserv(struct sk + + static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash) + { ++ int wlen = skb_network_offset(skb); + u8 dscp; + + switch (tc_skb_protocol(skb)) { + case htons(ETH_P_IP): ++ wlen += sizeof(struct iphdr); ++ if (!pskb_may_pull(skb, wlen) || ++ skb_try_make_writable(skb, wlen)) ++ return 0; ++ + dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2; + if (wash && dscp) + ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0); + return dscp; + + case htons(ETH_P_IPV6): ++ wlen += sizeof(struct ipv6hdr); ++ if (!pskb_may_pull(skb, wlen) || ++ skb_try_make_writable(skb, wlen)) ++ return 0; ++ + dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2; + if (wash && dscp) + ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0); diff --git a/queue-5.0/sch_cake-simplify-logic-in-cake_select_tin.patch b/queue-5.0/sch_cake-simplify-logic-in-cake_select_tin.patch new file mode 100644 index 00000000000..a1f760273f3 --- /dev/null +++ b/queue-5.0/sch_cake-simplify-logic-in-cake_select_tin.patch @@ -0,0 +1,86 @@ +From foo@baz Sat Apr 20 16:43:09 CEST 2019 +From: "Toke Høiland-Jørgensen" +Date: Fri, 5 Apr 2019 15:01:59 +0200 +Subject: sch_cake: Simplify logic in cake_select_tin() + +From: "Toke Høiland-Jørgensen" + +[ Upstream commit 4976e3c683f328bc6f2edef555a4ffee6524486f ] + +The logic in cake_select_tin() was getting a bit hairy, and it turns out we +can simplify it quite a bit. This also allows us to get rid of one of the +two diffserv parsing functions, which has the added benefit that +already-zeroed DSCP fields won't get re-written. 
+ +Suggested-by: Kevin Darbyshire-Bryant +Signed-off-by: Toke Høiland-Jørgensen +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_cake.c | 44 ++++++++++++++++---------------------------- + 1 file changed, 16 insertions(+), 28 deletions(-) + +--- a/net/sched/sch_cake.c ++++ b/net/sched/sch_cake.c +@@ -1508,20 +1508,6 @@ static unsigned int cake_drop(struct Qdi + return idx + (tin << 16); + } + +-static void cake_wash_diffserv(struct sk_buff *skb) +-{ +- switch (skb->protocol) { +- case htons(ETH_P_IP): +- ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0); +- break; +- case htons(ETH_P_IPV6): +- ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0); +- break; +- default: +- break; +- } +-} +- + static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash) + { + int wlen = skb_network_offset(skb); +@@ -1564,25 +1550,27 @@ static struct cake_tin_data *cake_select + { + struct cake_sched_data *q = qdisc_priv(sch); + u32 tin; ++ u8 dscp; ++ ++ /* Tin selection: Default to diffserv-based selection, allow overriding ++ * using firewall marks or skb->priority. 
++ */ ++ dscp = cake_handle_diffserv(skb, ++ q->rate_flags & CAKE_FLAG_WASH); ++ ++ if (q->tin_mode == CAKE_DIFFSERV_BESTEFFORT) ++ tin = 0; + +- if (TC_H_MAJ(skb->priority) == sch->handle && +- TC_H_MIN(skb->priority) > 0 && +- TC_H_MIN(skb->priority) <= q->tin_cnt) { ++ else if (TC_H_MAJ(skb->priority) == sch->handle && ++ TC_H_MIN(skb->priority) > 0 && ++ TC_H_MIN(skb->priority) <= q->tin_cnt) + tin = q->tin_order[TC_H_MIN(skb->priority) - 1]; + +- if (q->rate_flags & CAKE_FLAG_WASH) +- cake_wash_diffserv(skb); +- } else if (q->tin_mode != CAKE_DIFFSERV_BESTEFFORT) { +- /* extract the Diffserv Precedence field, if it exists */ +- /* and clear DSCP bits if washing */ +- tin = q->tin_index[cake_handle_diffserv(skb, +- q->rate_flags & CAKE_FLAG_WASH)]; ++ else { ++ tin = q->tin_index[dscp]; ++ + if (unlikely(tin >= q->tin_cnt)) + tin = 0; +- } else { +- tin = 0; +- if (q->rate_flags & CAKE_FLAG_WASH) +- cake_wash_diffserv(skb); + } + + return &q->tins[tin]; diff --git a/queue-5.0/sch_cake-use-tc_skb_protocol-helper-for-getting-packet-protocol.patch b/queue-5.0/sch_cake-use-tc_skb_protocol-helper-for-getting-packet-protocol.patch new file mode 100644 index 00000000000..d25b3dff4b6 --- /dev/null +++ b/queue-5.0/sch_cake-use-tc_skb_protocol-helper-for-getting-packet-protocol.patch @@ -0,0 +1,31 @@ +From foo@baz Sat Apr 20 16:43:09 CEST 2019 +From: "Toke Høiland-Jørgensen" +Date: Thu, 4 Apr 2019 15:01:33 +0200 +Subject: sch_cake: Use tc_skb_protocol() helper for getting packet protocol + +From: "Toke Høiland-Jørgensen" + +[ Upstream commit b2100cc56fca8c51d28aa42a9f1fbcb2cf351996 ] + +We shouldn't be using skb->protocol directly as that will miss cases with +hardware-accelerated VLAN tags. Use the helper instead to get the right +protocol number. 
+ +Reported-by: Kevin Darbyshire-Bryant +Signed-off-by: Toke Høiland-Jørgensen +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_cake.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/sched/sch_cake.c ++++ b/net/sched/sch_cake.c +@@ -1526,7 +1526,7 @@ static u8 cake_handle_diffserv(struct sk + { + u8 dscp; + +- switch (skb->protocol) { ++ switch (tc_skb_protocol(skb)) { + case htons(ETH_P_IP): + dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2; + if (wash && dscp) diff --git a/queue-5.0/series b/queue-5.0/series new file mode 100644 index 00000000000..14042580d74 --- /dev/null +++ b/queue-5.0/series @@ -0,0 +1,43 @@ +bonding-fix-event-handling-for-stacked-bonds.patch +failover-allow-name-change-on-iff_up-slave-interfaces.patch +net-atm-fix-potential-spectre-v1-vulnerabilities.patch +net-bridge-fix-per-port-af_packet-sockets.patch +net-bridge-multicast-use-rcu-to-access-port-list-from-br_multicast_start_querier.patch +net-fec-manage-ahb-clock-in-runtime-pm.patch +net-fix-missing-meta-data-in-skb-with-vlan-packet.patch +net-fou-do-not-use-guehdr-after-iptunnel_pull_offloads-in-gue_udp_recv.patch +tcp-tcp_grow_window-needs-to-respect-tcp_space.patch +team-set-slave-to-promisc-if-team-is-already-in-promisc-mode.patch +tipc-missing-entries-in-name-table-of-publications.patch +vhost-reject-zero-size-iova-range.patch +ipv4-recompile-ip-options-in-ipv4_link_failure.patch +ipv4-ensure-rcu_read_lock-in-ipv4_link_failure.patch +mlxsw-spectrum_switchdev-add-mdb-entries-in-prepare-phase.patch +mlxsw-core-do-not-use-wq_mem_reclaim-for-emad-workqueue.patch +mlxsw-core-do-not-use-wq_mem_reclaim-for-mlxsw-ordered-workqueue.patch +mlxsw-core-do-not-use-wq_mem_reclaim-for-mlxsw-workqueue.patch +mlxsw-spectrum_router-do-not-check-vrf-mac-address.patch +net-thunderx-raise-xdp-mtu-to-1508.patch +net-thunderx-don-t-allow-jumbo-frames-with-xdp.patch +net-tls-fix-the-iv-leaks.patch +net-tls-don-t-leak-partially-sent-record-in-device-mode.patch 
+net-strparser-partially-revert-strparser-call-skb_unclone-conditionally.patch +net-tls-fix-build-without-config_tls_device.patch +net-bridge-fix-netlink-export-of-vlan_stats_per_port-option.patch +net-mlx5e-xdp-avoid-checksum-complete-when-xdp-prog-is-loaded.patch +net-mlx5e-protect-against-non-uplink-representor-for-encap.patch +net-mlx5e-switch-to-toeplitz-rss-hash-by-default.patch +net-mlx5e-rx-fixup-skb-checksum-for-packets-with-tail-padding.patch +net-mlx5e-rx-check-ip-headers-sanity.patch +revert-net-mlx5e-enable-reporting-checksum-unnecessary-also-for-l3-packets.patch +net-mlx5-fpga-tls-hold-rcu-read-lock-a-bit-longer.patch +net-tls-prevent-bad-memory-access-in-tls_is_sk_tx_device_offloaded.patch +net-mlx5-fpga-tls-idr-remove-on-flow-delete.patch +route-avoid-crash-from-dereferencing-null-rt-from.patch +nfp-flower-replace-cfi-with-vlan-present.patch +nfp-flower-remove-vlan-cfi-bit-from-push-vlan-action.patch +sch_cake-use-tc_skb_protocol-helper-for-getting-packet-protocol.patch +sch_cake-make-sure-we-can-write-the-ip-header-before-changing-dscp-bits.patch +nfc-nci-add-some-bounds-checking-in-nci_hci_cmd_received.patch +nfc-nci-potential-off-by-one-in-pipes-array.patch +sch_cake-simplify-logic-in-cake_select_tin.patch diff --git a/queue-5.0/tcp-tcp_grow_window-needs-to-respect-tcp_space.patch b/queue-5.0/tcp-tcp_grow_window-needs-to-respect-tcp_space.patch new file mode 100644 index 00000000000..bace19c234a --- /dev/null +++ b/queue-5.0/tcp-tcp_grow_window-needs-to-respect-tcp_space.patch @@ -0,0 +1,63 @@ +From foo@baz Sat Apr 20 16:43:09 CEST 2019 +From: Eric Dumazet +Date: Tue, 16 Apr 2019 10:55:20 -0700 +Subject: tcp: tcp_grow_window() needs to respect tcp_space() + +From: Eric Dumazet + +[ Upstream commit 50ce163a72d817a99e8974222dcf2886d5deb1ae ] + +For some reason, tcp_grow_window() correctly tests if enough room +is present before attempting to increase tp->rcv_ssthresh, +but does not prevent it to grow past tcp_space() + +This is causing hard to 
debug issues, like failing +the (__tcp_select_window(sk) >= tp->rcv_wnd) test +in __tcp_ack_snd_check(), causing ACK delays and possibly +slow flows. + +Depending on tcp_rmem[2], MTU, skb->len/skb->truesize ratio, +we can see the problem happening on "netperf -t TCP_RR -- -r 2000,2000" +after about 60 round trips, when the active side no longer sends +immediate acks. + +This bug predates git history. + +Signed-off-by: Eric Dumazet +Acked-by: Soheil Hassas Yeganeh +Acked-by: Neal Cardwell +Acked-by: Wei Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_input.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -402,11 +402,12 @@ static int __tcp_grow_window(const struc + static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) + { + struct tcp_sock *tp = tcp_sk(sk); ++ int room; ++ ++ room = min_t(int, tp->window_clamp, tcp_space(sk)) - tp->rcv_ssthresh; + + /* Check #1 */ +- if (tp->rcv_ssthresh < tp->window_clamp && +- (int)tp->rcv_ssthresh < tcp_space(sk) && +- !tcp_under_memory_pressure(sk)) { ++ if (room > 0 && !tcp_under_memory_pressure(sk)) { + int incr; + + /* Check #2. 
Increase window, if skb with such overhead +@@ -419,8 +420,7 @@ static void tcp_grow_window(struct sock + + if (incr) { + incr = max_t(int, incr, 2 * skb->len); +- tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, +- tp->window_clamp); ++ tp->rcv_ssthresh += min(room, incr); + inet_csk(sk)->icsk_ack.quick |= 1; + } + } diff --git a/queue-5.0/team-set-slave-to-promisc-if-team-is-already-in-promisc-mode.patch b/queue-5.0/team-set-slave-to-promisc-if-team-is-already-in-promisc-mode.patch new file mode 100644 index 00000000000..80bc104bf33 --- /dev/null +++ b/queue-5.0/team-set-slave-to-promisc-if-team-is-already-in-promisc-mode.patch @@ -0,0 +1,73 @@ +From foo@baz Sat Apr 20 16:43:09 CEST 2019 +From: Hangbin Liu +Date: Mon, 8 Apr 2019 16:45:17 +0800 +Subject: team: set slave to promisc if team is already in promisc mode + +From: Hangbin Liu + +[ Upstream commit 43c2adb9df7ddd6560fd3546d925b42cef92daa0 ] + +After adding a team interface to bridge, the team interface will enter +promisc mode. Then if we add a new slave to team0, the slave will keep +promisc off. Fix it by setting slave to promisc on if team master is +already in promisc mode, also do the same for allmulti. + +v2: add promisc and allmulti checking when delete ports + +Fixes: 3d249d4ca7d0 ("net: introduce ethernet teaming device") +Signed-off-by: Hangbin Liu +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/team/team.c | 26 ++++++++++++++++++++++++++ + 1 file changed, 26 insertions(+) + +--- a/drivers/net/team/team.c ++++ b/drivers/net/team/team.c +@@ -1247,6 +1247,23 @@ static int team_port_add(struct team *te + goto err_option_port_add; + } + ++ /* set promiscuity level to new slave */ ++ if (dev->flags & IFF_PROMISC) { ++ err = dev_set_promiscuity(port_dev, 1); ++ if (err) ++ goto err_set_slave_promisc; ++ } ++ ++ /* set allmulti level to new slave */ ++ if (dev->flags & IFF_ALLMULTI) { ++ err = dev_set_allmulti(port_dev, 1); ++ if (err) { ++ if (dev->flags & IFF_PROMISC) ++ dev_set_promiscuity(port_dev, -1); ++ goto err_set_slave_promisc; ++ } ++ } ++ + netif_addr_lock_bh(dev); + dev_uc_sync_multiple(port_dev, dev); + dev_mc_sync_multiple(port_dev, dev); +@@ -1263,6 +1280,9 @@ static int team_port_add(struct team *te + + return 0; + ++err_set_slave_promisc: ++ __team_option_inst_del_port(team, port); ++ + err_option_port_add: + team_upper_dev_unlink(team, port); + +@@ -1308,6 +1328,12 @@ static int team_port_del(struct team *te + + team_port_disable(team, port); + list_del_rcu(&port->list); ++ ++ if (dev->flags & IFF_PROMISC) ++ dev_set_promiscuity(port_dev, -1); ++ if (dev->flags & IFF_ALLMULTI) ++ dev_set_allmulti(port_dev, -1); ++ + team_upper_dev_unlink(team, port); + netdev_rx_handler_unregister(port_dev); + team_port_disable_netpoll(port); diff --git a/queue-5.0/tipc-missing-entries-in-name-table-of-publications.patch b/queue-5.0/tipc-missing-entries-in-name-table-of-publications.patch new file mode 100644 index 00000000000..b017f09bded --- /dev/null +++ b/queue-5.0/tipc-missing-entries-in-name-table-of-publications.patch @@ -0,0 +1,43 @@ +From foo@baz Sat Apr 20 16:43:09 CEST 2019 +From: Hoang Le +Date: Tue, 9 Apr 2019 14:59:24 +0700 +Subject: tipc: missing entries in name table of publications + +From: Hoang Le + +[ Upstream commit d1841533e54876f152a30ac398a34f47ad6590b1 ] + +When binding 
multiple services with specific type 1Ki, 2Ki.., +this leads to some entries in the name table of publications +missing when listed out via 'tipc name show'. + +The problem is in identifying a zero last_type condition provided +via netlink. The first case is the initial 'type' when starting name table +dumping. The second is continuing with zero type (node state +service type). Then, the lookup function fails to find the node state +service type in the next iteration. + +To solve this, add one more condition to mark the type as dirty and +look up the correct service type for the next iteration instead of selecting +the first service as initial 'type' zero. + +Acked-by: Jon Maloy +Signed-off-by: Hoang Le +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/tipc/name_table.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/tipc/name_table.c ++++ b/net/tipc/name_table.c +@@ -909,7 +909,8 @@ static int tipc_nl_service_list(struct n + for (; i < TIPC_NAMETBL_SIZE; i++) { + head = &tn->nametbl->services[i]; + +- if (*last_type) { ++ if (*last_type || ++ (!i && *last_key && (*last_lower == *last_key))) { + service = tipc_service_find(net, *last_type); + if (!service) + return -EPIPE; diff --git a/queue-5.0/vhost-reject-zero-size-iova-range.patch b/queue-5.0/vhost-reject-zero-size-iova-range.patch new file mode 100644 index 00000000000..71758ad88db --- /dev/null +++ b/queue-5.0/vhost-reject-zero-size-iova-range.patch @@ -0,0 +1,38 @@ +From foo@baz Sat Apr 20 16:43:09 CEST 2019 +From: Jason Wang +Date: Tue, 9 Apr 2019 12:10:25 +0800 +Subject: vhost: reject zero size iova range + +From: Jason Wang + +[ Upstream commit 813dbeb656d6c90266f251d8bd2b02d445afa63f ] + +We used to accept zero size iova range which will lead to an infinite loop +in translate_desc(). Fix this by failing the request in this case.
+ +Reported-by: syzbot+d21e6e297322a900c128@syzkaller.appspotmail.com +Fixes: 6b1e6cc7 ("vhost: new device IOTLB API") +Signed-off-by: Jason Wang +Acked-by: Michael S. Tsirkin +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/vhost/vhost.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/drivers/vhost/vhost.c ++++ b/drivers/vhost/vhost.c +@@ -911,8 +911,12 @@ static int vhost_new_umem_range(struct v + u64 start, u64 size, u64 end, + u64 userspace_addr, int perm) + { +- struct vhost_umem_node *tmp, *node = kmalloc(sizeof(*node), GFP_ATOMIC); ++ struct vhost_umem_node *tmp, *node; + ++ if (!size) ++ return -EFAULT; ++ ++ node = kmalloc(sizeof(*node), GFP_ATOMIC); + if (!node) + return -ENOMEM; +