5.4-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Tue, 16 Jun 2020 07:45:10 +0000 (09:45 +0200)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Tue, 16 Jun 2020 07:45:10 +0000 (09:45 +0200)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 16 Jun 2020 07:45:10 +0000 (09:45 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 16 Jun 2020 07:45:10 +0000 (09:45 +0200)
diff --git a/queue-5.4/dccp-fix-possible-memleak-in-dccp_init-and-dccp_fini.patch b/queue-5.4/dccp-fix-possible-memleak-in-dccp_init-and-dccp_fini.patch

new file mode 100644 (file)

index 0000000..e546dcf
--- /dev/null
+++ b/queue-5.4/dccp-fix-possible-memleak-in-dccp_init-and-dccp_fini.patch
@@ -0,0 +1,79 @@
+From foo@baz Tue 16 Jun 2020 09:44:41 AM CEST
+From: Wang Hai <wanghai38@huawei.com>
+Date: Tue, 9 Jun 2020 22:18:16 +0800
+Subject: dccp: Fix possible memleak in dccp_init and dccp_fini
+
+From: Wang Hai <wanghai38@huawei.com>
+
+[ Upstream commit c96b6acc8f89a4a7f6258dfe1d077654c11415be ]
+
+There are some memory leaks in dccp_init() and dccp_fini().
+
+In dccp_fini() and the error handling path in dccp_init(), freeing lhash2
+is missing. Add inet_hashinfo2_free_mod() to do it.
+
+If inet_hashinfo2_init_mod() fails in dccp_init(),
+percpu_counter_destroy() should be called to destroy dccp_orphan_count.
+It needs to go to out_free_percpu when inet_hashinfo2_init_mod() fails.
+
+Fixes: c92c81df93df ("net: dccp: fix kernel crash on module load")
+Reported-by: Hulk Robot <hulkci@huawei.com>
+Signed-off-by: Wang Hai <wanghai38@huawei.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/inet_hashtables.h |    6 ++++++
+ net/dccp/proto.c              |    7 +++++--
+ 2 files changed, 11 insertions(+), 2 deletions(-)
+
+--- a/include/net/inet_hashtables.h
++++ b/include/net/inet_hashtables.h
+@@ -185,6 +185,12 @@ static inline spinlock_t *inet_ehash_loc
+ 
+ int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo);
+ 
++static inline void inet_hashinfo2_free_mod(struct inet_hashinfo *h)
++{
++      kfree(h->lhash2);
++      h->lhash2 = NULL;
++}
++
+ static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo)
+ {
+       kvfree(hashinfo->ehash_locks);
+--- a/net/dccp/proto.c
++++ b/net/dccp/proto.c
+@@ -1139,14 +1139,14 @@ static int __init dccp_init(void)
+       inet_hashinfo_init(&dccp_hashinfo);
+       rc = inet_hashinfo2_init_mod(&dccp_hashinfo);
+       if (rc)
+-              goto out_fail;
++              goto out_free_percpu;
+       rc = -ENOBUFS;
+       dccp_hashinfo.bind_bucket_cachep =
+               kmem_cache_create("dccp_bind_bucket",
+                                 sizeof(struct inet_bind_bucket), 0,
+                                 SLAB_HWCACHE_ALIGN, NULL);
+       if (!dccp_hashinfo.bind_bucket_cachep)
+-              goto out_free_percpu;
++              goto out_free_hashinfo2;
+ 
+       /*
+        * Size and allocate the main established and bind bucket
+@@ -1242,6 +1242,8 @@ out_free_dccp_ehash:
+       free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
+ out_free_bind_bucket_cachep:
+       kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
++out_free_hashinfo2:
++      inet_hashinfo2_free_mod(&dccp_hashinfo);
+ out_free_percpu:
+       percpu_counter_destroy(&dccp_orphan_count);
+ out_fail:
+@@ -1265,6 +1267,7 @@ static void __exit dccp_fini(void)
+       kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
+       dccp_ackvec_exit();
+       dccp_sysctl_exit();
++      inet_hashinfo2_free_mod(&dccp_hashinfo);
+       percpu_counter_destroy(&dccp_orphan_count);
+ }
+ 
diff --git a/queue-5.4/net-mlx5-drain-health-workqueue-in-case-of-driver-load-error.patch b/queue-5.4/net-mlx5-drain-health-workqueue-in-case-of-driver-load-error.patch

new file mode 100644 (file)

index 0000000..b404799
--- /dev/null
+++ b/queue-5.4/net-mlx5-drain-health-workqueue-in-case-of-driver-load-error.patch
@@ -0,0 +1,104 @@
+From foo@baz Tue 16 Jun 2020 09:44:41 AM CEST
+From: Shay Drory <shayd@mellanox.com>
+Date: Wed, 6 May 2020 15:59:48 +0300
+Subject: net/mlx5: drain health workqueue in case of driver load error
+
+From: Shay Drory <shayd@mellanox.com>
+
+[ Upstream commit 42ea9f1b5c625fad225d4ac96a7e757dd4199d9c ]
+
+In case there is work pending in the health WQ when we tear down the
+driver in the driver load error flow, the health work will try to read
+dev->iseg, which was already unmapped in mlx5_pci_close().
+Fix it by draining the health workqueue first thing in mlx5_pci_close().
+
+Trace of the error:
+BUG: unable to handle page fault for address: ffffb5b141c18014
+PF: supervisor read access in kernel mode
+PF: error_code(0x0000) - not-present page
+PGD 1fe95d067 P4D 1fe95d067 PUD 1fe95e067 PMD 1b7823067 PTE 0
+Oops: 0000 [#1] SMP PTI
+CPU: 3 PID: 6755 Comm: kworker/u128:2 Not tainted 5.2.0-net-next-mlx5-hv_stats-over-last-worked-hyperv #1
+Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS 090006  04/28/2016
+Workqueue: mlx5_healtha050:00:02.0 mlx5_fw_fatal_reporter_err_work [mlx5_core]
+RIP: 0010:ioread32be+0x30/0x40
+Code: 00 77 27 48 81 ff 00 00 01 00 76 07 0f b7 d7 ed 0f c8 c3 55 48 c7 c6 3b ee d5 9f 48 89 e5 e8 67 fc ff ff b8 ff ff ff ff 5d c3 <8b> 07 0f c8 c3 66 66 2e 0f 1f 84 00 00 00 00 00 48 81 fe ff ff 03
+RSP: 0018:ffffb5b14c56fd78 EFLAGS: 00010292
+RAX: ffffb5b141c18000 RBX: ffff8e9f78a801c0 RCX: 0000000000000000
+RDX: 0000000000000001 RSI: ffff8e9f7ecd7628 RDI: ffffb5b141c18014
+RBP: ffffb5b14c56fd90 R08: 0000000000000001 R09: 0000000000000000
+R10: ffff8e9f372a2c30 R11: ffff8e9f87f4bc40 R12: ffff8e9f372a1fc0
+R13: ffff8e9f78a80000 R14: ffffffffc07136a0 R15: ffff8e9f78ae6f20
+FS:  0000000000000000(0000) GS:ffff8e9f7ecc0000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: ffffb5b141c18014 CR3: 00000001c8f82006 CR4: 00000000003606e0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+ ? mlx5_health_try_recover+0x4d/0x270 [mlx5_core]
+ mlx5_fw_fatal_reporter_recover+0x16/0x20 [mlx5_core]
+ devlink_health_reporter_recover+0x1c/0x50
+ devlink_health_report+0xfb/0x240
+ mlx5_fw_fatal_reporter_err_work+0x65/0xd0 [mlx5_core]
+ process_one_work+0x1fb/0x4e0
+ ? process_one_work+0x16b/0x4e0
+ worker_thread+0x4f/0x3d0
+ kthread+0x10d/0x140
+ ? process_one_work+0x4e0/0x4e0
+ ? kthread_cancel_delayed_work_sync+0x20/0x20
+ ret_from_fork+0x1f/0x30
+Modules linked in: nfsv3 rpcsec_gss_krb5 nfsv4 nfs fscache 8021q garp mrp stp llc ipmi_devintf ipmi_msghandler rpcrdma rdma_ucm ib_iser rdma_cm ib_umad iw_cm ib_ipoib libiscsi scsi_transport_iscsi ib_cm mlx5_ib ib_uverbs ib_core mlx5_core sb_edac crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel aes_x86_64 mlxfw crypto_simd cryptd glue_helper input_leds hyperv_fb intel_rapl_perf joydev serio_raw pci_hyperv pci_hyperv_mini mac_hid hv_balloon nfsd auth_rpcgss nfs_acl lockd grace sunrpc sch_fq_codel ip_tables x_tables autofs4 hv_utils hid_generic hv_storvsc ptp hid_hyperv hid hv_netvsc hyperv_keyboard pps_core scsi_transport_fc psmouse hv_vmbus i2c_piix4 floppy pata_acpi
+CR2: ffffb5b141c18014
+---[ end trace b12c5503157cad24 ]---
+RIP: 0010:ioread32be+0x30/0x40
+Code: 00 77 27 48 81 ff 00 00 01 00 76 07 0f b7 d7 ed 0f c8 c3 55 48 c7 c6 3b ee d5 9f 48 89 e5 e8 67 fc ff ff b8 ff ff ff ff 5d c3 <8b> 07 0f c8 c3 66 66 2e 0f 1f 84 00 00 00 00 00 48 81 fe ff ff 03
+RSP: 0018:ffffb5b14c56fd78 EFLAGS: 00010292
+RAX: ffffb5b141c18000 RBX: ffff8e9f78a801c0 RCX: 0000000000000000
+RDX: 0000000000000001 RSI: ffff8e9f7ecd7628 RDI: ffffb5b141c18014
+RBP: ffffb5b14c56fd90 R08: 0000000000000001 R09: 0000000000000000
+R10: ffff8e9f372a2c30 R11: ffff8e9f87f4bc40 R12: ffff8e9f372a1fc0
+R13: ffff8e9f78a80000 R14: ffffffffc07136a0 R15: ffff8e9f78ae6f20
+FS:  0000000000000000(0000) GS:ffff8e9f7ecc0000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: ffffb5b141c18014 CR3: 00000001c8f82006 CR4: 00000000003606e0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+BUG: sleeping function called from invalid context at ./include/linux/percpu-rwsem.h:38
+in_atomic(): 0, irqs_disabled(): 1, pid: 6755, name: kworker/u128:2
+INFO: lockdep is turned off.
+CPU: 3 PID: 6755 Comm: kworker/u128:2 Tainted: G      D           5.2.0-net-next-mlx5-hv_stats-over-last-worked-hyperv #1
+Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS 090006  04/28/2016
+Workqueue: mlx5_healtha050:00:02.0 mlx5_fw_fatal_reporter_err_work [mlx5_core]
+Call Trace:
+ dump_stack+0x63/0x88
+ ___might_sleep+0x10a/0x130
+ __might_sleep+0x4a/0x80
+ exit_signals+0x33/0x230
+ ? blocking_notifier_call_chain+0x16/0x20
+ do_exit+0xb1/0xc30
+ ? kthread+0x10d/0x140
+ ? process_one_work+0x4e0/0x4e0
+
+Fixes: 52c368dc3da7 ("net/mlx5: Move health and page alloc init to mdev_init")
+Signed-off-by: Shay Drory <shayd@mellanox.com>
+Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/main.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+@@ -794,6 +794,11 @@ err_disable:
+ 
+ static void mlx5_pci_close(struct mlx5_core_dev *dev)
+ {
++      /* health work might still be active, and it needs pci bar in
++       * order to know the NIC state. Therefore, drain the health WQ
++       * before removing the pci bars
++       */
++      mlx5_drain_health_wq(dev);
+       iounmap(dev->iseg);
+       pci_clear_master(dev->pdev);
+       release_bar(dev->pdev);
diff --git a/queue-5.4/net-mlx5-fix-fatal-error-handling-during-device-load.patch b/queue-5.4/net-mlx5-fix-fatal-error-handling-during-device-load.patch

new file mode 100644 (file)

index 0000000..e259d6d
--- /dev/null
+++ b/queue-5.4/net-mlx5-fix-fatal-error-handling-during-device-load.patch
@@ -0,0 +1,54 @@
+From foo@baz Tue 16 Jun 2020 09:44:41 AM CEST
+From: Shay Drory <shayd@mellanox.com>
+Date: Thu, 7 May 2020 09:32:53 +0300
+Subject: net/mlx5: Fix fatal error handling during device load
+
+From: Shay Drory <shayd@mellanox.com>
+
+[ Upstream commit b6e0b6bebe0732d5cac51f0791f269d2413b8980 ]
+
+Currently, in case of a fatal error during mlx5_load_one(), we cannot
+enter the error state until mlx5_load_one() has finished, which can take
+several minutes while commands wait to time out, because these commands
+can't be processed due to the fatal error.
+Fix it by setting dev->state to MLX5_DEVICE_STATE_INTERNAL_ERROR before
+requesting the lock.
+
+Fixes: c1d4d2e92ad6 ("net/mlx5: Avoid calling sleeping function by the health poll thread")
+Signed-off-by: Shay Drory <shayd@mellanox.com>
+Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/health.c |   14 +++++++++++---
+ 1 file changed, 11 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
+@@ -193,15 +193,23 @@ static bool reset_fw_if_needed(struct ml
+ 
+ void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force)
+ {
++      bool err_detected = false;
++
++      /* Mark the device as fatal in order to abort FW commands */
++      if ((check_fatal_sensors(dev) || force) &&
++          dev->state == MLX5_DEVICE_STATE_UP) {
++              dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
++              err_detected = true;
++      }
+       mutex_lock(&dev->intf_state_mutex);
+-      if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
+-              goto unlock;
++      if (!err_detected && dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
++              goto unlock;/* a previous error is still being handled */
+       if (dev->state == MLX5_DEVICE_STATE_UNINITIALIZED) {
+               dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
+               goto unlock;
+       }
+ 
+-      if (check_fatal_sensors(dev) || force) {
++      if (check_fatal_sensors(dev) || force) { /* protected state setting */
+               dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
+               mlx5_cmd_flush(dev);
+       }
diff --git a/queue-5.4/net-mlx5e-fix-repeated-xsk-usage-on-one-channel.patch b/queue-5.4/net-mlx5e-fix-repeated-xsk-usage-on-one-channel.patch

new file mode 100644 (file)

index 0000000..1e43882
--- /dev/null
+++ b/queue-5.4/net-mlx5e-fix-repeated-xsk-usage-on-one-channel.patch
@@ -0,0 +1,44 @@
+From foo@baz Tue 16 Jun 2020 09:44:41 AM CEST
+From: Maxim Mikityanskiy <maximmi@mellanox.com>
+Date: Mon, 1 Jun 2020 16:03:44 +0300
+Subject: net/mlx5e: Fix repeated XSK usage on one channel
+
+From: Maxim Mikityanskiy <maximmi@mellanox.com>
+
+[ Upstream commit 36d45fb9d2fdf348d778bfe73f0427db1c6f9bc7 ]
+
+After an XSK is closed, the relevant structures in the channel are not
+zeroed. If an XSK is opened a second time on the same channel without
+recreating channels, the stray values in the structures will lead to
+incorrect operation of queues, which causes CQE errors, and the new
+socket doesn't work at all.
+
+This patch fixes the issue by explicitly zeroing XSK-related structs in
+the channel on XSK close. Note that those structs are zeroed on channel
+creation, and usually a configuration change (XDP program is set)
+happens on XSK open, which leads to recreating channels, so typical XSK
+usecases don't suffer from this issue. However, if XSKs are opened and
+closed on the same channel without removing the XDP program, this bug
+reproduces.
+
+Fixes: db05815b36cb ("net/mlx5e: Add XSK zero-copy support")
+Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
+@@ -152,6 +152,10 @@ void mlx5e_close_xsk(struct mlx5e_channe
+       mlx5e_close_cq(&c->xskicosq.cq);
+       mlx5e_close_xdpsq(&c->xsksq);
+       mlx5e_close_cq(&c->xsksq.cq);
++
++      memset(&c->xskrq, 0, sizeof(c->xskrq));
++      memset(&c->xsksq, 0, sizeof(c->xsksq));
++      memset(&c->xskicosq, 0, sizeof(c->xskicosq));
+ }
+ 
+ void mlx5e_activate_xsk(struct mlx5e_channel *c)
diff --git a/queue-5.4/selftests-net-in-rxtimestamp-getopt_long-needs-terminating-null-entry.patch b/queue-5.4/selftests-net-in-rxtimestamp-getopt_long-needs-terminating-null-entry.patch

new file mode 100644 (file)

index 0000000..9f043b2
--- /dev/null
+++ b/queue-5.4/selftests-net-in-rxtimestamp-getopt_long-needs-terminating-null-entry.patch
@@ -0,0 +1,31 @@
+From foo@baz Tue 16 Jun 2020 09:44:41 AM CEST
+From: tannerlove <tannerlove@google.com>
+Date: Tue, 9 Jun 2020 17:21:32 -0400
+Subject: selftests/net: in rxtimestamp getopt_long needs terminating null entry
+
+From: tannerlove <tannerlove@google.com>
+
+[ Upstream commit 865a6cbb2288f8af7f9dc3b153c61b7014fdcf1e ]
+
+getopt_long requires the last element to be filled with zeros.
+Otherwise, passing an unrecognized option can cause a segfault.
+
+Fixes: 16e781224198 ("selftests/net: Add a test to validate behavior of rx timestamps")
+Signed-off-by: Tanner Love <tannerlove@google.com>
+Acked-by: Willem de Bruijn <willemb@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/networking/timestamping/rxtimestamp.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/tools/testing/selftests/networking/timestamping/rxtimestamp.c
++++ b/tools/testing/selftests/networking/timestamping/rxtimestamp.c
+@@ -115,6 +115,7 @@ static struct option long_options[] = {
+       { "tcp", no_argument, 0, 't' },
+       { "udp", no_argument, 0, 'u' },
+       { "ip", no_argument, 0, 'i' },
++      { NULL, 0, NULL, 0 },
+ };
+ 
+ static int next_port = 19999;
diff --git a/queue-5.4/series b/queue-5.4/series

index 19bf75804b7727a6b476bb517d4b6b09ba3c1580..40b45b983d44aaf1bd7f60f44591aa0488ad297f 100644 (file)
--- a/queue-5.4/series
+++ b/queue-5.4/series
@@ -86,3 +86,8 @@ firmware-imx-scu-fix-corruption-of-header.patch
  crypto-virtio-fix-use-after-free-in-virtio_crypto_sk.patch
  crypto-virtio-fix-src-dst-scatterlist-calculation-in.patch
  crypto-virtio-fix-dest-length-calculation-in-__virti.patch
+dccp-fix-possible-memleak-in-dccp_init-and-dccp_fini.patch
+selftests-net-in-rxtimestamp-getopt_long-needs-terminating-null-entry.patch
+net-mlx5-drain-health-workqueue-in-case-of-driver-load-error.patch
+net-mlx5-fix-fatal-error-handling-during-device-load.patch
+net-mlx5e-fix-repeated-xsk-usage-on-one-channel.patch
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Tue, 16 Jun 2020 07:45:10 +0000 (09:45 +0200)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Tue, 16 Jun 2020 07:45:10 +0000 (09:45 +0200)
queue-5.4/dccp-fix-possible-memleak-in-dccp_init-and-dccp_fini.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/net-mlx5-drain-health-workqueue-in-case-of-driver-load-error.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/net-mlx5-fix-fatal-error-handling-during-device-load.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/net-mlx5e-fix-repeated-xsk-usage-on-one-channel.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/selftests-net-in-rxtimestamp-getopt_long-needs-terminating-null-entry.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/series		patch \| blob \| blame \| history