From: Greg Kroah-Hartman Date: Tue, 16 Jun 2020 07:45:10 +0000 (+0200) Subject: 5.4-stable patches X-Git-Tag: v5.4.47~60 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=c1cf77ce7b21119718b12682b47f2d5967d66f20;p=thirdparty%2Fkernel%2Fstable-queue.git 5.4-stable patches added patches: dccp-fix-possible-memleak-in-dccp_init-and-dccp_fini.patch net-mlx5-drain-health-workqueue-in-case-of-driver-load-error.patch net-mlx5-fix-fatal-error-handling-during-device-load.patch net-mlx5e-fix-repeated-xsk-usage-on-one-channel.patch selftests-net-in-rxtimestamp-getopt_long-needs-terminating-null-entry.patch --- diff --git a/queue-5.4/dccp-fix-possible-memleak-in-dccp_init-and-dccp_fini.patch b/queue-5.4/dccp-fix-possible-memleak-in-dccp_init-and-dccp_fini.patch new file mode 100644 index 00000000000..e546dcffae3 --- /dev/null +++ b/queue-5.4/dccp-fix-possible-memleak-in-dccp_init-and-dccp_fini.patch @@ -0,0 +1,79 @@ +From foo@baz Tue 16 Jun 2020 09:44:41 AM CEST +From: Wang Hai +Date: Tue, 9 Jun 2020 22:18:16 +0800 +Subject: dccp: Fix possible memleak in dccp_init and dccp_fini + +From: Wang Hai + +[ Upstream commit c96b6acc8f89a4a7f6258dfe1d077654c11415be ] + +There are some memory leaks in dccp_init() and dccp_fini(). + +In dccp_fini() and the error handling path in dccp_init(), free lhash2 +is missing. Add inet_hashinfo2_free_mod() to do it. + +If inet_hashinfo2_init_mod() failed in dccp_init(), +percpu_counter_destroy() should be called to destroy dccp_orphan_count. +It need to goto out_free_percpu when inet_hashinfo2_init_mod() failed. + +Fixes: c92c81df93df ("net: dccp: fix kernel crash on module load") +Reported-by: Hulk Robot +Signed-off-by: Wang Hai +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/inet_hashtables.h | 6 ++++++ + net/dccp/proto.c | 7 +++++-- + 2 files changed, 11 insertions(+), 2 deletions(-) + +--- a/include/net/inet_hashtables.h ++++ b/include/net/inet_hashtables.h +@@ -185,6 +185,12 @@ static inline spinlock_t *inet_ehash_loc + + int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo); + ++static inline void inet_hashinfo2_free_mod(struct inet_hashinfo *h) ++{ ++ kfree(h->lhash2); ++ h->lhash2 = NULL; ++} ++ + static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo) + { + kvfree(hashinfo->ehash_locks); +--- a/net/dccp/proto.c ++++ b/net/dccp/proto.c +@@ -1139,14 +1139,14 @@ static int __init dccp_init(void) + inet_hashinfo_init(&dccp_hashinfo); + rc = inet_hashinfo2_init_mod(&dccp_hashinfo); + if (rc) +- goto out_fail; ++ goto out_free_percpu; + rc = -ENOBUFS; + dccp_hashinfo.bind_bucket_cachep = + kmem_cache_create("dccp_bind_bucket", + sizeof(struct inet_bind_bucket), 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!dccp_hashinfo.bind_bucket_cachep) +- goto out_free_percpu; ++ goto out_free_hashinfo2; + + /* + * Size and allocate the main established and bind bucket +@@ -1242,6 +1242,8 @@ out_free_dccp_ehash: + free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order); + out_free_bind_bucket_cachep: + kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); ++out_free_hashinfo2: ++ inet_hashinfo2_free_mod(&dccp_hashinfo); + out_free_percpu: + percpu_counter_destroy(&dccp_orphan_count); + out_fail: +@@ -1265,6 +1267,7 @@ static void __exit dccp_fini(void) + kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); + dccp_ackvec_exit(); + dccp_sysctl_exit(); ++ inet_hashinfo2_free_mod(&dccp_hashinfo); + percpu_counter_destroy(&dccp_orphan_count); + } + diff --git a/queue-5.4/net-mlx5-drain-health-workqueue-in-case-of-driver-load-error.patch b/queue-5.4/net-mlx5-drain-health-workqueue-in-case-of-driver-load-error.patch new file mode 100644 index 00000000000..b404799a536 --- /dev/null +++ b/queue-5.4/net-mlx5-drain-health-workqueue-in-case-of-driver-load-error.patch @@ -0,0 +1,104 @@ +From foo@baz Tue 16 Jun 2020 09:44:41 AM CEST +From: Shay Drory +Date: Wed, 6 May 2020 15:59:48 +0300 +Subject: net/mlx5: drain health workqueue in case of driver load error + +From: Shay Drory + +[ Upstream commit 42ea9f1b5c625fad225d4ac96a7e757dd4199d9c ] + +In case there is a work in the health WQ when we teardown the driver, +in driver load error flow, the health work will try to read dev->iseg, +which was already unmap in mlx5_pci_close(). +Fix it by draining the health workqueue first thing in mlx5_pci_close(). + +Trace of the error: +BUG: unable to handle page fault for address: ffffb5b141c18014 +PF: supervisor read access in kernel mode +PF: error_code(0x0000) - not-present page +PGD 1fe95d067 P4D 1fe95d067 PUD 1fe95e067 PMD 1b7823067 PTE 0 +Oops: 0000 [#1] SMP PTI +CPU: 3 PID: 6755 Comm: kworker/u128:2 Not tainted 5.2.0-net-next-mlx5-hv_stats-over-last-worked-hyperv #1 +Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS 090006 04/28/2016 +Workqueue: mlx5_healtha050:00:02.0 mlx5_fw_fatal_reporter_err_work [mlx5_core] +RIP: 0010:ioread32be+0x30/0x40 +Code: 00 77 27 48 81 ff 00 00 01 00 76 07 0f b7 d7 ed 0f c8 c3 55 48 c7 c6 3b ee d5 9f 48 89 e5 e8 67 fc ff ff b8 ff ff ff ff 5d c3 <8b> 07 0f c8 c3 66 66 2e 0f 1f 84 00 00 00 00 00 48 81 fe ff ff 03 +RSP: 0018:ffffb5b14c56fd78 EFLAGS: 00010292 +RAX: ffffb5b141c18000 RBX: ffff8e9f78a801c0 RCX: 0000000000000000 +RDX: 0000000000000001 RSI: ffff8e9f7ecd7628 RDI: ffffb5b141c18014 +RBP: ffffb5b14c56fd90 R08: 0000000000000001 R09: 0000000000000000 +R10: ffff8e9f372a2c30 R11: ffff8e9f87f4bc40 R12: ffff8e9f372a1fc0 +R13: ffff8e9f78a80000 R14: ffffffffc07136a0 R15: ffff8e9f78ae6f20 +FS: 0000000000000000(0000) GS:ffff8e9f7ecc0000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: ffffb5b141c18014 CR3: 00000001c8f82006 CR4: 00000000003606e0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + ? mlx5_health_try_recover+0x4d/0x270 [mlx5_core] + mlx5_fw_fatal_reporter_recover+0x16/0x20 [mlx5_core] + devlink_health_reporter_recover+0x1c/0x50 + devlink_health_report+0xfb/0x240 + mlx5_fw_fatal_reporter_err_work+0x65/0xd0 [mlx5_core] + process_one_work+0x1fb/0x4e0 + ? process_one_work+0x16b/0x4e0 + worker_thread+0x4f/0x3d0 + kthread+0x10d/0x140 + ? process_one_work+0x4e0/0x4e0 + ? kthread_cancel_delayed_work_sync+0x20/0x20 + ret_from_fork+0x1f/0x30 +Modules linked in: nfsv3 rpcsec_gss_krb5 nfsv4 nfs fscache 8021q garp mrp stp llc ipmi_devintf ipmi_msghandler rpcrdma rdma_ucm ib_iser rdma_cm ib_umad iw_cm ib_ipoib libiscsi scsi_transport_iscsi ib_cm mlx5_ib ib_uverbs ib_core mlx5_core sb_edac crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel aes_x86_64 mlxfw crypto_simd cryptd glue_helper input_leds hyperv_fb intel_rapl_perf joydev serio_raw pci_hyperv pci_hyperv_mini mac_hid hv_balloon nfsd auth_rpcgss nfs_acl lockd grace sunrpc sch_fq_codel ip_tables x_tables autofs4 hv_utils hid_generic hv_storvsc ptp hid_hyperv hid hv_netvsc hyperv_keyboard pps_core scsi_transport_fc psmouse hv_vmbus i2c_piix4 floppy pata_acpi +CR2: ffffb5b141c18014 +---[ end trace b12c5503157cad24 ]--- +RIP: 0010:ioread32be+0x30/0x40 +Code: 00 77 27 48 81 ff 00 00 01 00 76 07 0f b7 d7 ed 0f c8 c3 55 48 c7 c6 3b ee d5 9f 48 89 e5 e8 67 fc ff ff b8 ff ff ff ff 5d c3 <8b> 07 0f c8 c3 66 66 2e 0f 1f 84 00 00 00 00 00 48 81 fe ff ff 03 +RSP: 0018:ffffb5b14c56fd78 EFLAGS: 00010292 +RAX: ffffb5b141c18000 RBX: ffff8e9f78a801c0 RCX: 0000000000000000 +RDX: 0000000000000001 RSI: ffff8e9f7ecd7628 RDI: ffffb5b141c18014 +RBP: ffffb5b14c56fd90 R08: 0000000000000001 R09: 0000000000000000 +R10: ffff8e9f372a2c30 R11: ffff8e9f87f4bc40 R12: ffff8e9f372a1fc0 +R13: ffff8e9f78a80000 R14: ffffffffc07136a0 R15: ffff8e9f78ae6f20 +FS: 0000000000000000(0000) GS:ffff8e9f7ecc0000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: ffffb5b141c18014 CR3: 00000001c8f82006 CR4: 00000000003606e0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +BUG: sleeping function called from invalid context at ./include/linux/percpu-rwsem.h:38 +in_atomic(): 0, irqs_disabled(): 1, pid: 6755, name: kworker/u128:2 +INFO: lockdep is turned off. +CPU: 3 PID: 6755 Comm: kworker/u128:2 Tainted: G D 5.2.0-net-next-mlx5-hv_stats-over-last-worked-hyperv #1 +Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS 090006 04/28/2016 +Workqueue: mlx5_healtha050:00:02.0 mlx5_fw_fatal_reporter_err_work [mlx5_core] +Call Trace: + dump_stack+0x63/0x88 + ___might_sleep+0x10a/0x130 + __might_sleep+0x4a/0x80 + exit_signals+0x33/0x230 + ? blocking_notifier_call_chain+0x16/0x20 + do_exit+0xb1/0xc30 + ? kthread+0x10d/0x140 + ? process_one_work+0x4e0/0x4e0 + +Fixes: 52c368dc3da7 ("net/mlx5: Move health and page alloc init to mdev_init") +Signed-off-by: Shay Drory +Reviewed-by: Moshe Shemesh +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/main.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c +@@ -794,6 +794,11 @@ err_disable: + + static void mlx5_pci_close(struct mlx5_core_dev *dev) + { ++ /* health work might still be active, and it needs pci bar in ++ * order to know the NIC state. Therefore, drain the health WQ ++ * before removing the pci bars ++ */ ++ mlx5_drain_health_wq(dev); + iounmap(dev->iseg); + pci_clear_master(dev->pdev); + release_bar(dev->pdev); diff --git a/queue-5.4/net-mlx5-fix-fatal-error-handling-during-device-load.patch b/queue-5.4/net-mlx5-fix-fatal-error-handling-during-device-load.patch new file mode 100644 index 00000000000..e259d6dd893 --- /dev/null +++ b/queue-5.4/net-mlx5-fix-fatal-error-handling-during-device-load.patch @@ -0,0 +1,54 @@ +From foo@baz Tue 16 Jun 2020 09:44:41 AM CEST +From: Shay Drory +Date: Thu, 7 May 2020 09:32:53 +0300 +Subject: net/mlx5: Fix fatal error handling during device load + +From: Shay Drory + +[ Upstream commit b6e0b6bebe0732d5cac51f0791f269d2413b8980 ] + +Currently, in case of fatal error during mlx5_load_one(), we cannot +enter error state until mlx5_load_one() is finished, what can take +several minutes until commands will get timeouts, because these commands +can't be processed due to the fatal error. +Fix it by setting dev->state as MLX5_DEVICE_STATE_INTERNAL_ERROR before +requesting the lock. + +Fixes: c1d4d2e92ad6 ("net/mlx5: Avoid calling sleeping function by the health poll thread") +Signed-off-by: Shay Drory +Reviewed-by: Moshe Shemesh +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/health.c | 14 +++++++++++--- + 1 file changed, 11 insertions(+), 3 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c +@@ -193,15 +193,23 @@ static bool reset_fw_if_needed(struct ml + + void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force) + { ++ bool err_detected = false; ++ ++ /* Mark the device as fatal in order to abort FW commands */ ++ if ((check_fatal_sensors(dev) || force) && ++ dev->state == MLX5_DEVICE_STATE_UP) { ++ dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; ++ err_detected = true; ++ } + mutex_lock(&dev->intf_state_mutex); +- if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) +- goto unlock; ++ if (!err_detected && dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) ++ goto unlock;/* a previous error is still being handled */ + if (dev->state == MLX5_DEVICE_STATE_UNINITIALIZED) { + dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; + goto unlock; + } + +- if (check_fatal_sensors(dev) || force) { ++ if (check_fatal_sensors(dev) || force) { /* protected state setting */ + dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; + mlx5_cmd_flush(dev); + } diff --git a/queue-5.4/net-mlx5e-fix-repeated-xsk-usage-on-one-channel.patch b/queue-5.4/net-mlx5e-fix-repeated-xsk-usage-on-one-channel.patch new file mode 100644 index 00000000000..1e43882f7f5 --- /dev/null +++ b/queue-5.4/net-mlx5e-fix-repeated-xsk-usage-on-one-channel.patch @@ -0,0 +1,44 @@ +From foo@baz Tue 16 Jun 2020 09:44:41 AM CEST +From: Maxim Mikityanskiy +Date: Mon, 1 Jun 2020 16:03:44 +0300 +Subject: net/mlx5e: Fix repeated XSK usage on one channel + +From: Maxim Mikityanskiy + +[ Upstream commit 36d45fb9d2fdf348d778bfe73f0427db1c6f9bc7 ] + +After an XSK is closed, the relevant structures in the channel are not +zeroed. If an XSK is opened the second time on the same channel without +recreating channels, the stray values in the structures will lead to +incorrect operation of queues, which causes CQE errors, and the new +socket doesn't work at all. + +This patch fixes the issue by explicitly zeroing XSK-related structs in +the channel on XSK close. Note that those structs are zeroed on channel +creation, and usually a configuration change (XDP program is set) +happens on XSK open, which leads to recreating channels, so typical XSK +usecases don't suffer from this issue. However, if XSKs are opened and +closed on the same channel without removing the XDP program, this bug +reproduces. + +Fixes: db05815b36cb ("net/mlx5e: Add XSK zero-copy support") +Signed-off-by: Maxim Mikityanskiy +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +@@ -152,6 +152,10 @@ void mlx5e_close_xsk(struct mlx5e_channe + mlx5e_close_cq(&c->xskicosq.cq); + mlx5e_close_xdpsq(&c->xsksq); + mlx5e_close_cq(&c->xsksq.cq); ++ ++ memset(&c->xskrq, 0, sizeof(c->xskrq)); ++ memset(&c->xsksq, 0, sizeof(c->xsksq)); ++ memset(&c->xskicosq, 0, sizeof(c->xskicosq)); + } + + void mlx5e_activate_xsk(struct mlx5e_channel *c) diff --git a/queue-5.4/selftests-net-in-rxtimestamp-getopt_long-needs-terminating-null-entry.patch b/queue-5.4/selftests-net-in-rxtimestamp-getopt_long-needs-terminating-null-entry.patch new file mode 100644 index 00000000000..9f043b2e684 --- /dev/null +++ b/queue-5.4/selftests-net-in-rxtimestamp-getopt_long-needs-terminating-null-entry.patch @@ -0,0 +1,31 @@ +From foo@baz Tue 16 Jun 2020 09:44:41 AM CEST +From: tannerlove +Date: Tue, 9 Jun 2020 17:21:32 -0400 +Subject: selftests/net: in rxtimestamp getopt_long needs terminating null entry + +From: tannerlove + +[ Upstream commit 865a6cbb2288f8af7f9dc3b153c61b7014fdcf1e ] + +getopt_long requires the last element to be filled with zeros. +Otherwise, passing an unrecognized option can cause a segfault. + +Fixes: 16e781224198 ("selftests/net: Add a test to validate behavior of rx timestamps") +Signed-off-by: Tanner Love +Acked-by: Willem de Bruijn +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/networking/timestamping/rxtimestamp.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/tools/testing/selftests/networking/timestamping/rxtimestamp.c ++++ b/tools/testing/selftests/networking/timestamping/rxtimestamp.c +@@ -115,6 +115,7 @@ static struct option long_options[] = { + { "tcp", no_argument, 0, 't' }, + { "udp", no_argument, 0, 'u' }, + { "ip", no_argument, 0, 'i' }, ++ { NULL, 0, NULL, 0 }, + }; + + static int next_port = 19999; diff --git a/queue-5.4/series b/queue-5.4/series index 19bf75804b7..40b45b983d4 100644 --- a/queue-5.4/series +++ b/queue-5.4/series @@ -86,3 +86,8 @@ firmware-imx-scu-fix-corruption-of-header.patch crypto-virtio-fix-use-after-free-in-virtio_crypto_sk.patch crypto-virtio-fix-src-dst-scatterlist-calculation-in.patch crypto-virtio-fix-dest-length-calculation-in-__virti.patch +dccp-fix-possible-memleak-in-dccp_init-and-dccp_fini.patch +selftests-net-in-rxtimestamp-getopt_long-needs-terminating-null-entry.patch +net-mlx5-drain-health-workqueue-in-case-of-driver-load-error.patch +net-mlx5-fix-fatal-error-handling-during-device-load.patch +net-mlx5e-fix-repeated-xsk-usage-on-one-channel.patch