5.6-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Tue, 16 Jun 2020 07:43:41 +0000 (09:43 +0200)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Tue, 16 Jun 2020 07:43:41 +0000 (09:43 +0200)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 16 Jun 2020 07:43:41 +0000 (09:43 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 16 Jun 2020 07:43:41 +0000 (09:43 +0200)
diff --git a/queue-5.6/dccp-fix-possible-memleak-in-dccp_init-and-dccp_fini.patch b/queue-5.6/dccp-fix-possible-memleak-in-dccp_init-and-dccp_fini.patch

new file mode 100644 (file)

index 0000000..94d9017
--- /dev/null
+++ b/queue-5.6/dccp-fix-possible-memleak-in-dccp_init-and-dccp_fini.patch
@@ -0,0 +1,79 @@
+From foo@baz Tue 16 Jun 2020 09:42:59 AM CEST
+From: Wang Hai <wanghai38@huawei.com>
+Date: Tue, 9 Jun 2020 22:18:16 +0800
+Subject: dccp: Fix possible memleak in dccp_init and dccp_fini
+
+From: Wang Hai <wanghai38@huawei.com>
+
+[ Upstream commit c96b6acc8f89a4a7f6258dfe1d077654c11415be ]
+
+There are some memory leaks in dccp_init() and dccp_fini().
+
+In dccp_fini() and the error handling path in dccp_init(), freeing lhash2
+is missing. Add inet_hashinfo2_free_mod() to do it.
+
+If inet_hashinfo2_init_mod() fails in dccp_init(),
+percpu_counter_destroy() should be called to destroy dccp_orphan_count.
+It needs to goto out_free_percpu when inet_hashinfo2_init_mod() fails.
+
+Fixes: c92c81df93df ("net: dccp: fix kernel crash on module load")
+Reported-by: Hulk Robot <hulkci@huawei.com>
+Signed-off-by: Wang Hai <wanghai38@huawei.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/inet_hashtables.h |    6 ++++++
+ net/dccp/proto.c              |    7 +++++--
+ 2 files changed, 11 insertions(+), 2 deletions(-)
+
+--- a/include/net/inet_hashtables.h
++++ b/include/net/inet_hashtables.h
+@@ -185,6 +185,12 @@ static inline spinlock_t *inet_ehash_loc
+ 
+ int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo);
+ 
++static inline void inet_hashinfo2_free_mod(struct inet_hashinfo *h)
++{
++      kfree(h->lhash2);
++      h->lhash2 = NULL;
++}
++
+ static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo)
+ {
+       kvfree(hashinfo->ehash_locks);
+--- a/net/dccp/proto.c
++++ b/net/dccp/proto.c
+@@ -1139,14 +1139,14 @@ static int __init dccp_init(void)
+       inet_hashinfo_init(&dccp_hashinfo);
+       rc = inet_hashinfo2_init_mod(&dccp_hashinfo);
+       if (rc)
+-              goto out_fail;
++              goto out_free_percpu;
+       rc = -ENOBUFS;
+       dccp_hashinfo.bind_bucket_cachep =
+               kmem_cache_create("dccp_bind_bucket",
+                                 sizeof(struct inet_bind_bucket), 0,
+                                 SLAB_HWCACHE_ALIGN, NULL);
+       if (!dccp_hashinfo.bind_bucket_cachep)
+-              goto out_free_percpu;
++              goto out_free_hashinfo2;
+ 
+       /*
+        * Size and allocate the main established and bind bucket
+@@ -1242,6 +1242,8 @@ out_free_dccp_ehash:
+       free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
+ out_free_bind_bucket_cachep:
+       kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
++out_free_hashinfo2:
++      inet_hashinfo2_free_mod(&dccp_hashinfo);
+ out_free_percpu:
+       percpu_counter_destroy(&dccp_orphan_count);
+ out_fail:
+@@ -1265,6 +1267,7 @@ static void __exit dccp_fini(void)
+       kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
+       dccp_ackvec_exit();
+       dccp_sysctl_exit();
++      inet_hashinfo2_free_mod(&dccp_hashinfo);
+       percpu_counter_destroy(&dccp_orphan_count);
+ }
+ 
diff --git a/queue-5.6/net-cadence-macb-disable-napi-on-error.patch b/queue-5.6/net-cadence-macb-disable-napi-on-error.patch

new file mode 100644 (file)

index 0000000..832124d
--- /dev/null
+++ b/queue-5.6/net-cadence-macb-disable-napi-on-error.patch
@@ -0,0 +1,96 @@
+From foo@baz Tue 16 Jun 2020 09:42:59 AM CEST
+From: Corentin Labbe <clabbe@baylibre.com>
+Date: Wed, 10 Jun 2020 09:53:44 +0000
+Subject: net: cadence: macb: disable NAPI on error
+
+From: Corentin Labbe <clabbe@baylibre.com>
+
+[ Upstream commit 014406babc1f5f887a08737566b5b356c7018242 ]
+
+When the PHY is not working, the macb driver crashes on a second attempt
+to set it up.
+[   78.545994] macb e000b000.ethernet eth0: Could not attach PHY (-19)
+ifconfig: SIOCSIFFLAGS: No such device
+[   78.655457] ------------[ cut here ]------------
+[   78.656014] kernel BUG at /linux-next/include/linux/netdevice.h:521!
+[   78.656504] Internal error: Oops - BUG: 0 [#1] SMP ARM
+[   78.657079] Modules linked in:
+[   78.657795] CPU: 0 PID: 122 Comm: ifconfig Not tainted 5.7.0-next-20200609 #1
+[   78.658202] Hardware name: Xilinx Zynq Platform
+[   78.659632] PC is at macb_open+0x220/0x294
+[   78.660160] LR is at 0x0
+[   78.660373] pc : [<c0b0a634>]    lr : [<00000000>]    psr: 60000013
+[   78.660716] sp : c89ffd70  ip : c8a28800  fp : c199bac0
+[   78.661040] r10: 00000000  r9 : c8838540  r8 : c8838568
+[   78.661362] r7 : 00000001  r6 : c8838000  r5 : c883c000  r4 : 00000000
+[   78.661724] r3 : 00000010  r2 : 00000000  r1 : 00000000  r0 : 00000000
+[   78.662187] Flags: nZCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
+[   78.662635] Control: 10c5387d  Table: 08b64059  DAC: 00000051
+[   78.663035] Process ifconfig (pid: 122, stack limit = 0x(ptrval))
+[   78.663476] Stack: (0xc89ffd70 to 0xc8a00000)
+[   78.664121] fd60:                                     00000000 c89fe000 c8838000 c89fe000
+[   78.664866] fd80: 00000000 c11ff9ac c8838028 00000000 00000000 c0de6f2c 00000001 c1804eec
+[   78.665579] fda0: c19b8178 c8838000 00000000 ca760866 c8838000 00000001 00001043 c89fe000
+[   78.666355] fdc0: 00001002 c0de72f4 c89fe000 c0de8dc0 00008914 c89fe000 c199bac0 ca760866
+[   78.667111] fde0: c89ffddc c8838000 00001002 00000000 c8838138 c881010c 00008914 c0de7364
+[   78.667862] fe00: 00000000 c89ffe70 c89fe000 ffffffff c881010c c0e8bd48 00000003 00000000
+[   78.668601] fe20: c8838000 c8810100 39c1118f 00039c11 c89a0960 00001043 00000000 000a26d0
+[   78.669343] fe40: b6f43000 ca760866 c89a0960 00000051 befe6c50 00008914 c8b2a3c0 befe6c50
+[   78.670086] fe60: 00000003 ee610500 00000000 c0e8ef58 30687465 00000000 00000000 00000000
+[   78.670865] fe80: 00001043 00000000 000a26d0 b6f43000 c89a0600 ee40ae7c c8870d00 c0ddabf4
+[   78.671593] fea0: c89ffeec c0ddabf4 c89ffeec c199bac0 00008913 c0ddac48 c89ffeec c89fe000
+[   78.672324] fec0: befe6c50 ca760866 befe6c50 00008914 c89fe000 befe6c50 c8b2a3c0 c0dc00e4
+[   78.673088] fee0: c89a0480 00000201 00000cc0 30687465 00000000 00000000 00000000 00001002
+[   78.673822] ff00: 00000000 000a26d0 b6f43000 ca760866 00008914 c8b2a3c0 000a0ec4 c8b2a3c0
+[   78.674576] ff20: befe6c50 c04b21bc 000d5004 00000817 c89a0480 c0315f94 00000000 00000003
+[   78.675415] ff40: c19a2bc8 c8a3cc00 c89fe000 00000255 00000000 00000000 00000000 000d5000
+[   78.676182] ff60: 000f6000 c180b2a0 00000817 c0315e64 000d5004 c89fffb0 b6ec0c30 ca760866
+[   78.676928] ff80: 00000000 000b609b befe6c50 000a0ec4 00000036 c03002c4 c89fe000 00000036
+[   78.677673] ffa0: 00000000 c03000c0 000b609b befe6c50 00000003 00008914 befe6c50 000b609b
+[   78.678415] ffc0: 000b609b befe6c50 000a0ec4 00000036 befe6e0c befe6f1a 000d5150 00000000
+[   78.679154] ffe0: 000d41e4 befe6bf4 00019648 b6e4509c 20000010 00000003 00000000 00000000
+[   78.681059] [<c0b0a634>] (macb_open) from [<c0de6f2c>] (__dev_open+0xd0/0x154)
+[   78.681571] [<c0de6f2c>] (__dev_open) from [<c0de72f4>] (__dev_change_flags+0x16c/0x1c4)
+[   78.682015] [<c0de72f4>] (__dev_change_flags) from [<c0de7364>] (dev_change_flags+0x18/0x48)
+[   78.682493] [<c0de7364>] (dev_change_flags) from [<c0e8bd48>] (devinet_ioctl+0x5e4/0x75c)
+[   78.682945] [<c0e8bd48>] (devinet_ioctl) from [<c0e8ef58>] (inet_ioctl+0x1f0/0x3b4)
+[   78.683381] [<c0e8ef58>] (inet_ioctl) from [<c0dc00e4>] (sock_ioctl+0x39c/0x664)
+[   78.683818] [<c0dc00e4>] (sock_ioctl) from [<c04b21bc>] (ksys_ioctl+0x2d8/0x9c0)
+[   78.684343] [<c04b21bc>] (ksys_ioctl) from [<c03000c0>] (ret_fast_syscall+0x0/0x54)
+[   78.684789] Exception stack(0xc89fffa8 to 0xc89ffff0)
+[   78.685346] ffa0:                   000b609b befe6c50 00000003 00008914 befe6c50 000b609b
+[   78.686106] ffc0: 000b609b befe6c50 000a0ec4 00000036 befe6e0c befe6f1a 000d5150 00000000
+[   78.686710] ffe0: 000d41e4 befe6bf4 00019648 b6e4509c
+[   78.687582] Code: 9a000003 e5983078 e3130001 1affffef (e7f001f2)
+[   78.688788] ---[ end trace e3f2f6ab69754eae ]---
+
+This is due to NAPI being left enabled if macb_phylink_connect() fails.
+
+Fixes: 7897b071ac3b ("net: macb: convert to phylink")
+Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/cadence/macb_main.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/cadence/macb_main.c
++++ b/drivers/net/ethernet/cadence/macb_main.c
+@@ -2545,13 +2545,16 @@ static int macb_open(struct net_device *
+ 
+       err = macb_phylink_connect(bp);
+       if (err)
+-              goto pm_exit;
++              goto napi_exit;
+ 
+       netif_tx_start_all_queues(dev);
+ 
+       if (bp->ptp_info)
+               bp->ptp_info->ptp_init(dev);
+ 
++napi_exit:
++      for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue)
++              napi_disable(&queue->napi);
+ pm_exit:
+       if (err) {
+               pm_runtime_put_sync(&bp->pdev->dev);
diff --git a/queue-5.6/net-macb-only-disable-napi-on-the-actual-error-path.patch b/queue-5.6/net-macb-only-disable-napi-on-the-actual-error-path.patch

new file mode 100644 (file)

index 0000000..c5cb1df
--- /dev/null
+++ b/queue-5.6/net-macb-only-disable-napi-on-the-actual-error-path.patch
@@ -0,0 +1,45 @@
+From foo@baz Tue 16 Jun 2020 09:42:59 AM CEST
+From: Charles Keepax <ckeepax@opensource.cirrus.com>
+Date: Mon, 15 Jun 2020 14:18:54 +0100
+Subject: net: macb: Only disable NAPI on the actual error path
+
+From: Charles Keepax <ckeepax@opensource.cirrus.com>
+
+[ Upstream commit 939a5bf7c9b7a1ad9c5d3481c93766a522773531 ]
+
+A recent change added a NAPI disable to macb_open. This was
+intended to happen only on the error path but accidentally applies
+to all paths. This causes NAPI to be disabled on the success path, which
+leads to the network no longer functioning.
+
+Fixes: 014406babc1f ("net: cadence: macb: disable NAPI on error")
+Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
+Tested-by: Corentin Labbe <clabbe@baylibre.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/cadence/macb_main.c |    9 ++++-----
+ 1 file changed, 4 insertions(+), 5 deletions(-)
+
+--- a/drivers/net/ethernet/cadence/macb_main.c
++++ b/drivers/net/ethernet/cadence/macb_main.c
+@@ -2552,15 +2552,14 @@ static int macb_open(struct net_device *
+       if (bp->ptp_info)
+               bp->ptp_info->ptp_init(dev);
+ 
++      return 0;
++
+ napi_exit:
+       for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue)
+               napi_disable(&queue->napi);
+ pm_exit:
+-      if (err) {
+-              pm_runtime_put_sync(&bp->pdev->dev);
+-              return err;
+-      }
+-      return 0;
++      pm_runtime_put_sync(&bp->pdev->dev);
++      return err;
+ }
+ 
+ static int macb_close(struct net_device *dev)
diff --git a/queue-5.6/net-mlx5-disable-reload-while-removing-the-device.patch b/queue-5.6/net-mlx5-disable-reload-while-removing-the-device.patch

new file mode 100644 (file)

index 0000000..6af6db2
--- /dev/null
+++ b/queue-5.6/net-mlx5-disable-reload-while-removing-the-device.patch
@@ -0,0 +1,70 @@
+From foo@baz Tue 16 Jun 2020 09:42:59 AM CEST
+From: Parav Pandit <parav@mellanox.com>
+Date: Thu, 14 May 2020 05:12:56 -0500
+Subject: net/mlx5: Disable reload while removing the device
+
+From: Parav Pandit <parav@mellanox.com>
+
+[ Upstream commit 60904cd349abc98cb888fc28d1ca55a8e2cf87b3 ]
+
+While unregistration is in progress, a user might be reloading the
+interface.
+This can race with unregistration in the flow below, which uses
+resources that are being disabled by the reload flow.
+
+Hence, disable the devlink reloading first when removing the device.
+
+     CPU0                                   CPU1
+     ----                                   ----
+local_pci_remove()                  devlink_mutex
+  remove_one()                       devlink_nl_cmd_reload()
+    mlx5_unregister_device()           devlink_reload()
+                                       ops->reload_down()
+                                         mlx5_unload_one()
+
+Fixes: 4383cfcc65e7 ("net/mlx5: Add devlink reload")
+Signed-off-by: Parav Pandit <parav@mellanox.com>
+Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/devlink.c |    2 --
+ drivers/net/ethernet/mellanox/mlx5/core/main.c    |    2 ++
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+@@ -256,7 +256,6 @@ int mlx5_devlink_register(struct devlink
+               goto params_reg_err;
+       mlx5_devlink_set_params_init_values(devlink);
+       devlink_params_publish(devlink);
+-      devlink_reload_enable(devlink);
+       return 0;
+ 
+ params_reg_err:
+@@ -266,7 +265,6 @@ params_reg_err:
+ 
+ void mlx5_devlink_unregister(struct devlink *devlink)
+ {
+-      devlink_reload_disable(devlink);
+       devlink_params_unregister(devlink, mlx5_devlink_params,
+                                 ARRAY_SIZE(mlx5_devlink_params));
+       devlink_unregister(devlink);
+--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+@@ -1371,6 +1371,7 @@ static int init_one(struct pci_dev *pdev
+               dev_err(&pdev->dev, "mlx5_crdump_enable failed with error code %d\n", err);
+ 
+       pci_save_state(pdev);
++      devlink_reload_enable(devlink);
+       return 0;
+ 
+ err_load_one:
+@@ -1388,6 +1389,7 @@ static void remove_one(struct pci_dev *p
+       struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
+       struct devlink *devlink = priv_to_devlink(dev);
+ 
++      devlink_reload_disable(devlink);
+       mlx5_crdump_disable(dev);
+       mlx5_devlink_unregister(devlink);
+ 
diff --git a/queue-5.6/net-mlx5-drain-health-workqueue-in-case-of-driver-load-error.patch b/queue-5.6/net-mlx5-drain-health-workqueue-in-case-of-driver-load-error.patch

new file mode 100644 (file)

index 0000000..99ffe89
--- /dev/null
+++ b/queue-5.6/net-mlx5-drain-health-workqueue-in-case-of-driver-load-error.patch
@@ -0,0 +1,104 @@
+From foo@baz Tue 16 Jun 2020 09:42:59 AM CEST
+From: Shay Drory <shayd@mellanox.com>
+Date: Wed, 6 May 2020 15:59:48 +0300
+Subject: net/mlx5: drain health workqueue in case of driver load error
+
+From: Shay Drory <shayd@mellanox.com>
+
+[ Upstream commit 42ea9f1b5c625fad225d4ac96a7e757dd4199d9c ]
+
+If there is work pending in the health WQ when we tear down the driver
+in the driver load error flow, the health work will try to read dev->iseg,
+which was already unmapped in mlx5_pci_close().
+Fix it by draining the health workqueue first thing in mlx5_pci_close().
+
+Trace of the error:
+BUG: unable to handle page fault for address: ffffb5b141c18014
+PF: supervisor read access in kernel mode
+PF: error_code(0x0000) - not-present page
+PGD 1fe95d067 P4D 1fe95d067 PUD 1fe95e067 PMD 1b7823067 PTE 0
+Oops: 0000 [#1] SMP PTI
+CPU: 3 PID: 6755 Comm: kworker/u128:2 Not tainted 5.2.0-net-next-mlx5-hv_stats-over-last-worked-hyperv #1
+Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS 090006  04/28/2016
+Workqueue: mlx5_healtha050:00:02.0 mlx5_fw_fatal_reporter_err_work [mlx5_core]
+RIP: 0010:ioread32be+0x30/0x40
+Code: 00 77 27 48 81 ff 00 00 01 00 76 07 0f b7 d7 ed 0f c8 c3 55 48 c7 c6 3b ee d5 9f 48 89 e5 e8 67 fc ff ff b8 ff ff ff ff 5d c3 <8b> 07 0f c8 c3 66 66 2e 0f 1f 84 00 00 00 00 00 48 81 fe ff ff 03
+RSP: 0018:ffffb5b14c56fd78 EFLAGS: 00010292
+RAX: ffffb5b141c18000 RBX: ffff8e9f78a801c0 RCX: 0000000000000000
+RDX: 0000000000000001 RSI: ffff8e9f7ecd7628 RDI: ffffb5b141c18014
+RBP: ffffb5b14c56fd90 R08: 0000000000000001 R09: 0000000000000000
+R10: ffff8e9f372a2c30 R11: ffff8e9f87f4bc40 R12: ffff8e9f372a1fc0
+R13: ffff8e9f78a80000 R14: ffffffffc07136a0 R15: ffff8e9f78ae6f20
+FS:  0000000000000000(0000) GS:ffff8e9f7ecc0000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: ffffb5b141c18014 CR3: 00000001c8f82006 CR4: 00000000003606e0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+ ? mlx5_health_try_recover+0x4d/0x270 [mlx5_core]
+ mlx5_fw_fatal_reporter_recover+0x16/0x20 [mlx5_core]
+ devlink_health_reporter_recover+0x1c/0x50
+ devlink_health_report+0xfb/0x240
+ mlx5_fw_fatal_reporter_err_work+0x65/0xd0 [mlx5_core]
+ process_one_work+0x1fb/0x4e0
+ ? process_one_work+0x16b/0x4e0
+ worker_thread+0x4f/0x3d0
+ kthread+0x10d/0x140
+ ? process_one_work+0x4e0/0x4e0
+ ? kthread_cancel_delayed_work_sync+0x20/0x20
+ ret_from_fork+0x1f/0x30
+Modules linked in: nfsv3 rpcsec_gss_krb5 nfsv4 nfs fscache 8021q garp mrp stp llc ipmi_devintf ipmi_msghandler rpcrdma rdma_ucm ib_iser rdma_cm ib_umad iw_cm ib_ipoib libiscsi scsi_transport_iscsi ib_cm mlx5_ib ib_uverbs ib_core mlx5_core sb_edac crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel aes_x86_64 mlxfw crypto_simd cryptd glue_helper input_leds hyperv_fb intel_rapl_perf joydev serio_raw pci_hyperv pci_hyperv_mini mac_hid hv_balloon nfsd auth_rpcgss nfs_acl lockd grace sunrpc sch_fq_codel ip_tables x_tables autofs4 hv_utils hid_generic hv_storvsc ptp hid_hyperv hid hv_netvsc hyperv_keyboard pps_core scsi_transport_fc psmouse hv_vmbus i2c_piix4 floppy pata_acpi
+CR2: ffffb5b141c18014
+---[ end trace b12c5503157cad24 ]---
+RIP: 0010:ioread32be+0x30/0x40
+Code: 00 77 27 48 81 ff 00 00 01 00 76 07 0f b7 d7 ed 0f c8 c3 55 48 c7 c6 3b ee d5 9f 48 89 e5 e8 67 fc ff ff b8 ff ff ff ff 5d c3 <8b> 07 0f c8 c3 66 66 2e 0f 1f 84 00 00 00 00 00 48 81 fe ff ff 03
+RSP: 0018:ffffb5b14c56fd78 EFLAGS: 00010292
+RAX: ffffb5b141c18000 RBX: ffff8e9f78a801c0 RCX: 0000000000000000
+RDX: 0000000000000001 RSI: ffff8e9f7ecd7628 RDI: ffffb5b141c18014
+RBP: ffffb5b14c56fd90 R08: 0000000000000001 R09: 0000000000000000
+R10: ffff8e9f372a2c30 R11: ffff8e9f87f4bc40 R12: ffff8e9f372a1fc0
+R13: ffff8e9f78a80000 R14: ffffffffc07136a0 R15: ffff8e9f78ae6f20
+FS:  0000000000000000(0000) GS:ffff8e9f7ecc0000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: ffffb5b141c18014 CR3: 00000001c8f82006 CR4: 00000000003606e0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+BUG: sleeping function called from invalid context at ./include/linux/percpu-rwsem.h:38
+in_atomic(): 0, irqs_disabled(): 1, pid: 6755, name: kworker/u128:2
+INFO: lockdep is turned off.
+CPU: 3 PID: 6755 Comm: kworker/u128:2 Tainted: G      D           5.2.0-net-next-mlx5-hv_stats-over-last-worked-hyperv #1
+Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS 090006  04/28/2016
+Workqueue: mlx5_healtha050:00:02.0 mlx5_fw_fatal_reporter_err_work [mlx5_core]
+Call Trace:
+ dump_stack+0x63/0x88
+ ___might_sleep+0x10a/0x130
+ __might_sleep+0x4a/0x80
+ exit_signals+0x33/0x230
+ ? blocking_notifier_call_chain+0x16/0x20
+ do_exit+0xb1/0xc30
+ ? kthread+0x10d/0x140
+ ? process_one_work+0x4e0/0x4e0
+
+Fixes: 52c368dc3da7 ("net/mlx5: Move health and page alloc init to mdev_init")
+Signed-off-by: Shay Drory <shayd@mellanox.com>
+Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/main.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+@@ -794,6 +794,11 @@ err_disable:
+ 
+ static void mlx5_pci_close(struct mlx5_core_dev *dev)
+ {
++      /* health work might still be active, and it needs pci bar in
++       * order to know the NIC state. Therefore, drain the health WQ
++       * before removing the pci bars
++       */
++      mlx5_drain_health_wq(dev);
+       iounmap(dev->iseg);
+       pci_clear_master(dev->pdev);
+       release_bar(dev->pdev);
diff --git a/queue-5.6/net-mlx5-fix-fatal-error-handling-during-device-load.patch b/queue-5.6/net-mlx5-fix-fatal-error-handling-during-device-load.patch

new file mode 100644 (file)

index 0000000..fc6baea
--- /dev/null
+++ b/queue-5.6/net-mlx5-fix-fatal-error-handling-during-device-load.patch
@@ -0,0 +1,54 @@
+From foo@baz Tue 16 Jun 2020 09:42:59 AM CEST
+From: Shay Drory <shayd@mellanox.com>
+Date: Thu, 7 May 2020 09:32:53 +0300
+Subject: net/mlx5: Fix fatal error handling during device load
+
+From: Shay Drory <shayd@mellanox.com>
+
+[ Upstream commit b6e0b6bebe0732d5cac51f0791f269d2413b8980 ]
+
+Currently, in case of a fatal error during mlx5_load_one(), we cannot
+enter the error state until mlx5_load_one() has finished, which can take
+several minutes while the commands time out, because these commands
+can't be processed due to the fatal error.
+Fix it by setting dev->state to MLX5_DEVICE_STATE_INTERNAL_ERROR before
+requesting the lock.
+
+Fixes: c1d4d2e92ad6 ("net/mlx5: Avoid calling sleeping function by the health poll thread")
+Signed-off-by: Shay Drory <shayd@mellanox.com>
+Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/health.c |   14 +++++++++++---
+ 1 file changed, 11 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
+@@ -193,15 +193,23 @@ static bool reset_fw_if_needed(struct ml
+ 
+ void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force)
+ {
++      bool err_detected = false;
++
++      /* Mark the device as fatal in order to abort FW commands */
++      if ((check_fatal_sensors(dev) || force) &&
++          dev->state == MLX5_DEVICE_STATE_UP) {
++              dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
++              err_detected = true;
++      }
+       mutex_lock(&dev->intf_state_mutex);
+-      if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
+-              goto unlock;
++      if (!err_detected && dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
++              goto unlock;/* a previous error is still being handled */
+       if (dev->state == MLX5_DEVICE_STATE_UNINITIALIZED) {
+               dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
+               goto unlock;
+       }
+ 
+-      if (check_fatal_sensors(dev) || force) {
++      if (check_fatal_sensors(dev) || force) { /* protected state setting */
+               dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
+               mlx5_cmd_flush(dev);
+       }
diff --git a/queue-5.6/net-mlx5e-fix-repeated-xsk-usage-on-one-channel.patch b/queue-5.6/net-mlx5e-fix-repeated-xsk-usage-on-one-channel.patch

new file mode 100644 (file)

index 0000000..7f4139b
--- /dev/null
+++ b/queue-5.6/net-mlx5e-fix-repeated-xsk-usage-on-one-channel.patch
@@ -0,0 +1,44 @@
+From foo@baz Tue 16 Jun 2020 09:42:59 AM CEST
+From: Maxim Mikityanskiy <maximmi@mellanox.com>
+Date: Mon, 1 Jun 2020 16:03:44 +0300
+Subject: net/mlx5e: Fix repeated XSK usage on one channel
+
+From: Maxim Mikityanskiy <maximmi@mellanox.com>
+
+[ Upstream commit 36d45fb9d2fdf348d778bfe73f0427db1c6f9bc7 ]
+
+After an XSK is closed, the relevant structures in the channel are not
+zeroed. If an XSK is opened the second time on the same channel without
+recreating channels, the stray values in the structures will lead to
+incorrect operation of queues, which causes CQE errors, and the new
+socket doesn't work at all.
+
+This patch fixes the issue by explicitly zeroing XSK-related structs in
+the channel on XSK close. Note that those structs are zeroed on channel
+creation, and usually a configuration change (XDP program is set)
+happens on XSK open, which leads to recreating channels, so typical XSK
+usecases don't suffer from this issue. However, if XSKs are opened and
+closed on the same channel without removing the XDP program, this bug
+reproduces.
+
+Fixes: db05815b36cb ("net/mlx5e: Add XSK zero-copy support")
+Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
+@@ -152,6 +152,10 @@ void mlx5e_close_xsk(struct mlx5e_channe
+       mlx5e_close_cq(&c->xskicosq.cq);
+       mlx5e_close_xdpsq(&c->xsksq);
+       mlx5e_close_cq(&c->xsksq.cq);
++
++      memset(&c->xskrq, 0, sizeof(c->xskrq));
++      memset(&c->xsksq, 0, sizeof(c->xsksq));
++      memset(&c->xskicosq, 0, sizeof(c->xskicosq));
+ }
+ 
+ void mlx5e_activate_xsk(struct mlx5e_channel *c)
diff --git a/queue-5.6/net-mvneta-do-not-redirect-frames-during-reconfiguration.patch b/queue-5.6/net-mvneta-do-not-redirect-frames-during-reconfiguration.patch

new file mode 100644 (file)

index 0000000..5678d5a
--- /dev/null
+++ b/queue-5.6/net-mvneta-do-not-redirect-frames-during-reconfiguration.patch
@@ -0,0 +1,67 @@
+From foo@baz Tue 16 Jun 2020 09:42:59 AM CEST
+From: Lorenzo Bianconi <lorenzo@kernel.org>
+Date: Tue, 9 Jun 2020 00:02:39 +0200
+Subject: net: mvneta: do not redirect frames during reconfiguration
+
+From: Lorenzo Bianconi <lorenzo@kernel.org>
+
+[ Upstream commit 62a502cc91f97e3ffd312d9b42e8d01a137c63ff ]
+
+Disable frame injection in the mvneta_xdp_xmit routine during hw
+re-configuration in order to avoid hardware hangs.
+
+Fixes: b0a43db9087a ("net: mvneta: add XDP_TX support")
+Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/marvell/mvneta.c |   13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+--- a/drivers/net/ethernet/marvell/mvneta.c
++++ b/drivers/net/ethernet/marvell/mvneta.c
+@@ -418,11 +418,17 @@ struct mvneta_pcpu_port {
+       u32                     cause_rx_tx;
+ };
+ 
++enum {
++      __MVNETA_DOWN,
++};
++
+ struct mvneta_port {
+       u8 id;
+       struct mvneta_pcpu_port __percpu        *ports;
+       struct mvneta_pcpu_stats __percpu       *stats;
+ 
++      unsigned long state;
++
+       int pkt_size;
+       void __iomem *base;
+       struct mvneta_rx_queue *rxqs;
+@@ -2066,6 +2072,9 @@ mvneta_xdp_xmit(struct net_device *dev,
+       int i, drops = 0;
+       u32 ret;
+ 
++      if (unlikely(test_bit(__MVNETA_DOWN, &pp->state)))
++              return -ENETDOWN;
++
+       if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+               return -EINVAL;
+ 
+@@ -3489,12 +3498,16 @@ static void mvneta_start_dev(struct mvne
+ 
+       phylink_start(pp->phylink);
+       netif_tx_start_all_queues(pp->dev);
++
++      clear_bit(__MVNETA_DOWN, &pp->state);
+ }
+ 
+ static void mvneta_stop_dev(struct mvneta_port *pp)
+ {
+       unsigned int cpu;
+ 
++      set_bit(__MVNETA_DOWN, &pp->state);
++
+       phylink_stop(pp->phylink);
+ 
+       if (!pp->neta_armada3700) {
diff --git a/queue-5.6/selftests-net-in-rxtimestamp-getopt_long-needs-terminating-null-entry.patch b/queue-5.6/selftests-net-in-rxtimestamp-getopt_long-needs-terminating-null-entry.patch

new file mode 100644 (file)

index 0000000..c109ef3
--- /dev/null
+++ b/queue-5.6/selftests-net-in-rxtimestamp-getopt_long-needs-terminating-null-entry.patch
@@ -0,0 +1,31 @@
+From foo@baz Tue 16 Jun 2020 09:42:59 AM CEST
+From: tannerlove <tannerlove@google.com>
+Date: Tue, 9 Jun 2020 17:21:32 -0400
+Subject: selftests/net: in rxtimestamp getopt_long needs terminating null entry
+
+From: tannerlove <tannerlove@google.com>
+
+[ Upstream commit 865a6cbb2288f8af7f9dc3b153c61b7014fdcf1e ]
+
+getopt_long requires the last element to be filled with zeros.
+Otherwise, passing an unrecognized option can cause a segfault.
+
+Fixes: 16e781224198 ("selftests/net: Add a test to validate behavior of rx timestamps")
+Signed-off-by: Tanner Love <tannerlove@google.com>
+Acked-by: Willem de Bruijn <willemb@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/networking/timestamping/rxtimestamp.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/tools/testing/selftests/networking/timestamping/rxtimestamp.c
++++ b/tools/testing/selftests/networking/timestamping/rxtimestamp.c
+@@ -115,6 +115,7 @@ static struct option long_options[] = {
+       { "tcp", no_argument, 0, 't' },
+       { "udp", no_argument, 0, 'u' },
+       { "ip", no_argument, 0, 'i' },
++      { NULL, 0, NULL, 0 },
+ };
+ 
+ static int next_port = 19999;
diff --git a/queue-5.6/series b/queue-5.6/series

index 6d823e03843f2b00fbf53145d8c5c21e8078083c..03f95e48a0d4dbef6654cd3a6c24759f780dc5d8 100644 (file)
--- a/queue-5.6/series
+++ b/queue-5.6/series
@@ -103,3 +103,12 @@ io_uring-fix-flush-req-refs-underflow.patch
  x86-mce-mm-unmap-the-entire-page-if-the-whole-page-is-affected-and-poisoned.patch
  firmware-imx-scu-support-one-tx-and-one-rx.patch
  firmware-imx-scu-fix-corruption-of-header.patch
+dccp-fix-possible-memleak-in-dccp_init-and-dccp_fini.patch
+net-mvneta-do-not-redirect-frames-during-reconfiguration.patch
+selftests-net-in-rxtimestamp-getopt_long-needs-terminating-null-entry.patch
+net-mlx5-drain-health-workqueue-in-case-of-driver-load-error.patch
+net-mlx5-fix-fatal-error-handling-during-device-load.patch
+net-mlx5e-fix-repeated-xsk-usage-on-one-channel.patch
+net-cadence-macb-disable-napi-on-error.patch
+net-macb-only-disable-napi-on-the-actual-error-path.patch
+net-mlx5-disable-reload-while-removing-the-device.patch
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Tue, 16 Jun 2020 07:43:41 +0000 (09:43 +0200)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Tue, 16 Jun 2020 07:43:41 +0000 (09:43 +0200)
queue-5.6/dccp-fix-possible-memleak-in-dccp_init-and-dccp_fini.patch	[new file with mode: 0644]	patch \| blob
queue-5.6/net-cadence-macb-disable-napi-on-error.patch	[new file with mode: 0644]	patch \| blob
queue-5.6/net-macb-only-disable-napi-on-the-actual-error-path.patch	[new file with mode: 0644]	patch \| blob
queue-5.6/net-mlx5-disable-reload-while-removing-the-device.patch	[new file with mode: 0644]	patch \| blob
queue-5.6/net-mlx5-drain-health-workqueue-in-case-of-driver-load-error.patch	[new file with mode: 0644]	patch \| blob
queue-5.6/net-mlx5-fix-fatal-error-handling-during-device-load.patch	[new file with mode: 0644]	patch \| blob
queue-5.6/net-mlx5e-fix-repeated-xsk-usage-on-one-channel.patch	[new file with mode: 0644]	patch \| blob
queue-5.6/net-mvneta-do-not-redirect-frames-during-reconfiguration.patch	[new file with mode: 0644]	patch \| blob
queue-5.6/selftests-net-in-rxtimestamp-getopt_long-needs-terminating-null-entry.patch	[new file with mode: 0644]	patch \| blob
queue-5.6/series		patch \| blob \| blame \| history