4.9-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Mon, 28 Aug 2017 07:18:30 +0000 (09:18 +0200)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Mon, 28 Aug 2017 07:18:30 +0000 (09:18 +0200)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 28 Aug 2017 07:18:30 +0000 (09:18 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 28 Aug 2017 07:18:30 +0000 (09:18 +0200)
diff --git a/queue-4.9/acpi-apei-add-missing-synchronize_rcu-on-notify_sci-removal.patch b/queue-4.9/acpi-apei-add-missing-synchronize_rcu-on-notify_sci-removal.patch

new file mode 100644 (file)

index 0000000..1e1ffe4
--- /dev/null
+++ b/queue-4.9/acpi-apei-add-missing-synchronize_rcu-on-notify_sci-removal.patch
@@ -0,0 +1,34 @@
+From 7d64f82cceb21e6d95db312d284f5f195e120154 Mon Sep 17 00:00:00 2001
+From: James Morse <james.morse@arm.com>
+Date: Thu, 16 Mar 2017 14:30:39 +0000
+Subject: ACPI / APEI: Add missing synchronize_rcu() on NOTIFY_SCI removal
+
+From: James Morse <james.morse@arm.com>
+
+commit 7d64f82cceb21e6d95db312d284f5f195e120154 upstream.
+
+When removing a GHES device notified by SCI, list_del_rcu() is used,
+ghes_remove() should call synchronize_rcu() before it goes on to call
+kfree(ghes), otherwise concurrent RCU readers may still hold this list
+entry after it has been freed.
+
+Signed-off-by: James Morse <james.morse@arm.com>
+Reviewed-by: "Huang, Ying" <ying.huang@intel.com>
+Fixes: 81e88fdc432a (ACPI, APEI, Generic Hardware Error Source POLL/IRQ/NMI notification type support)
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/acpi/apei/ghes.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/acpi/apei/ghes.c
++++ b/drivers/acpi/apei/ghes.c
+@@ -1072,6 +1072,7 @@ static int ghes_remove(struct platform_d
+               if (list_empty(&ghes_sci))
+                       unregister_acpi_hed_notifier(&ghes_notifier_sci);
+               mutex_unlock(&ghes_list_mutex);
++              synchronize_rcu();
+               break;
+       case ACPI_HEST_NOTIFY_NMI:
+               ghes_nmi_remove(ghes);
diff --git a/queue-4.9/acpi-ec-fix-regression-related-to-wrong-ecdt-initialization-order.patch b/queue-4.9/acpi-ec-fix-regression-related-to-wrong-ecdt-initialization-order.patch

new file mode 100644 (file)

index 0000000..e66cf83
--- /dev/null
+++ b/queue-4.9/acpi-ec-fix-regression-related-to-wrong-ecdt-initialization-order.patch
@@ -0,0 +1,104 @@
+From 98529b9272e06a7767034fb8a32e43cdecda240a Mon Sep 17 00:00:00 2001
+From: Lv Zheng <lv.zheng@intel.com>
+Date: Wed, 16 Aug 2017 15:29:49 +0800
+Subject: ACPI: EC: Fix regression related to wrong ECDT initialization order
+
+From: Lv Zheng <lv.zheng@intel.com>
+
+commit 98529b9272e06a7767034fb8a32e43cdecda240a upstream.
+
+Commit 2a5708409e4e (ACPI / EC: Fix a gap that ECDT EC cannot handle
+EC events) introduced acpi_ec_ecdt_start(), but that function is
+invoked before acpi_ec_query_init(), which is too early.  This causes
+the kernel to crash if an EC event occurs after boot, when ec_query_wq
+is not valid:
+
+ BUG: unable to handle kernel NULL pointer dereference at 0000000000000102
+ ...
+ Workqueue: events acpi_ec_event_handler
+ task: ffff9f539790dac0 task.stack: ffffb437c0e10000
+ RIP: 0010:__queue_work+0x32/0x430
+
+Normally, the DSDT EC should always be valid, so acpi_ec_ecdt_start()
+is actually a no-op in the majority of cases.  However, commit
+c712bb58d827 (ACPI / EC: Add support to skip boot stage DSDT probe)
+caused the probing of the DSDT EC as the "boot EC" to be skipped when
+the ECDT EC is valid and uncovered the bug.
+
+Fix this issue by invoking acpi_ec_ecdt_start() after acpi_ec_query_init()
+in acpi_ec_init().
+
+Link: https://jira01.devtools.intel.com/browse/LCK-4348
+Fixes: 2a5708409e4e (ACPI / EC: Fix a gap that ECDT EC cannot handle EC events)
+Fixes: c712bb58d827 (ACPI / EC: Add support to skip boot stage DSDT probe)
+Reported-by: Wang Wendy <wendy.wang@intel.com>
+Tested-by: Feng Chenzhou <chenzhoux.feng@intel.com>
+Signed-off-by: Lv Zheng <lv.zheng@intel.com>
+[ rjw: Changelog ]
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/acpi/ec.c       |   17 +++++++----------
+ drivers/acpi/internal.h |    1 -
+ drivers/acpi/scan.c     |    1 -
+ 3 files changed, 7 insertions(+), 12 deletions(-)
+
+--- a/drivers/acpi/ec.c
++++ b/drivers/acpi/ec.c
+@@ -1728,7 +1728,7 @@ error:
+  * functioning ECDT EC first in order to handle the events.
+  * https://bugzilla.kernel.org/show_bug.cgi?id=115021
+  */
+-int __init acpi_ec_ecdt_start(void)
++static int __init acpi_ec_ecdt_start(void)
+ {
+       acpi_handle handle;
+ 
+@@ -1959,20 +1959,17 @@ static inline void acpi_ec_query_exit(vo
+ int __init acpi_ec_init(void)
+ {
+       int result;
++      int ecdt_fail, dsdt_fail;
+ 
+       /* register workqueue for _Qxx evaluations */
+       result = acpi_ec_query_init();
+       if (result)
+-              goto err_exit;
+-      /* Now register the driver for the EC */
+-      result = acpi_bus_register_driver(&acpi_ec_driver);
+-      if (result)
+-              goto err_exit;
++              return result;
+ 
+-err_exit:
+-      if (result)
+-              acpi_ec_query_exit();
+-      return result;
++      /* Drivers must be started after acpi_ec_query_init() */
++      ecdt_fail = acpi_ec_ecdt_start();
++      dsdt_fail = acpi_bus_register_driver(&acpi_ec_driver);
++      return ecdt_fail && dsdt_fail ? -ENODEV : 0;
+ }
+ 
+ /* EC driver currently not unloadable */
+--- a/drivers/acpi/internal.h
++++ b/drivers/acpi/internal.h
+@@ -185,7 +185,6 @@ typedef int (*acpi_ec_query_func) (void
+ int acpi_ec_init(void);
+ int acpi_ec_ecdt_probe(void);
+ int acpi_ec_dsdt_probe(void);
+-int acpi_ec_ecdt_start(void);
+ void acpi_ec_block_transactions(void);
+ void acpi_ec_unblock_transactions(void);
+ int acpi_ec_add_query_handler(struct acpi_ec *ec, u8 query_bit,
+--- a/drivers/acpi/scan.c
++++ b/drivers/acpi/scan.c
+@@ -2051,7 +2051,6 @@ int __init acpi_scan_init(void)
+ 
+       acpi_gpe_apply_masked_gpes();
+       acpi_update_all_gpes();
+-      acpi_ec_ecdt_start();
+ 
+       acpi_scan_initialized = true;
+ 
diff --git a/queue-4.9/acpi-ioapic-clear-on-stack-resource-before-using-it.patch b/queue-4.9/acpi-ioapic-clear-on-stack-resource-before-using-it.patch

new file mode 100644 (file)

index 0000000..af7e873
--- /dev/null
+++ b/queue-4.9/acpi-ioapic-clear-on-stack-resource-before-using-it.patch
@@ -0,0 +1,42 @@
+From e3d5092b6756b9e0b08f94bbeafcc7afe19f0996 Mon Sep 17 00:00:00 2001
+From: Joerg Roedel <jroedel@suse.de>
+Date: Wed, 22 Mar 2017 18:33:23 +0100
+Subject: ACPI: ioapic: Clear on-stack resource before using it
+
+From: Joerg Roedel <jroedel@suse.de>
+
+commit e3d5092b6756b9e0b08f94bbeafcc7afe19f0996 upstream.
+
+The on-stack resource-window 'win' in setup_res() is not
+properly initialized. This causes the pointers in the
+embedded 'struct resource' to contain stale addresses.
+
+These pointers (in my case the ->child pointer) later get
+propagated to the global iomem_resources list, causing a #GP
+exception when the list is traversed in
+iomem_map_sanity_check().
+
+Fixes: c183619b63ec (x86/irq, ACPI: Implement ACPI driver to support IOAPIC hotplug)
+Signed-off-by: Joerg Roedel <jroedel@suse.de>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/acpi/ioapic.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/drivers/acpi/ioapic.c
++++ b/drivers/acpi/ioapic.c
+@@ -45,6 +45,12 @@ static acpi_status setup_res(struct acpi
+       struct resource *res = data;
+       struct resource_win win;
+ 
++      /*
++       * We might assign this to 'res' later, make sure all pointers are
++       * cleared before the resource is added to the global list
++       */
++      memset(&win, 0, sizeof(win));
++
+       res->flags = 0;
+       if (acpi_dev_filter_resource_type(acpi_res, IORESOURCE_MEM))
+               return AE_OK;
diff --git a/queue-4.9/ntb-no-sleep-in-ntb_async_tx_submit.patch b/queue-4.9/ntb-no-sleep-in-ntb_async_tx_submit.patch

new file mode 100644 (file)

index 0000000..4bd0cca
--- /dev/null
+++ b/queue-4.9/ntb-no-sleep-in-ntb_async_tx_submit.patch
@@ -0,0 +1,139 @@
+From 88931ec3dc11e7dbceb3b0df455693873b508fbe Mon Sep 17 00:00:00 2001
+From: Allen Hubbe <Allen.Hubbe@dell.com>
+Date: Fri, 9 Jun 2017 18:06:36 -0400
+Subject: ntb: no sleep in ntb_async_tx_submit
+
+From: Allen Hubbe <Allen.Hubbe@dell.com>
+
+commit 88931ec3dc11e7dbceb3b0df455693873b508fbe upstream.
+
+Do not sleep in ntb_async_tx_submit, which could deadlock.
+This reverts commit "8c874cc140d667f84ae4642bb5b5e0d6396d2ca4"
+
+Fixes: 8c874cc140d6 ("NTB: Address out of DMA descriptor issue with NTB")
+Reported-by: Jia-Ju Bai <baijiaju1990@163.com>
+Signed-off-by: Allen Hubbe <Allen.Hubbe@dell.com>
+Acked-by: Dave Jiang <dave.jiang@intel.com>
+Signed-off-by: Jon Mason <jdmason@kudzu.us>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/ntb/ntb_transport.c |   50 ++++++--------------------------------------
+ 1 file changed, 7 insertions(+), 43 deletions(-)
+
+--- a/drivers/ntb/ntb_transport.c
++++ b/drivers/ntb/ntb_transport.c
+@@ -176,14 +176,12 @@ struct ntb_transport_qp {
+       u64 rx_err_ver;
+       u64 rx_memcpy;
+       u64 rx_async;
+-      u64 dma_rx_prep_err;
+       u64 tx_bytes;
+       u64 tx_pkts;
+       u64 tx_ring_full;
+       u64 tx_err_no_buf;
+       u64 tx_memcpy;
+       u64 tx_async;
+-      u64 dma_tx_prep_err;
+ };
+ 
+ struct ntb_transport_mw {
+@@ -256,8 +254,6 @@ enum {
+ #define QP_TO_MW(nt, qp)      ((qp) % nt->mw_count)
+ #define NTB_QP_DEF_NUM_ENTRIES        100
+ #define NTB_LINK_DOWN_TIMEOUT 10
+-#define DMA_RETRIES           20
+-#define DMA_OUT_RESOURCE_TO   msecs_to_jiffies(50)
+ 
+ static void ntb_transport_rxc_db(unsigned long data);
+ static const struct ntb_ctx_ops ntb_transport_ops;
+@@ -518,12 +514,6 @@ static ssize_t debugfs_read(struct file
+       out_offset += snprintf(buf + out_offset, out_count - out_offset,
+                              "free tx - \t%u\n",
+                              ntb_transport_tx_free_entry(qp));
+-      out_offset += snprintf(buf + out_offset, out_count - out_offset,
+-                             "DMA tx prep err - \t%llu\n",
+-                             qp->dma_tx_prep_err);
+-      out_offset += snprintf(buf + out_offset, out_count - out_offset,
+-                             "DMA rx prep err - \t%llu\n",
+-                             qp->dma_rx_prep_err);
+ 
+       out_offset += snprintf(buf + out_offset, out_count - out_offset,
+                              "\n");
+@@ -770,8 +760,6 @@ static void ntb_qp_link_down_reset(struc
+       qp->tx_err_no_buf = 0;
+       qp->tx_memcpy = 0;
+       qp->tx_async = 0;
+-      qp->dma_tx_prep_err = 0;
+-      qp->dma_rx_prep_err = 0;
+ }
+ 
+ static void ntb_qp_link_cleanup(struct ntb_transport_qp *qp)
+@@ -1314,7 +1302,6 @@ static int ntb_async_rx_submit(struct nt
+       struct dmaengine_unmap_data *unmap;
+       dma_cookie_t cookie;
+       void *buf = entry->buf;
+-      int retries = 0;
+ 
+       len = entry->len;
+       device = chan->device;
+@@ -1343,22 +1330,11 @@ static int ntb_async_rx_submit(struct nt
+ 
+       unmap->from_cnt = 1;
+ 
+-      for (retries = 0; retries < DMA_RETRIES; retries++) {
+-              txd = device->device_prep_dma_memcpy(chan,
+-                                                   unmap->addr[1],
+-                                                   unmap->addr[0], len,
+-                                                   DMA_PREP_INTERRUPT);
+-              if (txd)
+-                      break;
+-
+-              set_current_state(TASK_INTERRUPTIBLE);
+-              schedule_timeout(DMA_OUT_RESOURCE_TO);
+-      }
+-
+-      if (!txd) {
+-              qp->dma_rx_prep_err++;
++      txd = device->device_prep_dma_memcpy(chan, unmap->addr[1],
++                                           unmap->addr[0], len,
++                                           DMA_PREP_INTERRUPT);
++      if (!txd)
+               goto err_get_unmap;
+-      }
+ 
+       txd->callback_result = ntb_rx_copy_callback;
+       txd->callback_param = entry;
+@@ -1603,7 +1579,6 @@ static int ntb_async_tx_submit(struct nt
+       struct dmaengine_unmap_data *unmap;
+       dma_addr_t dest;
+       dma_cookie_t cookie;
+-      int retries = 0;
+ 
+       device = chan->device;
+       dest = qp->tx_mw_phys + qp->tx_max_frame * entry->tx_index;
+@@ -1625,21 +1600,10 @@ static int ntb_async_tx_submit(struct nt
+ 
+       unmap->to_cnt = 1;
+ 
+-      for (retries = 0; retries < DMA_RETRIES; retries++) {
+-              txd = device->device_prep_dma_memcpy(chan, dest,
+-                                                   unmap->addr[0], len,
+-                                                   DMA_PREP_INTERRUPT);
+-              if (txd)
+-                      break;
+-
+-              set_current_state(TASK_INTERRUPTIBLE);
+-              schedule_timeout(DMA_OUT_RESOURCE_TO);
+-      }
+-
+-      if (!txd) {
+-              qp->dma_tx_prep_err++;
++      txd = device->device_prep_dma_memcpy(chan, dest, unmap->addr[0], len,
++                                           DMA_PREP_INTERRUPT);
++      if (!txd)
+               goto err_get_unmap;
+-      }
+ 
+       txd->callback_result = ntb_tx_copy_callback;
+       txd->callback_param = entry;
diff --git a/queue-4.9/ntb-ntb_test-ensure-the-link-is-up-before-trying-to-configure-the-mws.patch b/queue-4.9/ntb-ntb_test-ensure-the-link-is-up-before-trying-to-configure-the-mws.patch

new file mode 100644 (file)

index 0000000..820e4b1
--- /dev/null
+++ b/queue-4.9/ntb-ntb_test-ensure-the-link-is-up-before-trying-to-configure-the-mws.patch
@@ -0,0 +1,39 @@
+From 0eb46345364d7318b11068c46e8a68d5dc10f65e Mon Sep 17 00:00:00 2001
+From: Logan Gunthorpe <logang@deltatee.com>
+Date: Tue, 25 Jul 2017 14:57:42 -0600
+Subject: ntb: ntb_test: ensure the link is up before trying to configure the mws
+
+From: Logan Gunthorpe <logang@deltatee.com>
+
+commit 0eb46345364d7318b11068c46e8a68d5dc10f65e upstream.
+
+After the link tests, there is a race on one side of the test for
+the link coming up. It's possible, in some cases, for the test script
+to write to the 'peer_trans' files before the link has come up.
+
+To fix this, we simply use the link event file to ensure both sides
+see the link as up before continuing.
+
+Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
+Acked-by: Allen Hubbe <Allen.Hubbe@dell.com>
+Signed-off-by: Jon Mason <jdmason@kudzu.us>
+Fixes: a9c59ef77458 ("ntb_test: Add a selftest script for the NTB subsystem")
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ tools/testing/selftests/ntb/ntb_test.sh |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/tools/testing/selftests/ntb/ntb_test.sh
++++ b/tools/testing/selftests/ntb/ntb_test.sh
+@@ -326,6 +326,10 @@ function ntb_tool_tests()
+       link_test $LOCAL_TOOL $REMOTE_TOOL
+       link_test $REMOTE_TOOL $LOCAL_TOOL
+ 
++      #Ensure the link is up on both sides before continuing
++      write_file Y $LOCAL_TOOL/link_event
++      write_file Y $REMOTE_TOOL/link_event
++
+       for PEER_TRANS in $(ls $LOCAL_TOOL/peer_trans*); do
+               PT=$(basename $PEER_TRANS)
+               write_file $MW_SIZE $LOCAL_TOOL/$PT
diff --git a/queue-4.9/ntb-ntb_test-fix-bug-printing-ntb_perf-results.patch b/queue-4.9/ntb-ntb_test-fix-bug-printing-ntb_perf-results.patch

new file mode 100644 (file)

index 0000000..b283585
--- /dev/null
+++ b/queue-4.9/ntb-ntb_test-fix-bug-printing-ntb_perf-results.patch
@@ -0,0 +1,34 @@
+From 07b0b22b3e58824f70b9188d085d400069ca3240 Mon Sep 17 00:00:00 2001
+From: Logan Gunthorpe <logang@deltatee.com>
+Date: Mon, 5 Jun 2017 10:13:24 -0600
+Subject: NTB: ntb_test: fix bug printing ntb_perf results
+
+From: Logan Gunthorpe <logang@deltatee.com>
+
+commit 07b0b22b3e58824f70b9188d085d400069ca3240 upstream.
+
+The code mistakenly prints the local perf results for the remote test
+so the script reports identical results for both directions. Fix this
+by ensuring we print the remote result.
+
+Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
+Fixes: a9c59ef77458 ("ntb_test: Add a selftest script for the NTB subsystem")
+Acked-by: Allen Hubbe <Allen.Hubbe@dell.com>
+Signed-off-by: Jon Mason <jdmason@kudzu.us>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ tools/testing/selftests/ntb/ntb_test.sh |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/tools/testing/selftests/ntb/ntb_test.sh
++++ b/tools/testing/selftests/ntb/ntb_test.sh
+@@ -305,7 +305,7 @@ function perf_test()
+       echo "Running remote perf test $WITH DMA"
+       write_file "" $REMOTE_PERF/run
+       echo -n "  "
+-      read_file $LOCAL_PERF/run
++      read_file $REMOTE_PERF/run
+       echo "  Passed"
+ 
+       _modprobe -r ntb_perf
diff --git a/queue-4.9/ntb-transport-shouldn-t-disable-link-due-to-bogus-values-in-spads.patch b/queue-4.9/ntb-transport-shouldn-t-disable-link-due-to-bogus-values-in-spads.patch

new file mode 100644 (file)

index 0000000..40284a4
--- /dev/null
+++ b/queue-4.9/ntb-transport-shouldn-t-disable-link-due-to-bogus-values-in-spads.patch
@@ -0,0 +1,40 @@
+From f3fd2afed8eee91620d05b69ab94c14793c849d7 Mon Sep 17 00:00:00 2001
+From: Dave Jiang <dave.jiang@intel.com>
+Date: Fri, 28 Jul 2017 15:10:48 -0700
+Subject: ntb: transport shouldn't disable link due to bogus values in SPADs
+
+From: Dave Jiang <dave.jiang@intel.com>
+
+commit f3fd2afed8eee91620d05b69ab94c14793c849d7 upstream.
+
+It seems that under certain scenarios the SPAD can have bogus values caused
+by an agent (i.e. BIOS or other software) that is not the kernel driver, and
+that causes memory window setup failure. This should not cause the link to
+be disabled because if we do that, the driver will never recover again. We
+have verified in testing that this issue happens and prevents proper link
+recovery.
+
+Signed-off-by: Dave Jiang <dave.jiang@intel.com>
+Acked-by: Allen Hubbe <Allen.Hubbe@dell.com>
+Signed-off-by: Jon Mason <jdmason@kudzu.us>
+Fixes: 84f766855f61 ("ntb: stop link work when we do not have memory")
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/ntb/ntb_transport.c |    4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+--- a/drivers/ntb/ntb_transport.c
++++ b/drivers/ntb/ntb_transport.c
+@@ -921,10 +921,8 @@ out1:
+               ntb_free_mw(nt, i);
+ 
+       /* if there's an actual failure, we should just bail */
+-      if (rc < 0) {
+-              ntb_link_disable(ndev);
++      if (rc < 0)
+               return;
+-      }
+ 
+ out:
+       if (ntb_link_is_up(ndev, NULL, NULL) == 1)
diff --git a/queue-4.9/ntb_transport-fix-bug-calculating-num_qps_mw.patch b/queue-4.9/ntb_transport-fix-bug-calculating-num_qps_mw.patch

new file mode 100644 (file)

index 0000000..198dcc3
--- /dev/null
+++ b/queue-4.9/ntb_transport-fix-bug-calculating-num_qps_mw.patch
@@ -0,0 +1,51 @@
+From 8e8496e0e9564b66165f5219a4e8ed20b0d3fc6b Mon Sep 17 00:00:00 2001
+From: Logan Gunthorpe <logang@deltatee.com>
+Date: Mon, 5 Jun 2017 14:00:53 -0600
+Subject: ntb_transport: fix bug calculating num_qps_mw
+
+From: Logan Gunthorpe <logang@deltatee.com>
+
+commit 8e8496e0e9564b66165f5219a4e8ed20b0d3fc6b upstream.
+
+A divide by zero error occurs if qp_count is less than mw_count because
+num_qps_mw is calculated to be zero. The calculation appears to be
+incorrect.
+
+The requirement is for num_qps_mw to be set to qp_count / mw_count
+with any remainder divided among the earlier mws.
+
+For example, if mw_count is 5 and qp_count is 12 then mws 0 and 1
+will have 3 qps per window and mws 2 through 4 will have 2 qps per window.
+Thus, when mw_num < qp_count % mw_count, num_qps_mw is 1 higher
+than when mw_num >= qp_count % mw_count.
+
+Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
+Fixes: e26a5843f7f5 ("NTB: Split ntb_hw_intel and ntb_transport drivers")
+Acked-by: Allen Hubbe <Allen.Hubbe@dell.com>
+Signed-off-by: Jon Mason <jdmason@kudzu.us>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/ntb/ntb_transport.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/ntb/ntb_transport.c
++++ b/drivers/ntb/ntb_transport.c
+@@ -625,7 +625,7 @@ static int ntb_transport_setup_qp_mw(str
+       if (!mw->virt_addr)
+               return -ENOMEM;
+ 
+-      if (qp_count % mw_count && mw_num + 1 < qp_count / mw_count)
++      if (mw_num < qp_count % mw_count)
+               num_qps_mw = qp_count / mw_count + 1;
+       else
+               num_qps_mw = qp_count / mw_count;
+@@ -1002,7 +1002,7 @@ static int ntb_transport_init_queue(stru
+       qp->event_handler = NULL;
+       ntb_qp_link_down_reset(qp);
+ 
+-      if (qp_count % mw_count && mw_num + 1 < qp_count / mw_count)
++      if (mw_num < qp_count % mw_count)
+               num_qps_mw = qp_count / mw_count + 1;
+       else
+               num_qps_mw = qp_count / mw_count;
diff --git a/queue-4.9/ntb_transport-fix-qp-count-bug.patch b/queue-4.9/ntb_transport-fix-qp-count-bug.patch

new file mode 100644 (file)

index 0000000..972427b
--- /dev/null
+++ b/queue-4.9/ntb_transport-fix-qp-count-bug.patch
@@ -0,0 +1,42 @@
+From cb827ee6ccc3e480f0d9c0e8e53eef55be5b0414 Mon Sep 17 00:00:00 2001
+From: Logan Gunthorpe <logang@deltatee.com>
+Date: Mon, 5 Jun 2017 14:00:52 -0600
+Subject: ntb_transport: fix qp count bug
+
+From: Logan Gunthorpe <logang@deltatee.com>
+
+commit cb827ee6ccc3e480f0d9c0e8e53eef55be5b0414 upstream.
+
+In cases where there are more mw's than spads/2-2, the mw count gets
+reduced to match the limitation. ntb_transport also tries to ensure that
+there are fewer qps than mws but uses the full mw count instead of
+the reduced one. When this happens, the math in
+'ntb_transport_setup_qp_mw' will get confused and result in a kernel
+paging request bug.
+
+This patch fixes the bug by reducing qp_count to the reduced mw count
+instead of the full mw count.
+
+Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
+Fixes: e26a5843f7f5 ("NTB: Split ntb_hw_intel and ntb_transport drivers")
+Acked-by: Allen Hubbe <Allen.Hubbe@dell.com>
+Signed-off-by: Jon Mason <jdmason@kudzu.us>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/ntb/ntb_transport.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/ntb/ntb_transport.c
++++ b/drivers/ntb/ntb_transport.c
+@@ -1125,8 +1125,8 @@ static int ntb_transport_probe(struct nt
+       qp_count = ilog2(qp_bitmap);
+       if (max_num_clients && max_num_clients < qp_count)
+               qp_count = max_num_clients;
+-      else if (mw_count < qp_count)
+-              qp_count = mw_count;
++      else if (nt->mw_count < qp_count)
++              qp_count = nt->mw_count;
+ 
+       qp_bitmap &= BIT_ULL(qp_count) - 1;
+ 
diff --git a/queue-4.9/powerpc-mm-ensure-cpumask-update-is-ordered.patch b/queue-4.9/powerpc-mm-ensure-cpumask-update-is-ordered.patch

new file mode 100644 (file)

index 0000000..4ac54e3
--- /dev/null
+++ b/queue-4.9/powerpc-mm-ensure-cpumask-update-is-ordered.patch
@@ -0,0 +1,86 @@
+From 1a92a80ad386a1a6e3b36d576d52a1a456394b70 Mon Sep 17 00:00:00 2001
+From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Date: Mon, 24 Jul 2017 14:28:00 +1000
+Subject: powerpc/mm: Ensure cpumask update is ordered
+
+From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+
+commit 1a92a80ad386a1a6e3b36d576d52a1a456394b70 upstream.
+
+There is no guarantee that the various isync's involved with
+the context switch will order the update of the CPU mask with
+the first TLB entry for the new context being loaded by the HW.
+
+Be safe here and add a memory barrier to order any subsequent
+load/store which may bring entries into the TLB.
+
+The corresponding barrier on the other side already exists as
+pte updates use pte_xchg() which uses __cmpxchg_u64 which has
+a sync after the atomic operation.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
+[mpe: Add comments in the code]
+[mpe: Backport to 4.12, minor context change]
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/include/asm/mmu_context.h      |   20 +++++++++++++++++++-
+ arch/powerpc/include/asm/pgtable-be-types.h |    1 +
+ arch/powerpc/include/asm/pgtable-types.h    |    1 +
+ 3 files changed, 21 insertions(+), 1 deletion(-)
+
+--- a/arch/powerpc/include/asm/mmu_context.h
++++ b/arch/powerpc/include/asm/mmu_context.h
+@@ -75,9 +75,27 @@ static inline void switch_mm_irqs_off(st
+                                     struct task_struct *tsk)
+ {
+       /* Mark this context has been used on the new CPU */
+-      if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next)))
++      if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next))) {
+               cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
+ 
++              /*
++               * This full barrier orders the store to the cpumask above vs
++               * a subsequent operation which allows this CPU to begin loading
++               * translations for next.
++               *
++               * When using the radix MMU that operation is the load of the
++               * MMU context id, which is then moved to SPRN_PID.
++               *
++               * For the hash MMU it is either the first load from slb_cache
++               * in switch_slb(), and/or the store of paca->mm_ctx_id in
++               * copy_mm_to_paca().
++               *
++               * On the read side the barrier is in pte_xchg(), which orders
++               * the store to the PTE vs the load of mm_cpumask.
++               */
++              smp_mb();
++      }
++
+       /* 32-bit keeps track of the current PGDIR in the thread struct */
+ #ifdef CONFIG_PPC32
+       tsk->thread.pgdir = next->pgd;
+--- a/arch/powerpc/include/asm/pgtable-be-types.h
++++ b/arch/powerpc/include/asm/pgtable-be-types.h
+@@ -87,6 +87,7 @@ static inline bool pte_xchg(pte_t *ptep,
+       unsigned long *p = (unsigned long *)ptep;
+       __be64 prev;
+ 
++      /* See comment in switch_mm_irqs_off() */
+       prev = (__force __be64)__cmpxchg_u64(p, (__force unsigned long)pte_raw(old),
+                                            (__force unsigned long)pte_raw(new));
+ 
+--- a/arch/powerpc/include/asm/pgtable-types.h
++++ b/arch/powerpc/include/asm/pgtable-types.h
+@@ -62,6 +62,7 @@ static inline bool pte_xchg(pte_t *ptep,
+ {
+       unsigned long *p = (unsigned long *)ptep;
+ 
++      /* See comment in switch_mm_irqs_off() */
+       return pte_val(old) == __cmpxchg_u64(p, pte_val(old), pte_val(new));
+ }
+ #endif
diff --git a/queue-4.9/series b/queue-4.9/series

index 0276aa317795cca31ee52cc13603dc6ea0734224..b28be797c7cdcc2878fa38b251d8465f0322393c 100644 (file)
--- a/queue-4.9/series
+++ b/queue-4.9/series
@@ -72,3 +72,13 @@ iio-imu-adis16480-fix-acceleration-scale-factor-for-adis16480.patch
  iio-hid-sensor-trigger-fix-the-race-with-user-space-powering-up-sensors.patch
  staging-rtl8188eu-add-rnx-n150nub-support.patch
  clarify-and-fix-max_lfs_filesize-macros.patch
+ntb_transport-fix-qp-count-bug.patch
+ntb_transport-fix-bug-calculating-num_qps_mw.patch
+ntb-ntb_test-fix-bug-printing-ntb_perf-results.patch
+ntb-no-sleep-in-ntb_async_tx_submit.patch
+ntb-ntb_test-ensure-the-link-is-up-before-trying-to-configure-the-mws.patch
+ntb-transport-shouldn-t-disable-link-due-to-bogus-values-in-spads.patch
+acpi-ioapic-clear-on-stack-resource-before-using-it.patch
+acpi-apei-add-missing-synchronize_rcu-on-notify_sci-removal.patch
+acpi-ec-fix-regression-related-to-wrong-ecdt-initialization-order.patch
+powerpc-mm-ensure-cpumask-update-is-ordered.patch
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Mon, 28 Aug 2017 07:18:30 +0000 (09:18 +0200)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Mon, 28 Aug 2017 07:18:30 +0000 (09:18 +0200)
queue-4.9/acpi-apei-add-missing-synchronize_rcu-on-notify_sci-removal.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/acpi-ec-fix-regression-related-to-wrong-ecdt-initialization-order.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/acpi-ioapic-clear-on-stack-resource-before-using-it.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/ntb-no-sleep-in-ntb_async_tx_submit.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/ntb-ntb_test-ensure-the-link-is-up-before-trying-to-configure-the-mws.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/ntb-ntb_test-fix-bug-printing-ntb_perf-results.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/ntb-transport-shouldn-t-disable-link-due-to-bogus-values-in-spads.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/ntb_transport-fix-bug-calculating-num_qps_mw.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/ntb_transport-fix-qp-count-bug.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/powerpc-mm-ensure-cpumask-update-is-ordered.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/series		patch \| blob \| blame \| history