git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.18-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 10 Nov 2018 18:24:52 +0000 (10:24 -0800)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 10 Nov 2018 18:24:52 +0000 (10:24 -0800)
added patches:
acpi-nfit-fix-address-range-scrub-completion-tracking.patch

queue-4.18/acpi-nfit-fix-address-range-scrub-completion-tracking.patch [new file with mode: 0644]
queue-4.18/series

diff --git a/queue-4.18/acpi-nfit-fix-address-range-scrub-completion-tracking.patch b/queue-4.18/acpi-nfit-fix-address-range-scrub-completion-tracking.patch
new file mode 100644
index 0000000..62cfb19
--- /dev/null
+++ b/queue-4.18/acpi-nfit-fix-address-range-scrub-completion-tracking.patch
@@ -0,0 +1,403 @@
+From d3abaf43bab8d5b0a3c6b982100d9e2be96de4ad Mon Sep 17 00:00:00 2001
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Sat, 13 Oct 2018 20:32:17 -0700
+Subject: acpi, nfit: Fix Address Range Scrub completion tracking
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+commit d3abaf43bab8d5b0a3c6b982100d9e2be96de4ad upstream.
+
+The Address Range Scrub implementation tried to skip running scrubs
+against ranges that were already scrubbed by the BIOS. Unfortunately
+that support also resulted in early scrub completions as evidenced by
+this debug output from nfit_test:
+
+    nd_region region9: ARS: range 1 short complete
+    nd_region region3: ARS: range 1 short complete
+    nd_region region4: ARS: range 2 ARS start (0)
+    nd_region region4: ARS: range 2 short complete
+
+...i.e. completions without any indications that the scrub was started.
+
+This state of affairs was hard to see in the code due to the
+proliferation of state bits and mistakenly trying to track done state
+per-range when the completion is a global property of the bus.
+
+So, kill the four ARS state bits (ARS_REQ, ARS_REQ_REDO, ARS_DONE, and
+ARS_SHORT), and replace them with just 2 request flags ARS_REQ_SHORT and
+ARS_REQ_LONG. The implementation will still complete and reap the
+results of BIOS initiated ARS, but it will not attempt to use that
+information to affect the completion status of scrubbing the ranges from
+a Linux perspective.
+
+Instead, try to synchronously run a short ARS per range at init time and
+schedule a long scrub in the background. If ARS is busy with an ARS
+request, schedule both a short and a long scrub for when ARS returns to
+idle. This logic also satisfies the intent of what ARS_REQ_REDO was
+trying to achieve. The new rule is that the REQ flag stays set until the
+next successful ars_start() for that range.
+
+With the new policy that the REQ flags are not cleared until the next
+start, the implementation no longer loses requests as can be seen from
+the following log:
+
+    nd_region region3: ARS: range 1 ARS start short (0)
+    nd_region region9: ARS: range 1 ARS start short (0)
+    nd_region region3: ARS: range 1 complete
+    nd_region region4: ARS: range 2 ARS start short (0)
+    nd_region region9: ARS: range 1 complete
+    nd_region region9: ARS: range 1 ARS start long (0)
+    nd_region region4: ARS: range 2 complete
+    nd_region region3: ARS: range 1 ARS start long (0)
+    nd_region region9: ARS: range 1 complete
+    nd_region region3: ARS: range 1 complete
+    nd_region region4: ARS: range 2 ARS start long (0)
+    nd_region region4: ARS: range 2 complete
+
+...note that the nfit_test emulated driver provides 2 buses, that is why
+some of the range indices are duplicated. Notice that each range
+now successfully completes a short and long scrub.
+
+Cc: <stable@vger.kernel.org>
+Fixes: 14c73f997a5e ("nfit, address-range-scrub: introduce nfit_spa->ars_state")
+Fixes: cc3d3458d46f ("acpi/nfit: queue issuing of ars when an uc error...")
+Reported-by: Jacek Zloch <jacek.zloch@intel.com>
+Reported-by: Krzysztof Rusocki <krzysztof.rusocki@intel.com>
+Reviewed-by: Dave Jiang <dave.jiang@intel.com>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/acpi/nfit/core.c |  161 +++++++++++++++++++++++++++--------------------
+ drivers/acpi/nfit/nfit.h |    9 +-
+ 2 files changed, 100 insertions(+), 70 deletions(-)
+
+--- a/drivers/acpi/nfit/core.c
++++ b/drivers/acpi/nfit/core.c
+@@ -2456,7 +2456,8 @@ static int ars_get_cap(struct acpi_nfit_
+       return cmd_rc;
+ }
+-static int ars_start(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa)
++static int ars_start(struct acpi_nfit_desc *acpi_desc,
++              struct nfit_spa *nfit_spa, enum nfit_ars_state req_type)
+ {
+       int rc;
+       int cmd_rc;
+@@ -2467,7 +2468,7 @@ static int ars_start(struct acpi_nfit_de
+       memset(&ars_start, 0, sizeof(ars_start));
+       ars_start.address = spa->address;
+       ars_start.length = spa->length;
+-      if (test_bit(ARS_SHORT, &nfit_spa->ars_state))
++      if (req_type == ARS_REQ_SHORT)
+               ars_start.flags = ND_ARS_RETURN_PREV_DATA;
+       if (nfit_spa_type(spa) == NFIT_SPA_PM)
+               ars_start.type = ND_ARS_PERSISTENT;
+@@ -2524,6 +2525,15 @@ static void ars_complete(struct acpi_nfi
+       struct nd_region *nd_region = nfit_spa->nd_region;
+       struct device *dev;
++      lockdep_assert_held(&acpi_desc->init_mutex);
++      /*
++       * Only advance the ARS state for ARS runs initiated by the
++       * kernel, ignore ARS results from BIOS initiated runs for scrub
++       * completion tracking.
++       */
++      if (acpi_desc->scrub_spa != nfit_spa)
++              return;
++
+       if ((ars_status->address >= spa->address && ars_status->address
+                               < spa->address + spa->length)
+                       || (ars_status->address < spa->address)) {
+@@ -2543,23 +2553,13 @@ static void ars_complete(struct acpi_nfi
+       } else
+               return;
+-      if (test_bit(ARS_DONE, &nfit_spa->ars_state))
+-              return;
+-
+-      if (!test_and_clear_bit(ARS_REQ, &nfit_spa->ars_state))
+-              return;
+-
++      acpi_desc->scrub_spa = NULL;
+       if (nd_region) {
+               dev = nd_region_dev(nd_region);
+               nvdimm_region_notify(nd_region, NVDIMM_REVALIDATE_POISON);
+       } else
+               dev = acpi_desc->dev;
+-
+-      dev_dbg(dev, "ARS: range %d %s complete\n", spa->range_index,
+-                      test_bit(ARS_SHORT, &nfit_spa->ars_state)
+-                      ? "short" : "long");
+-      clear_bit(ARS_SHORT, &nfit_spa->ars_state);
+-      set_bit(ARS_DONE, &nfit_spa->ars_state);
++      dev_dbg(dev, "ARS: range %d complete\n", spa->range_index);
+ }
+ static int ars_status_process_records(struct acpi_nfit_desc *acpi_desc)
+@@ -2840,46 +2840,55 @@ static int acpi_nfit_query_poison(struct
+       return 0;
+ }
+-static int ars_register(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa,
+-              int *query_rc)
++static int ars_register(struct acpi_nfit_desc *acpi_desc,
++              struct nfit_spa *nfit_spa)
+ {
+-      int rc = *query_rc;
++      int rc;
+-      if (no_init_ars)
++      if (no_init_ars || test_bit(ARS_FAILED, &nfit_spa->ars_state))
+               return acpi_nfit_register_region(acpi_desc, nfit_spa);
+-      set_bit(ARS_REQ, &nfit_spa->ars_state);
+-      set_bit(ARS_SHORT, &nfit_spa->ars_state);
++      set_bit(ARS_REQ_SHORT, &nfit_spa->ars_state);
++      set_bit(ARS_REQ_LONG, &nfit_spa->ars_state);
+-      switch (rc) {
++      switch (acpi_nfit_query_poison(acpi_desc)) {
+       case 0:
+       case -EAGAIN:
+-              rc = ars_start(acpi_desc, nfit_spa);
+-              if (rc == -EBUSY) {
+-                      *query_rc = rc;
++              rc = ars_start(acpi_desc, nfit_spa, ARS_REQ_SHORT);
++              /* shouldn't happen, try again later */
++              if (rc == -EBUSY)
+                       break;
+-              } else if (rc == 0) {
+-                      rc = acpi_nfit_query_poison(acpi_desc);
+-              } else {
++              if (rc) {
+                       set_bit(ARS_FAILED, &nfit_spa->ars_state);
+                       break;
+               }
+-              if (rc == -EAGAIN)
+-                      clear_bit(ARS_SHORT, &nfit_spa->ars_state);
+-              else if (rc == 0)
+-                      ars_complete(acpi_desc, nfit_spa);
++              clear_bit(ARS_REQ_SHORT, &nfit_spa->ars_state);
++              rc = acpi_nfit_query_poison(acpi_desc);
++              if (rc)
++                      break;
++              acpi_desc->scrub_spa = nfit_spa;
++              ars_complete(acpi_desc, nfit_spa);
++              /*
++               * If ars_complete() says we didn't complete the
++               * short scrub, we'll try again with a long
++               * request.
++               */
++              acpi_desc->scrub_spa = NULL;
+               break;
+       case -EBUSY:
++      case -ENOMEM:
+       case -ENOSPC:
++              /*
++               * BIOS was using ARS, wait for it to complete (or
++               * resources to become available) and then perform our
++               * own scrubs.
++               */
+               break;
+       default:
+               set_bit(ARS_FAILED, &nfit_spa->ars_state);
+               break;
+       }
+-      if (test_and_clear_bit(ARS_DONE, &nfit_spa->ars_state))
+-              set_bit(ARS_REQ, &nfit_spa->ars_state);
+-
+       return acpi_nfit_register_region(acpi_desc, nfit_spa);
+ }
+@@ -2901,6 +2910,8 @@ static unsigned int __acpi_nfit_scrub(st
+       struct device *dev = acpi_desc->dev;
+       struct nfit_spa *nfit_spa;
++      lockdep_assert_held(&acpi_desc->init_mutex);
++
+       if (acpi_desc->cancel)
+               return 0;
+@@ -2924,21 +2935,49 @@ static unsigned int __acpi_nfit_scrub(st
+       ars_complete_all(acpi_desc);
+       list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
++              enum nfit_ars_state req_type;
++              int rc;
++
+               if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
+                       continue;
+-              if (test_bit(ARS_REQ, &nfit_spa->ars_state)) {
+-                      int rc = ars_start(acpi_desc, nfit_spa);
+-                      clear_bit(ARS_DONE, &nfit_spa->ars_state);
+-                      dev = nd_region_dev(nfit_spa->nd_region);
+-                      dev_dbg(dev, "ARS: range %d ARS start (%d)\n",
+-                                      nfit_spa->spa->range_index, rc);
+-                      if (rc == 0 || rc == -EBUSY)
+-                              return 1;
+-                      dev_err(dev, "ARS: range %d ARS failed (%d)\n",
+-                                      nfit_spa->spa->range_index, rc);
+-                      set_bit(ARS_FAILED, &nfit_spa->ars_state);
++              /* prefer short ARS requests first */
++              if (test_bit(ARS_REQ_SHORT, &nfit_spa->ars_state))
++                      req_type = ARS_REQ_SHORT;
++              else if (test_bit(ARS_REQ_LONG, &nfit_spa->ars_state))
++                      req_type = ARS_REQ_LONG;
++              else
++                      continue;
++              rc = ars_start(acpi_desc, nfit_spa, req_type);
++
++              dev = nd_region_dev(nfit_spa->nd_region);
++              dev_dbg(dev, "ARS: range %d ARS start %s (%d)\n",
++                              nfit_spa->spa->range_index,
++                              req_type == ARS_REQ_SHORT ? "short" : "long",
++                              rc);
++              /*
++               * Hmm, we raced someone else starting ARS? Try again in
++               * a bit.
++               */
++              if (rc == -EBUSY)
++                      return 1;
++              if (rc == 0) {
++                      dev_WARN_ONCE(dev, acpi_desc->scrub_spa,
++                                      "scrub start while range %d active\n",
++                                      acpi_desc->scrub_spa->spa->range_index);
++                      clear_bit(req_type, &nfit_spa->ars_state);
++                      acpi_desc->scrub_spa = nfit_spa;
++                      /*
++                       * Consider this spa last for future scrub
++                       * requests
++                       */
++                      list_move_tail(&nfit_spa->list, &acpi_desc->spas);
++                      return 1;
+               }
++
++              dev_err(dev, "ARS: range %d ARS failed (%d)\n",
++                              nfit_spa->spa->range_index, rc);
++              set_bit(ARS_FAILED, &nfit_spa->ars_state);
+       }
+       return 0;
+ }
+@@ -2994,6 +3033,7 @@ static void acpi_nfit_init_ars(struct ac
+       struct nd_cmd_ars_cap ars_cap;
+       int rc;
++      set_bit(ARS_FAILED, &nfit_spa->ars_state);
+       memset(&ars_cap, 0, sizeof(ars_cap));
+       rc = ars_get_cap(acpi_desc, &ars_cap, nfit_spa);
+       if (rc < 0)
+@@ -3010,16 +3050,14 @@ static void acpi_nfit_init_ars(struct ac
+       nfit_spa->clear_err_unit = ars_cap.clear_err_unit;
+       acpi_desc->max_ars = max(nfit_spa->max_ars, acpi_desc->max_ars);
+       clear_bit(ARS_FAILED, &nfit_spa->ars_state);
+-      set_bit(ARS_REQ, &nfit_spa->ars_state);
+ }
+ static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc)
+ {
+       struct nfit_spa *nfit_spa;
+-      int rc, query_rc;
++      int rc;
+       list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
+-              set_bit(ARS_FAILED, &nfit_spa->ars_state);
+               switch (nfit_spa_type(nfit_spa->spa)) {
+               case NFIT_SPA_VOLATILE:
+               case NFIT_SPA_PM:
+@@ -3028,20 +3066,12 @@ static int acpi_nfit_register_regions(st
+               }
+       }
+-      /*
+-       * Reap any results that might be pending before starting new
+-       * short requests.
+-       */
+-      query_rc = acpi_nfit_query_poison(acpi_desc);
+-      if (query_rc == 0)
+-              ars_complete_all(acpi_desc);
+-
+       list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
+               switch (nfit_spa_type(nfit_spa->spa)) {
+               case NFIT_SPA_VOLATILE:
+               case NFIT_SPA_PM:
+                       /* register regions and kick off initial ARS run */
+-                      rc = ars_register(acpi_desc, nfit_spa, &query_rc);
++                      rc = ars_register(acpi_desc, nfit_spa);
+                       if (rc)
+                               return rc;
+                       break;
+@@ -3236,7 +3266,8 @@ static int acpi_nfit_clear_to_send(struc
+       return 0;
+ }
+-int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, unsigned long flags)
++int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc,
++              enum nfit_ars_state req_type)
+ {
+       struct device *dev = acpi_desc->dev;
+       int scheduled = 0, busy = 0;
+@@ -3256,13 +3287,10 @@ int acpi_nfit_ars_rescan(struct acpi_nfi
+               if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
+                       continue;
+-              if (test_and_set_bit(ARS_REQ, &nfit_spa->ars_state))
++              if (test_and_set_bit(req_type, &nfit_spa->ars_state))
+                       busy++;
+-              else {
+-                      if (test_bit(ARS_SHORT, &flags))
+-                              set_bit(ARS_SHORT, &nfit_spa->ars_state);
++              else
+                       scheduled++;
+-              }
+       }
+       if (scheduled) {
+               sched_ars(acpi_desc);
+@@ -3448,10 +3476,11 @@ static void acpi_nfit_update_notify(stru
+ static void acpi_nfit_uc_error_notify(struct device *dev, acpi_handle handle)
+ {
+       struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(dev);
+-      unsigned long flags = (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON) ?
+-                      0 : 1 << ARS_SHORT;
+-      acpi_nfit_ars_rescan(acpi_desc, flags);
++      if (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON)
++              acpi_nfit_ars_rescan(acpi_desc, ARS_REQ_LONG);
++      else
++              acpi_nfit_ars_rescan(acpi_desc, ARS_REQ_SHORT);
+ }
+ void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event)
+--- a/drivers/acpi/nfit/nfit.h
++++ b/drivers/acpi/nfit/nfit.h
+@@ -118,9 +118,8 @@ enum nfit_dimm_notifiers {
+ };
+ enum nfit_ars_state {
+-      ARS_REQ,
+-      ARS_DONE,
+-      ARS_SHORT,
++      ARS_REQ_SHORT,
++      ARS_REQ_LONG,
+       ARS_FAILED,
+ };
+@@ -197,6 +196,7 @@ struct acpi_nfit_desc {
+       struct device *dev;
+       u8 ars_start_flags;
+       struct nd_cmd_ars_status *ars_status;
++      struct nfit_spa *scrub_spa;
+       struct delayed_work dwork;
+       struct list_head list;
+       struct kernfs_node *scrub_count_state;
+@@ -251,7 +251,8 @@ struct nfit_blk {
+ extern struct list_head acpi_descs;
+ extern struct mutex acpi_desc_lock;
+-int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, unsigned long flags);
++int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc,
++              enum nfit_ars_state req_type);
+ #ifdef CONFIG_X86_MCE
+ void nfit_mce_register(void);
diff --git a/queue-4.18/series b/queue-4.18/series
index 0e248f886a6c34fefda892c97ae8811cfa3454a3..a1a00a276eadc809375bdc3804bc89f8c8c17838 100644
--- a/queue-4.18/series
+++ b/queue-4.18/series
@@ -23,3 +23,4 @@ acpica-aml-parser-fix-parse-loop-to-correctly-skip-erroneous-extended-opcodes.pa
 kprobes-x86-use-preempt_enable-in-optimized_callback.patch
 ipmi-fix-timer-race-with-module-unload.patch
 mailbox-pcc-handle-parse-error.patch
+acpi-nfit-fix-address-range-scrub-completion-tracking.patch