From: Greg Kroah-Hartman
Date: Sat, 10 Nov 2018 18:24:52 +0000 (-0800)
Subject: 4.18-stable patches
X-Git-Tag: v4.19.2~79
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=181ae1b66d0643e74016b32253a5276c08b8b302;p=thirdparty%2Fkernel%2Fstable-queue.git

4.18-stable patches

added patches:
	acpi-nfit-fix-address-range-scrub-completion-tracking.patch
---
diff --git a/queue-4.18/acpi-nfit-fix-address-range-scrub-completion-tracking.patch b/queue-4.18/acpi-nfit-fix-address-range-scrub-completion-tracking.patch
new file mode 100644
index 00000000000..62cfb19f467
--- /dev/null
+++ b/queue-4.18/acpi-nfit-fix-address-range-scrub-completion-tracking.patch
@@ -0,0 +1,403 @@
+From d3abaf43bab8d5b0a3c6b982100d9e2be96de4ad Mon Sep 17 00:00:00 2001
+From: Dan Williams
+Date: Sat, 13 Oct 2018 20:32:17 -0700
+Subject: acpi, nfit: Fix Address Range Scrub completion tracking
+
+From: Dan Williams
+
+commit d3abaf43bab8d5b0a3c6b982100d9e2be96de4ad upstream.
+
+The Address Range Scrub implementation tried to skip running scrubs
+against ranges that were already scrubbed by the BIOS. Unfortunately
+that support also resulted in early scrub completions as evidenced by
+this debug output from nfit_test:
+
+    nd_region region9: ARS: range 1 short complete
+    nd_region region3: ARS: range 1 short complete
+    nd_region region4: ARS: range 2 ARS start (0)
+    nd_region region4: ARS: range 2 short complete
+
+...i.e. completions without any indications that the scrub was started.
+
+This state of affairs was hard to see in the code due to the
+proliferation of state bits and mistakenly trying to track done state
+per-range when the completion is a global property of the bus.
+
+So, kill the four ARS state bits (ARS_REQ, ARS_REQ_REDO, ARS_DONE, and
+ARS_SHORT), and replace them with just 2 request flags ARS_REQ_SHORT and
+ARS_REQ_LONG. The implementation will still complete and reap the
+results of BIOS initiated ARS, but it will not attempt to use that
+information to affect the completion status of scrubbing the ranges from
+a Linux perspective.
+
+Instead, try to synchronously run a short ARS per range at init time and
+schedule a long scrub in the background. If ARS is busy with an ARS
+request, schedule both a short and a long scrub for when ARS returns to
+idle. This logic also satisfies the intent of what ARS_REQ_REDO was
+trying to achieve. The new rule is that the REQ flag stays set until the
+next successful ars_start() for that range.
+
+With the new policy that the REQ flags are not cleared until the next
+start, the implementation no longer loses requests as can be seen from
+the following log:
+
+    nd_region region3: ARS: range 1 ARS start short (0)
+    nd_region region9: ARS: range 1 ARS start short (0)
+    nd_region region3: ARS: range 1 complete
+    nd_region region4: ARS: range 2 ARS start short (0)
+    nd_region region9: ARS: range 1 complete
+    nd_region region9: ARS: range 1 ARS start long (0)
+    nd_region region4: ARS: range 2 complete
+    nd_region region3: ARS: range 1 ARS start long (0)
+    nd_region region9: ARS: range 1 complete
+    nd_region region3: ARS: range 1 complete
+    nd_region region4: ARS: range 2 ARS start long (0)
+    nd_region region4: ARS: range 2 complete
+
+...note that the nfit_test emulated driver provides 2 buses, that is why
+some of the range indices are duplicated. Notice that each range
+now successfully completes a short and long scrub.
+
+Cc:
+Fixes: 14c73f997a5e ("nfit, address-range-scrub: introduce nfit_spa->ars_state")
+Fixes: cc3d3458d46f ("acpi/nfit: queue issuing of ars when an uc error...")
+Reported-by: Jacek Zloch
+Reported-by: Krzysztof Rusocki
+Reviewed-by: Dave Jiang
+Signed-off-by: Dan Williams
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/acpi/nfit/core.c |  161 +++++++++++++++++++++++++++--------------------
+ drivers/acpi/nfit/nfit.h |    9 +-
+ 2 files changed, 100 insertions(+), 70 deletions(-)
+
+--- a/drivers/acpi/nfit/core.c
++++ b/drivers/acpi/nfit/core.c
+@@ -2456,7 +2456,8 @@ static int ars_get_cap(struct acpi_nfit_
+ 	return cmd_rc;
+ }
+
+-static int ars_start(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa)
++static int ars_start(struct acpi_nfit_desc *acpi_desc,
++		struct nfit_spa *nfit_spa, enum nfit_ars_state req_type)
+ {
+ 	int rc;
+ 	int cmd_rc;
+@@ -2467,7 +2468,7 @@ static int ars_start(struct acpi_nfit_de
+ 	memset(&ars_start, 0, sizeof(ars_start));
+ 	ars_start.address = spa->address;
+ 	ars_start.length = spa->length;
+-	if (test_bit(ARS_SHORT, &nfit_spa->ars_state))
++	if (req_type == ARS_REQ_SHORT)
+ 		ars_start.flags = ND_ARS_RETURN_PREV_DATA;
+ 	if (nfit_spa_type(spa) == NFIT_SPA_PM)
+ 		ars_start.type = ND_ARS_PERSISTENT;
+@@ -2524,6 +2525,15 @@ static void ars_complete(struct acpi_nfi
+ 	struct nd_region *nd_region = nfit_spa->nd_region;
+ 	struct device *dev;
+
++	lockdep_assert_held(&acpi_desc->init_mutex);
++	/*
++	 * Only advance the ARS state for ARS runs initiated by the
++	 * kernel, ignore ARS results from BIOS initiated runs for scrub
++	 * completion tracking.
++	 */
++	if (acpi_desc->scrub_spa != nfit_spa)
++		return;
++
+ 	if ((ars_status->address >= spa->address && ars_status->address
+ 		< spa->address + spa->length)
+ 			|| (ars_status->address < spa->address)) {
+@@ -2543,23 +2553,13 @@ static void ars_complete(struct acpi_nfi
+ 	} else
+ 		return;
+
+-	if (test_bit(ARS_DONE, &nfit_spa->ars_state))
+-		return;
+-
+-	if (!test_and_clear_bit(ARS_REQ, &nfit_spa->ars_state))
+-		return;
+-
++	acpi_desc->scrub_spa = NULL;
+ 	if (nd_region) {
+ 		dev = nd_region_dev(nd_region);
+ 		nvdimm_region_notify(nd_region, NVDIMM_REVALIDATE_POISON);
+ 	} else
+ 		dev = acpi_desc->dev;
+-
+-	dev_dbg(dev, "ARS: range %d %s complete\n", spa->range_index,
+-		test_bit(ARS_SHORT, &nfit_spa->ars_state)
"short" : "long"); +- clear_bit(ARS_SHORT, &nfit_spa->ars_state); +- set_bit(ARS_DONE, &nfit_spa->ars_state); ++ dev_dbg(dev, "ARS: range %d complete\n", spa->range_index); + } + + static int ars_status_process_records(struct acpi_nfit_desc *acpi_desc) +@@ -2840,46 +2840,55 @@ static int acpi_nfit_query_poison(struct + return 0; + } + +-static int ars_register(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa, +- int *query_rc) ++static int ars_register(struct acpi_nfit_desc *acpi_desc, ++ struct nfit_spa *nfit_spa) + { +- int rc = *query_rc; ++ int rc; + +- if (no_init_ars) ++ if (no_init_ars || test_bit(ARS_FAILED, &nfit_spa->ars_state)) + return acpi_nfit_register_region(acpi_desc, nfit_spa); + +- set_bit(ARS_REQ, &nfit_spa->ars_state); +- set_bit(ARS_SHORT, &nfit_spa->ars_state); ++ set_bit(ARS_REQ_SHORT, &nfit_spa->ars_state); ++ set_bit(ARS_REQ_LONG, &nfit_spa->ars_state); + +- switch (rc) { ++ switch (acpi_nfit_query_poison(acpi_desc)) { + case 0: + case -EAGAIN: +- rc = ars_start(acpi_desc, nfit_spa); +- if (rc == -EBUSY) { +- *query_rc = rc; ++ rc = ars_start(acpi_desc, nfit_spa, ARS_REQ_SHORT); ++ /* shouldn't happen, try again later */ ++ if (rc == -EBUSY) + break; +- } else if (rc == 0) { +- rc = acpi_nfit_query_poison(acpi_desc); +- } else { ++ if (rc) { + set_bit(ARS_FAILED, &nfit_spa->ars_state); + break; + } +- if (rc == -EAGAIN) +- clear_bit(ARS_SHORT, &nfit_spa->ars_state); +- else if (rc == 0) +- ars_complete(acpi_desc, nfit_spa); ++ clear_bit(ARS_REQ_SHORT, &nfit_spa->ars_state); ++ rc = acpi_nfit_query_poison(acpi_desc); ++ if (rc) ++ break; ++ acpi_desc->scrub_spa = nfit_spa; ++ ars_complete(acpi_desc, nfit_spa); ++ /* ++ * If ars_complete() says we didn't complete the ++ * short scrub, we'll try again with a long ++ * request. ++ */ ++ acpi_desc->scrub_spa = NULL; + break; + case -EBUSY: ++ case -ENOMEM: + case -ENOSPC: ++ /* ++ * BIOS was using ARS, wait for it to complete (or ++ * resources to become available) and then perform our ++ * own scrubs. 
++		 */
+ 		break;
+ 	default:
+ 		set_bit(ARS_FAILED, &nfit_spa->ars_state);
+ 		break;
+ 	}
+
+-	if (test_and_clear_bit(ARS_DONE, &nfit_spa->ars_state))
+-		set_bit(ARS_REQ, &nfit_spa->ars_state);
+-
+ 	return acpi_nfit_register_region(acpi_desc, nfit_spa);
+ }
+
+@@ -2901,6 +2910,8 @@ static unsigned int __acpi_nfit_scrub(st
+ 	struct device *dev = acpi_desc->dev;
+ 	struct nfit_spa *nfit_spa;
+
++	lockdep_assert_held(&acpi_desc->init_mutex);
++
+ 	if (acpi_desc->cancel)
+ 		return 0;
+
+@@ -2924,21 +2935,49 @@ static unsigned int __acpi_nfit_scrub(st
+
+ 	ars_complete_all(acpi_desc);
+ 	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
++		enum nfit_ars_state req_type;
++		int rc;
++
+ 		if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
+ 			continue;
+-		if (test_bit(ARS_REQ, &nfit_spa->ars_state)) {
+-			int rc = ars_start(acpi_desc, nfit_spa);
+
+-			clear_bit(ARS_DONE, &nfit_spa->ars_state);
+-			dev = nd_region_dev(nfit_spa->nd_region);
+-			dev_dbg(dev, "ARS: range %d ARS start (%d)\n",
+-					nfit_spa->spa->range_index, rc);
+-			if (rc == 0 || rc == -EBUSY)
+-				return 1;
+-			dev_err(dev, "ARS: range %d ARS failed (%d)\n",
+-					nfit_spa->spa->range_index, rc);
+-			set_bit(ARS_FAILED, &nfit_spa->ars_state);
++
++		/* prefer short ARS requests first */
++		if (test_bit(ARS_REQ_SHORT, &nfit_spa->ars_state))
++			req_type = ARS_REQ_SHORT;
++		else if (test_bit(ARS_REQ_LONG, &nfit_spa->ars_state))
++			req_type = ARS_REQ_LONG;
++		else
++			continue;
++		rc = ars_start(acpi_desc, nfit_spa, req_type);
++
++		dev = nd_region_dev(nfit_spa->nd_region);
++		dev_dbg(dev, "ARS: range %d ARS start %s (%d)\n",
++				nfit_spa->spa->range_index,
++				req_type == ARS_REQ_SHORT ? "short" : "long",
++				rc);
++		/*
++		 * Hmm, we raced someone else starting ARS? Try again in
++		 * a bit.
++		 */
++		if (rc == -EBUSY)
++			return 1;
++		if (rc == 0) {
++			dev_WARN_ONCE(dev, acpi_desc->scrub_spa,
++					"scrub start while range %d active\n",
++					acpi_desc->scrub_spa->spa->range_index);
++			clear_bit(req_type, &nfit_spa->ars_state);
++			acpi_desc->scrub_spa = nfit_spa;
++			/*
++			 * Consider this spa last for future scrub
++			 * requests
++			 */
++			list_move_tail(&nfit_spa->list, &acpi_desc->spas);
++			return 1;
+ 		}
++
++		dev_err(dev, "ARS: range %d ARS failed (%d)\n",
++				nfit_spa->spa->range_index, rc);
++		set_bit(ARS_FAILED, &nfit_spa->ars_state);
+ 	}
+ 	return 0;
+ }
+@@ -2994,6 +3033,7 @@ static void acpi_nfit_init_ars(struct ac
+ 	struct nd_cmd_ars_cap ars_cap;
+ 	int rc;
+
++	set_bit(ARS_FAILED, &nfit_spa->ars_state);
+ 	memset(&ars_cap, 0, sizeof(ars_cap));
+ 	rc = ars_get_cap(acpi_desc, &ars_cap, nfit_spa);
+ 	if (rc < 0)
+@@ -3010,16 +3050,14 @@ static void acpi_nfit_init_ars(struct ac
+ 	nfit_spa->clear_err_unit = ars_cap.clear_err_unit;
+ 	acpi_desc->max_ars = max(nfit_spa->max_ars, acpi_desc->max_ars);
+ 	clear_bit(ARS_FAILED, &nfit_spa->ars_state);
+-	set_bit(ARS_REQ, &nfit_spa->ars_state);
+ }
+
+ static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc)
+ {
+ 	struct nfit_spa *nfit_spa;
+-	int rc, query_rc;
++	int rc;
+
+ 	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
+-		set_bit(ARS_FAILED, &nfit_spa->ars_state);
+ 		switch (nfit_spa_type(nfit_spa->spa)) {
+ 		case NFIT_SPA_VOLATILE:
+ 		case NFIT_SPA_PM:
+@@ -3028,20 +3066,12 @@ static int acpi_nfit_register_regions(st
+ 		}
+ 	}
+
+-	/*
+-	 * Reap any results that might be pending before starting new
+-	 * short requests.
+-	 */
+-	query_rc = acpi_nfit_query_poison(acpi_desc);
+-	if (query_rc == 0)
+-		ars_complete_all(acpi_desc);
+-
+ 	list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
+ 		switch (nfit_spa_type(nfit_spa->spa)) {
+ 		case NFIT_SPA_VOLATILE:
+ 		case NFIT_SPA_PM:
+ 			/* register regions and kick off initial ARS run */
+-			rc = ars_register(acpi_desc, nfit_spa, &query_rc);
++			rc = ars_register(acpi_desc, nfit_spa);
+ 			if (rc)
+ 				return rc;
+ 			break;
+@@ -3236,7 +3266,8 @@ static int acpi_nfit_clear_to_send(struc
+ 	return 0;
+ }
+
+-int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, unsigned long flags)
++int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc,
++		enum nfit_ars_state req_type)
+ {
+ 	struct device *dev = acpi_desc->dev;
+ 	int scheduled = 0, busy = 0;
+@@ -3256,13 +3287,10 @@ int acpi_nfit_ars_rescan(struct acpi_nfi
+ 		if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
+ 			continue;
+
+-		if (test_and_set_bit(ARS_REQ, &nfit_spa->ars_state))
++		if (test_and_set_bit(req_type, &nfit_spa->ars_state))
+ 			busy++;
+-		else {
+-			if (test_bit(ARS_SHORT, &flags))
+-				set_bit(ARS_SHORT, &nfit_spa->ars_state);
++		else
+ 			scheduled++;
+-		}
+ 	}
+ 	if (scheduled) {
+ 		sched_ars(acpi_desc);
+@@ -3448,10 +3476,11 @@ static void acpi_nfit_update_notify(stru
+ static void acpi_nfit_uc_error_notify(struct device *dev, acpi_handle handle)
+ {
+ 	struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(dev);
+-	unsigned long flags = (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON) ?
+-			0 : 1 << ARS_SHORT;
+
+-	acpi_nfit_ars_rescan(acpi_desc, flags);
++	if (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON)
++		acpi_nfit_ars_rescan(acpi_desc, ARS_REQ_LONG);
++	else
++		acpi_nfit_ars_rescan(acpi_desc, ARS_REQ_SHORT);
+ }
+
+ void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event)
+--- a/drivers/acpi/nfit/nfit.h
++++ b/drivers/acpi/nfit/nfit.h
+@@ -118,9 +118,8 @@ enum nfit_dimm_notifiers {
+ };
+
+ enum nfit_ars_state {
+-	ARS_REQ,
+-	ARS_DONE,
+-	ARS_SHORT,
++	ARS_REQ_SHORT,
++	ARS_REQ_LONG,
+ 	ARS_FAILED,
+ };
+
+@@ -197,6 +196,7 @@ struct acpi_nfit_desc {
+ 	struct device *dev;
+ 	u8 ars_start_flags;
+ 	struct nd_cmd_ars_status *ars_status;
++	struct nfit_spa *scrub_spa;
+ 	struct delayed_work dwork;
+ 	struct list_head list;
+ 	struct kernfs_node *scrub_count_state;
+@@ -251,7 +251,8 @@ struct nfit_blk {
+
+ extern struct list_head acpi_descs;
+ extern struct mutex acpi_desc_lock;
+-int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, unsigned long flags);
++int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc,
++		enum nfit_ars_state req_type);
+
+ #ifdef CONFIG_X86_MCE
+ void nfit_mce_register(void);
diff --git a/queue-4.18/series b/queue-4.18/series
index 0e248f886a6..a1a00a276ea 100644
--- a/queue-4.18/series
+++ b/queue-4.18/series
@@ -23,3 +23,4 @@ acpica-aml-parser-fix-parse-loop-to-correctly-skip-erroneous-extended-opcodes.pa
 kprobes-x86-use-preempt_enable-in-optimized_callback.patch
 ipmi-fix-timer-race-with-module-unload.patch
 mailbox-pcc-handle-parse-error.patch
+acpi-nfit-fix-address-range-scrub-completion-tracking.patch
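For readers following the commit message rather than the diff, a minimal standalone sketch of
the request-flag policy it describes: one pending-request bit per type, short preferred over
long, and a bit cleared only after a successful start so that a busy bus never loses a request.
This is illustrative user-space C, not the kernel implementation; struct range, fake_ars_start()
and the single -EBUSY retry are invented for the example.

/*
 * Sketch of the ARS_REQ_SHORT/ARS_REQ_LONG request policy (assumptions:
 * simplified types, a fake ars_start() that reports busy exactly once).
 */
#include <stdio.h>

enum req { REQ_SHORT = 1 << 0, REQ_LONG = 1 << 1 };

struct range {
	int index;
	unsigned int req;	/* pending request flags for this range */
};

/* pretend the bus is busy the first time a scrub start is attempted */
static int fake_ars_start(struct range *r, enum req type, int *busy_once)
{
	if (*busy_once) {
		*busy_once = 0;
		return -16;	/* stand-in for -EBUSY: caller keeps the flag set */
	}
	printf("range %d: ARS start %s\n", r->index,
	       type == REQ_SHORT ? "short" : "long");
	return 0;
}

/* one scheduling pass: prefer short, clear the flag only on success */
static void scrub_pass(struct range *r, int *busy_once)
{
	enum req type;

	if (r->req & REQ_SHORT)
		type = REQ_SHORT;
	else if (r->req & REQ_LONG)
		type = REQ_LONG;
	else
		return;		/* nothing pending */

	if (fake_ars_start(r, type, busy_once) == 0)
		r->req &= ~type;	/* request satisfied by a successful start */
	/* on busy the flag stays set, so the request is retried next pass */
}

int main(void)
{
	struct range r = { .index = 1, .req = REQ_SHORT | REQ_LONG };
	int busy_once = 1;

	scrub_pass(&r, &busy_once);	/* busy: nothing is lost */
	scrub_pass(&r, &busy_once);	/* short scrub starts, short flag cleared */
	scrub_pass(&r, &busy_once);	/* long scrub starts, long flag cleared */
	scrub_pass(&r, &busy_once);	/* idle: no flags left */
	return 0;
}

Running the four passes prints the short start followed by the long start, mirroring the
"ARS start short"/"ARS start long" pairs in the log above even though the first attempt hit
a busy bus.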