From 4bf907f9a49d8ebe235b02c22ec48fa4f68fe5af Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 5 Nov 2024 17:52:08 +0100 Subject: [PATCH] 6.11-stable patches added patches: cxl-port-fix-cxl-port-initialization-order-when-the-subsystem-is-built-in.patch cxl-port-fix-use-after-free-permit-out-of-order-decoder-shutdown.patch soc-qcom-pmic_glink-handle-glink-intent-allocation-rejections.patch --- ...order-when-the-subsystem-is-built-in.patch | 121 ++++++ ...permit-out-of-order-decoder-shutdown.patch | 380 ++++++++++++++++++ queue-6.11/series | 3 + ...e-glink-intent-allocation-rejections.patch | 99 +++++ 4 files changed, 603 insertions(+) create mode 100644 queue-6.11/cxl-port-fix-cxl-port-initialization-order-when-the-subsystem-is-built-in.patch create mode 100644 queue-6.11/cxl-port-fix-use-after-free-permit-out-of-order-decoder-shutdown.patch create mode 100644 queue-6.11/soc-qcom-pmic_glink-handle-glink-intent-allocation-rejections.patch diff --git a/queue-6.11/cxl-port-fix-cxl-port-initialization-order-when-the-subsystem-is-built-in.patch b/queue-6.11/cxl-port-fix-cxl-port-initialization-order-when-the-subsystem-is-built-in.patch new file mode 100644 index 00000000000..894201ebe58 --- /dev/null +++ b/queue-6.11/cxl-port-fix-cxl-port-initialization-order-when-the-subsystem-is-built-in.patch @@ -0,0 +1,121 @@ +From 6575b268157f37929948a8d1f3bafb3d7c055bc1 Mon Sep 17 00:00:00 2001 +From: Dan Williams +Date: Fri, 25 Oct 2024 12:32:55 -0700 +Subject: cxl/port: Fix CXL port initialization order when the subsystem is built-in + +From: Dan Williams + +commit 6575b268157f37929948a8d1f3bafb3d7c055bc1 upstream. + +When the CXL subsystem is built-in the module init order is determined +by Makefile order. That order violates expectations. The expectation is +that cxl_acpi and cxl_mem can race to attach. If cxl_acpi wins the race, +cxl_mem will find the enabled CXL root ports it needs. 
If cxl_acpi loses +the race it will retrigger cxl_mem to attach via cxl_bus_rescan(). That +flow only works if cxl_acpi can assume ports are enabled immediately +upon cxl_acpi_probe() return. That in turn can only happen in the +CONFIG_CXL_ACPI=y case if the cxl_port driver is registered before +cxl_acpi_probe() runs. + +Fix up the order to prevent initialization failures. Ensure that +cxl_port is built-in when cxl_acpi is also built-in, arrange for +Makefile order to resolve the subsys_initcall() order of cxl_port and +cxl_acpi, and arrange for Makefile order to resolve the +device_initcall() (module_init()) order of the remaining objects. + +As for what contributed to this not being found earlier, the CXL +regression environment, cxl_test, builds all CXL functionality as a +module to allow for symbol mocking and other dynamic reload tests. As a +result there is no regression coverage for the built-in case. + +Reported-by: Gregory Price +Closes: http://lore.kernel.org/20241004212504.1246-1-gourry@gourry.net +Tested-by: Gregory Price +Fixes: 8dd2bc0f8e02 ("cxl/mem: Add the cxl_mem driver") +Cc: stable@vger.kernel.org +Cc: Davidlohr Bueso +Cc: Jonathan Cameron +Cc: Dave Jiang +Cc: Alison Schofield +Cc: Vishal Verma +Cc: Ira Weiny +Reviewed-by: Jonathan Cameron +Reviewed-by: Ira Weiny +Tested-by: Alejandro Lucero +Reviewed-by: Alejandro Lucero +Signed-off-by: Dan Williams +Link: https://patch.msgid.link/172988474904.476062.7961350937442459266.stgit@dwillia2-xfh.jf.intel.com +Signed-off-by: Ira Weiny +Signed-off-by: Greg Kroah-Hartman +--- + drivers/cxl/Kconfig | 1 + + drivers/cxl/Makefile | 20 ++++++++++++++------ + drivers/cxl/port.c | 17 ++++++++++++++++- + 3 files changed, 31 insertions(+), 7 deletions(-) + +--- a/drivers/cxl/Kconfig ++++ b/drivers/cxl/Kconfig +@@ -60,6 +60,7 @@ config CXL_ACPI + default CXL_BUS + select ACPI_TABLE_LIB + select ACPI_HMAT ++ select CXL_PORT + help + Enable support for host managed device memory (HDM) resources + published by a 
platform's ACPI CXL memory layout description. See +--- a/drivers/cxl/Makefile ++++ b/drivers/cxl/Makefile +@@ -1,13 +1,21 @@ + # SPDX-License-Identifier: GPL-2.0 ++ ++# Order is important here for the built-in case: ++# - 'core' first for fundamental init ++# - 'port' before platform root drivers like 'acpi' so that CXL-root ports ++# are immediately enabled ++# - 'mem' and 'pmem' before endpoint drivers so that memdevs are ++# immediately enabled ++# - 'pci' last, also mirrors the hardware enumeration hierarchy + obj-y += core/ +-obj-$(CONFIG_CXL_PCI) += cxl_pci.o +-obj-$(CONFIG_CXL_MEM) += cxl_mem.o ++obj-$(CONFIG_CXL_PORT) += cxl_port.o + obj-$(CONFIG_CXL_ACPI) += cxl_acpi.o + obj-$(CONFIG_CXL_PMEM) += cxl_pmem.o +-obj-$(CONFIG_CXL_PORT) += cxl_port.o ++obj-$(CONFIG_CXL_MEM) += cxl_mem.o ++obj-$(CONFIG_CXL_PCI) += cxl_pci.o + +-cxl_mem-y := mem.o +-cxl_pci-y := pci.o ++cxl_port-y := port.o + cxl_acpi-y := acpi.o + cxl_pmem-y := pmem.o security.o +-cxl_port-y := port.o ++cxl_mem-y := mem.o ++cxl_pci-y := pci.o +--- a/drivers/cxl/port.c ++++ b/drivers/cxl/port.c +@@ -208,7 +208,22 @@ static struct cxl_driver cxl_port_driver + }, + }; + +-module_cxl_driver(cxl_port_driver); ++static int __init cxl_port_init(void) ++{ ++ return cxl_driver_register(&cxl_port_driver); ++} ++/* ++ * Be ready to immediately enable ports emitted by the platform CXL root ++ * (e.g. cxl_acpi) when CONFIG_CXL_PORT=y. 
++ */ ++subsys_initcall(cxl_port_init); ++ ++static void __exit cxl_port_exit(void) ++{ ++ cxl_driver_unregister(&cxl_port_driver); ++} ++module_exit(cxl_port_exit); ++ + MODULE_DESCRIPTION("CXL: Port enumeration and services"); + MODULE_LICENSE("GPL v2"); + MODULE_IMPORT_NS(CXL); diff --git a/queue-6.11/cxl-port-fix-use-after-free-permit-out-of-order-decoder-shutdown.patch b/queue-6.11/cxl-port-fix-use-after-free-permit-out-of-order-decoder-shutdown.patch new file mode 100644 index 00000000000..f3fb8e96900 --- /dev/null +++ b/queue-6.11/cxl-port-fix-use-after-free-permit-out-of-order-decoder-shutdown.patch @@ -0,0 +1,380 @@ +From 101c268bd2f37e965a5468353e62d154db38838e Mon Sep 17 00:00:00 2001 +From: Dan Williams +Date: Tue, 22 Oct 2024 18:43:49 -0700 +Subject: cxl/port: Fix use-after-free, permit out-of-order decoder shutdown + +From: Dan Williams + +commit 101c268bd2f37e965a5468353e62d154db38838e upstream. + +In support of investigating an initialization failure report [1], +cxl_test was updated to register mock memory-devices after the mock +root-port/bus device had been registered. That led to cxl_test crashing +with a use-after-free bug with the following signature: + + cxl_port_attach_region: cxl region3: cxl_host_bridge.0:port3 decoder3.0 add: mem0:decoder7.0 @ 0 next: cxl_switch_uport.0 nr_eps: 1 nr_targets: 1 + cxl_port_attach_region: cxl region3: cxl_host_bridge.0:port3 decoder3.0 add: mem4:decoder14.0 @ 1 next: cxl_switch_uport.0 nr_eps: 2 nr_targets: 1 + cxl_port_setup_targets: cxl region3: cxl_switch_uport.0:port6 target[0] = cxl_switch_dport.0 for mem0:decoder7.0 @ 0 +1) cxl_port_setup_targets: cxl region3: cxl_switch_uport.0:port6 target[1] = cxl_switch_dport.4 for mem4:decoder14.0 @ 1 + [..] 
+ cxld_unregister: cxl decoder14.0: + cxl_region_decode_reset: cxl_region region3: + mock_decoder_reset: cxl_port port3: decoder3.0 reset +2) mock_decoder_reset: cxl_port port3: decoder3.0: out of order reset, expected decoder3.1 + cxl_endpoint_decoder_release: cxl decoder14.0: + [..] + cxld_unregister: cxl decoder7.0: +3) cxl_region_decode_reset: cxl_region region3: + Oops: general protection fault, probably for non-canonical address 0x6b6b6b6b6b6b6bc3: 0000 [#1] PREEMPT SMP PTI + [..] + RIP: 0010:to_cxl_port+0x8/0x60 [cxl_core] + [..] + Call Trace: + + cxl_region_decode_reset+0x69/0x190 [cxl_core] + cxl_region_detach+0xe8/0x210 [cxl_core] + cxl_decoder_kill_region+0x27/0x40 [cxl_core] + cxld_unregister+0x5d/0x60 [cxl_core] + +At 1) a region has been established with 2 endpoint decoders (7.0 and +14.0). Those endpoints share a common switch-decoder in the topology +(3.0). At teardown, 2), decoder14.0 is the first to be removed and hits +the "out of order reset case" in the switch decoder. The effect though +is that region3 cleanup is aborted leaving it intact and +referencing decoder14.0. At 3) the second attempt to tear down region3 +trips over the stale decoder14.0 object which has long since been +deleted. + +The fix here is to recognize that the CXL specification places no +mandate on in-order shutdown of switch-decoders, the driver enforces +in-order allocation, and hardware enforces in-order commit. So, rather +than fail and leave objects dangling, always remove them. + +In support of making cxl_region_decode_reset() always succeed, +cxl_region_invalidate_memregion() failures are turned into warnings. +Crashing the kernel is ok there since system integrity is at risk if +caches cannot be managed around physical address mutation events like +CXL region destruction. + +A new device_for_each_child_reverse_from() is added to clean up +port->commit_end after all dependent decoders have been disabled. 
In +other words if decoders are allocated 0->1->2 and disabled 1->2->0 then +port->commit_end only decrements from 2 after 2 has been disabled, and +it decrements all the way to zero since 1 was disabled previously. + +Link: http://lore.kernel.org/20241004212504.1246-1-gourry@gourry.net [1] +Cc: stable@vger.kernel.org +Fixes: 176baefb2eb5 ("cxl/hdm: Commit decoder state to hardware") +Reviewed-by: Jonathan Cameron +Cc: Greg Kroah-Hartman +Cc: Davidlohr Bueso +Cc: Dave Jiang +Cc: Alison Schofield +Cc: Ira Weiny +Cc: Zijun Hu +Signed-off-by: Dan Williams +Reviewed-by: Ira Weiny +Link: https://patch.msgid.link/172964782781.81806.17902885593105284330.stgit@dwillia2-xfh.jf.intel.com +Signed-off-by: Ira Weiny +Signed-off-by: Greg Kroah-Hartman +--- + drivers/base/core.c | 35 ++++++++++++++++++++++++++++++ + drivers/cxl/core/hdm.c | 50 ++++++++++++++++++++++++++++++++++++------- + drivers/cxl/core/region.c | 48 +++++++++++------------------------------ + drivers/cxl/cxl.h | 3 +- + include/linux/device.h | 3 ++ + tools/testing/cxl/test/cxl.c | 14 ++++-------- + 6 files changed, 100 insertions(+), 53 deletions(-) + +--- a/drivers/base/core.c ++++ b/drivers/base/core.c +@@ -4042,6 +4042,41 @@ int device_for_each_child_reverse(struct + EXPORT_SYMBOL_GPL(device_for_each_child_reverse); + + /** ++ * device_for_each_child_reverse_from - device child iterator in reversed order. ++ * @parent: parent struct device. ++ * @from: optional starting point in child list ++ * @fn: function to be called for each device. ++ * @data: data for the callback. ++ * ++ * Iterate over @parent's child devices, starting at @from, and call @fn ++ * for each, passing it @data. This helper is identical to ++ * device_for_each_child_reverse() when @from is NULL. ++ * ++ * @fn is checked each iteration. 
If it returns anything other than 0, ++ * iteration stops and that value is returned to the caller of ++ * device_for_each_child_reverse_from(); ++ */ ++int device_for_each_child_reverse_from(struct device *parent, ++ struct device *from, const void *data, ++ int (*fn)(struct device *, const void *)) ++{ ++ struct klist_iter i; ++ struct device *child; ++ int error = 0; ++ ++ if (!parent->p) ++ return 0; ++ ++ klist_iter_init_node(&parent->p->klist_children, &i, ++ (from ? &from->p->knode_parent : NULL)); ++ while ((child = prev_device(&i)) && !error) ++ error = fn(child, data); ++ klist_iter_exit(&i); ++ return error; ++} ++EXPORT_SYMBOL_GPL(device_for_each_child_reverse_from); ++ ++/** + * device_find_child - device iterator for locating a particular device. + * @parent: parent struct device + * @match: Callback function to check device +--- a/drivers/cxl/core/hdm.c ++++ b/drivers/cxl/core/hdm.c +@@ -712,7 +712,44 @@ static int cxl_decoder_commit(struct cxl + return 0; + } + +-static int cxl_decoder_reset(struct cxl_decoder *cxld) ++static int commit_reap(struct device *dev, const void *data) ++{ ++ struct cxl_port *port = to_cxl_port(dev->parent); ++ struct cxl_decoder *cxld; ++ ++ if (!is_switch_decoder(dev) && !is_endpoint_decoder(dev)) ++ return 0; ++ ++ cxld = to_cxl_decoder(dev); ++ if (port->commit_end == cxld->id && ++ ((cxld->flags & CXL_DECODER_F_ENABLE) == 0)) { ++ port->commit_end--; ++ dev_dbg(&port->dev, "reap: %s commit_end: %d\n", ++ dev_name(&cxld->dev), port->commit_end); ++ } ++ ++ return 0; ++} ++ ++void cxl_port_commit_reap(struct cxl_decoder *cxld) ++{ ++ struct cxl_port *port = to_cxl_port(cxld->dev.parent); ++ ++ lockdep_assert_held_write(&cxl_region_rwsem); ++ ++ /* ++ * Once the highest committed decoder is disabled, free any other ++ * decoders that were pinned allocated by out-of-order release. 
++ */ ++ port->commit_end--; ++ dev_dbg(&port->dev, "reap: %s commit_end: %d\n", dev_name(&cxld->dev), ++ port->commit_end); ++ device_for_each_child_reverse_from(&port->dev, &cxld->dev, NULL, ++ commit_reap); ++} ++EXPORT_SYMBOL_NS_GPL(cxl_port_commit_reap, CXL); ++ ++static void cxl_decoder_reset(struct cxl_decoder *cxld) + { + struct cxl_port *port = to_cxl_port(cxld->dev.parent); + struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev); +@@ -721,14 +758,14 @@ static int cxl_decoder_reset(struct cxl_ + u32 ctrl; + + if ((cxld->flags & CXL_DECODER_F_ENABLE) == 0) +- return 0; ++ return; + +- if (port->commit_end != id) { ++ if (port->commit_end == id) ++ cxl_port_commit_reap(cxld); ++ else + dev_dbg(&port->dev, + "%s: out of order reset, expected decoder%d.%d\n", + dev_name(&cxld->dev), port->id, port->commit_end); +- return -EBUSY; +- } + + down_read(&cxl_dpa_rwsem); + ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(id)); +@@ -741,7 +778,6 @@ static int cxl_decoder_reset(struct cxl_ + writel(0, hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(id)); + up_read(&cxl_dpa_rwsem); + +- port->commit_end--; + cxld->flags &= ~CXL_DECODER_F_ENABLE; + + /* Userspace is now responsible for reconfiguring this decoder */ +@@ -751,8 +787,6 @@ static int cxl_decoder_reset(struct cxl_ + cxled = to_cxl_endpoint_decoder(&cxld->dev); + cxled->state = CXL_DECODER_STATE_MANUAL; + } +- +- return 0; + } + + static int cxl_setup_hdm_decoder_from_dvsec( +--- a/drivers/cxl/core/region.c ++++ b/drivers/cxl/core/region.c +@@ -232,8 +232,8 @@ static int cxl_region_invalidate_memregi + "Bypassing cpu_cache_invalidate_memregion() for testing!\n"); + return 0; + } else { +- dev_err(&cxlr->dev, +- "Failed to synchronize CPU cache state\n"); ++ dev_WARN(&cxlr->dev, ++ "Failed to synchronize CPU cache state\n"); + return -ENXIO; + } + } +@@ -242,19 +242,17 @@ static int cxl_region_invalidate_memregi + return 0; + } + +-static int cxl_region_decode_reset(struct cxl_region *cxlr, int count) ++static void 
cxl_region_decode_reset(struct cxl_region *cxlr, int count) + { + struct cxl_region_params *p = &cxlr->params; +- int i, rc = 0; ++ int i; + + /* +- * Before region teardown attempt to flush, and if the flush +- * fails cancel the region teardown for data consistency +- * concerns ++ * Before region teardown attempt to flush, evict any data cached for ++ * this region, or scream loudly about missing arch / platform support ++ * for CXL teardown. + */ +- rc = cxl_region_invalidate_memregion(cxlr); +- if (rc) +- return rc; ++ cxl_region_invalidate_memregion(cxlr); + + for (i = count - 1; i >= 0; i--) { + struct cxl_endpoint_decoder *cxled = p->targets[i]; +@@ -277,23 +275,17 @@ static int cxl_region_decode_reset(struc + cxl_rr = cxl_rr_load(iter, cxlr); + cxld = cxl_rr->decoder; + if (cxld->reset) +- rc = cxld->reset(cxld); +- if (rc) +- return rc; ++ cxld->reset(cxld); + set_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags); + } + + endpoint_reset: +- rc = cxled->cxld.reset(&cxled->cxld); +- if (rc) +- return rc; ++ cxled->cxld.reset(&cxled->cxld); + set_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags); + } + + /* all decoders associated with this region have been torn down */ + clear_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags); +- +- return 0; + } + + static int commit_decoder(struct cxl_decoder *cxld) +@@ -409,16 +401,8 @@ static ssize_t commit_store(struct devic + * still pending. 
+ */ + if (p->state == CXL_CONFIG_RESET_PENDING) { +- rc = cxl_region_decode_reset(cxlr, p->interleave_ways); +- /* +- * Revert to committed since there may still be active +- * decoders associated with this region, or move forward +- * to active to mark the reset successful +- */ +- if (rc) +- p->state = CXL_CONFIG_COMMIT; +- else +- p->state = CXL_CONFIG_ACTIVE; ++ cxl_region_decode_reset(cxlr, p->interleave_ways); ++ p->state = CXL_CONFIG_ACTIVE; + } + } + +@@ -2052,13 +2036,7 @@ static int cxl_region_detach(struct cxl_ + get_device(&cxlr->dev); + + if (p->state > CXL_CONFIG_ACTIVE) { +- /* +- * TODO: tear down all impacted regions if a device is +- * removed out of order +- */ +- rc = cxl_region_decode_reset(cxlr, p->interleave_ways); +- if (rc) +- goto out; ++ cxl_region_decode_reset(cxlr, p->interleave_ways); + p->state = CXL_CONFIG_ACTIVE; + } + +--- a/drivers/cxl/cxl.h ++++ b/drivers/cxl/cxl.h +@@ -359,7 +359,7 @@ struct cxl_decoder { + struct cxl_region *region; + unsigned long flags; + int (*commit)(struct cxl_decoder *cxld); +- int (*reset)(struct cxl_decoder *cxld); ++ void (*reset)(struct cxl_decoder *cxld); + }; + + /* +@@ -730,6 +730,7 @@ static inline bool is_cxl_root(struct cx + int cxl_num_decoders_committed(struct cxl_port *port); + bool is_cxl_port(const struct device *dev); + struct cxl_port *to_cxl_port(const struct device *dev); ++void cxl_port_commit_reap(struct cxl_decoder *cxld); + struct pci_bus; + int devm_cxl_register_pci_bus(struct device *host, struct device *uport_dev, + struct pci_bus *bus); +--- a/include/linux/device.h ++++ b/include/linux/device.h +@@ -1073,6 +1073,9 @@ int device_for_each_child(struct device + int (*fn)(struct device *dev, void *data)); + int device_for_each_child_reverse(struct device *dev, void *data, + int (*fn)(struct device *dev, void *data)); ++int device_for_each_child_reverse_from(struct device *parent, ++ struct device *from, const void *data, ++ int (*fn)(struct device *, const void *)); + struct 
device *device_find_child(struct device *dev, void *data, + int (*match)(struct device *dev, void *data)); + struct device *device_find_child_by_name(struct device *parent, +--- a/tools/testing/cxl/test/cxl.c ++++ b/tools/testing/cxl/test/cxl.c +@@ -693,26 +693,22 @@ static int mock_decoder_commit(struct cx + return 0; + } + +-static int mock_decoder_reset(struct cxl_decoder *cxld) ++static void mock_decoder_reset(struct cxl_decoder *cxld) + { + struct cxl_port *port = to_cxl_port(cxld->dev.parent); + int id = cxld->id; + + if ((cxld->flags & CXL_DECODER_F_ENABLE) == 0) +- return 0; ++ return; + + dev_dbg(&port->dev, "%s reset\n", dev_name(&cxld->dev)); +- if (port->commit_end != id) { ++ if (port->commit_end == id) ++ cxl_port_commit_reap(cxld); ++ else + dev_dbg(&port->dev, + "%s: out of order reset, expected decoder%d.%d\n", + dev_name(&cxld->dev), port->id, port->commit_end); +- return -EBUSY; +- } +- +- port->commit_end--; + cxld->flags &= ~CXL_DECODER_F_ENABLE; +- +- return 0; + } + + static void default_mock_decoder(struct cxl_decoder *cxld) diff --git a/queue-6.11/series b/queue-6.11/series index ab000d45903..267b54a9bd5 100644 --- a/queue-6.11/series +++ b/queue-6.11/series @@ -139,3 +139,6 @@ mm-shrinker-avoid-memleak-in-alloc_shrinker_info.patch firmware-microchip-auto-update-fix-poll_complete-to-not-report-spurious-timeout-errors.patch thunderbolt-fix-kasan-reported-stack-out-of-bounds-read-in-tb_retimer_scan.patch thunderbolt-honor-tmu-requirements-in-the-domain-when-setting-tmu-mode.patch +soc-qcom-pmic_glink-handle-glink-intent-allocation-rejections.patch +cxl-port-fix-use-after-free-permit-out-of-order-decoder-shutdown.patch +cxl-port-fix-cxl-port-initialization-order-when-the-subsystem-is-built-in.patch diff --git a/queue-6.11/soc-qcom-pmic_glink-handle-glink-intent-allocation-rejections.patch b/queue-6.11/soc-qcom-pmic_glink-handle-glink-intent-allocation-rejections.patch new file mode 100644 index 00000000000..294a4940f26 --- /dev/null +++ 
b/queue-6.11/soc-qcom-pmic_glink-handle-glink-intent-allocation-rejections.patch @@ -0,0 +1,99 @@ +From f8c879192465d9f328cb0df07208ef077c560bb1 Mon Sep 17 00:00:00 2001 +From: Bjorn Andersson +Date: Wed, 23 Oct 2024 17:24:33 +0000 +Subject: soc: qcom: pmic_glink: Handle GLINK intent allocation rejections + +From: Bjorn Andersson + +commit f8c879192465d9f328cb0df07208ef077c560bb1 upstream. + +Some versions of the pmic_glink firmware do not allow dynamic GLINK +intent allocations, attempting to send a message before the firmware has +allocated its receive buffers and announced these intent allocations +will fail. When this happens something like this shows up in the log: + + pmic_glink_altmode.pmic_glink_altmode pmic_glink.altmode.0: failed to send altmode request: 0x10 (-125) + pmic_glink_altmode.pmic_glink_altmode pmic_glink.altmode.0: failed to request altmode notifications: -125 + ucsi_glink.pmic_glink_ucsi pmic_glink.ucsi.0: failed to send UCSI read request: -125 + qcom_battmgr.pmic_glink_power_supply pmic_glink.power-supply.0: failed to request power notifications + +GLINK has been updated to distinguish between the cases where the remote +is going down (-ECANCELED) and the intent allocation being rejected +(-EAGAIN). + +Retry the send until intent buffers become available, or an actual +error occurs. + +To avoid infinitely waiting for the firmware in the event that this +misbehaves and no intents arrive, an arbitrary 5 second timeout is +used. + +This patch was developed with input from Chris Lew. 
+ +Reported-by: Johan Hovold +Closes: https://lore.kernel.org/all/Zqet8iInnDhnxkT9@hovoldconsulting.com/#t +Cc: stable@vger.kernel.org # rpmsg: glink: Handle rejected intent request better +Fixes: 58ef4ece1e41 ("soc: qcom: pmic_glink: Introduce base PMIC GLINK driver") +Tested-by: Johan Hovold +Reviewed-by: Johan Hovold +Signed-off-by: Bjorn Andersson +Reviewed-by: Chris Lew +Link: https://lore.kernel.org/r/20241023-pmic-glink-ecancelled-v2-2-ebc268129407@oss.qualcomm.com +Signed-off-by: Bjorn Andersson +Signed-off-by: Greg Kroah-Hartman +--- + drivers/soc/qcom/pmic_glink.c | 25 ++++++++++++++++++++++--- + 1 file changed, 22 insertions(+), 3 deletions(-) + +--- a/drivers/soc/qcom/pmic_glink.c ++++ b/drivers/soc/qcom/pmic_glink.c +@@ -4,6 +4,7 @@ + * Copyright (c) 2022, Linaro Ltd + */ + #include ++#include + #include + #include + #include +@@ -13,6 +14,8 @@ + #include + #include + ++#define PMIC_GLINK_SEND_TIMEOUT (5 * HZ) ++ + enum { + PMIC_GLINK_CLIENT_BATT = 0, + PMIC_GLINK_CLIENT_ALTMODE, +@@ -112,13 +115,29 @@ EXPORT_SYMBOL_GPL(pmic_glink_client_regi + int pmic_glink_send(struct pmic_glink_client *client, void *data, size_t len) + { + struct pmic_glink *pg = client->pg; ++ bool timeout_reached = false; ++ unsigned long start; + int ret; + + mutex_lock(&pg->state_lock); +- if (!pg->ept) ++ if (!pg->ept) { + ret = -ECONNRESET; +- else +- ret = rpmsg_send(pg->ept, data, len); ++ } else { ++ start = jiffies; ++ for (;;) { ++ ret = rpmsg_send(pg->ept, data, len); ++ if (ret != -EAGAIN) ++ break; ++ ++ if (timeout_reached) { ++ ret = -ETIMEDOUT; ++ break; ++ } ++ ++ usleep_range(1000, 5000); ++ timeout_reached = time_after(jiffies, start + PMIC_GLINK_SEND_TIMEOUT); ++ } ++ } + mutex_unlock(&pg->state_lock); + + return ret; -- 2.47.3