From: Greg Kroah-Hartman Date: Tue, 5 Nov 2024 16:52:00 +0000 (+0100) Subject: 6.6-stable patches X-Git-Tag: v4.19.323~64 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=9ad687cdfbdec56b871cec05ff95483e910df45b;p=thirdparty%2Fkernel%2Fstable-queue.git 6.6-stable patches added patches: cxl-port-fix-use-after-free-permit-out-of-order-decoder-shutdown.patch --- diff --git a/queue-6.6/cxl-port-fix-use-after-free-permit-out-of-order-decoder-shutdown.patch b/queue-6.6/cxl-port-fix-use-after-free-permit-out-of-order-decoder-shutdown.patch new file mode 100644 index 00000000000..5111f304d43 --- /dev/null +++ b/queue-6.6/cxl-port-fix-use-after-free-permit-out-of-order-decoder-shutdown.patch @@ -0,0 +1,380 @@ +From 101c268bd2f37e965a5468353e62d154db38838e Mon Sep 17 00:00:00 2001 +From: Dan Williams +Date: Tue, 22 Oct 2024 18:43:49 -0700 +Subject: cxl/port: Fix use-after-free, permit out-of-order decoder shutdown + +From: Dan Williams + +commit 101c268bd2f37e965a5468353e62d154db38838e upstream. + +In support of investigating an initialization failure report [1], +cxl_test was updated to register mock memory-devices after the mock +root-port/bus device had been registered. That led to cxl_test crashing +with a use-after-free bug with the following signature: + + cxl_port_attach_region: cxl region3: cxl_host_bridge.0:port3 decoder3.0 add: mem0:decoder7.0 @ 0 next: cxl_switch_uport.0 nr_eps: 1 nr_targets: 1 + cxl_port_attach_region: cxl region3: cxl_host_bridge.0:port3 decoder3.0 add: mem4:decoder14.0 @ 1 next: cxl_switch_uport.0 nr_eps: 2 nr_targets: 1 + cxl_port_setup_targets: cxl region3: cxl_switch_uport.0:port6 target[0] = cxl_switch_dport.0 for mem0:decoder7.0 @ 0 +1) cxl_port_setup_targets: cxl region3: cxl_switch_uport.0:port6 target[1] = cxl_switch_dport.4 for mem4:decoder14.0 @ 1 + [..] + cxld_unregister: cxl decoder14.0: + cxl_region_decode_reset: cxl_region region3: + mock_decoder_reset: cxl_port port3: decoder3.0 reset +2) mock_decoder_reset: cxl_port port3: decoder3.0: out of order reset, expected decoder3.1 + cxl_endpoint_decoder_release: cxl decoder14.0: + [..] + cxld_unregister: cxl decoder7.0: +3) cxl_region_decode_reset: cxl_region region3: + Oops: general protection fault, probably for non-canonical address 0x6b6b6b6b6b6b6bc3: 0000 [#1] PREEMPT SMP PTI + [..] + RIP: 0010:to_cxl_port+0x8/0x60 [cxl_core] + [..] + Call Trace: + + cxl_region_decode_reset+0x69/0x190 [cxl_core] + cxl_region_detach+0xe8/0x210 [cxl_core] + cxl_decoder_kill_region+0x27/0x40 [cxl_core] + cxld_unregister+0x5d/0x60 [cxl_core] + +At 1) a region has been established with 2 endpoint decoders (7.0 and +14.0). Those endpoints share a common switch-decoder in the topology +(3.0). At teardown, 2), decoder14.0 is the first to be removed and hits +the "out of order reset case" in the switch decoder. The effect though +is that region3 cleanup is aborted leaving it in-tact and +referencing decoder14.0. At 3) the second attempt to teardown region3 +trips over the stale decoder14.0 object which has long since been +deleted. + +The fix here is to recognize that the CXL specification places no +mandate on in-order shutdown of switch-decoders, the driver enforces +in-order allocation, and hardware enforces in-order commit. So, rather +than fail and leave objects dangling, always remove them. + +In support of making cxl_region_decode_reset() always succeed, +cxl_region_invalidate_memregion() failures are turned into warnings. +Crashing the kernel is ok there since system integrity is at risk if +caches cannot be managed around physical address mutation events like +CXL region destruction. + +A new device_for_each_child_reverse_from() is added to cleanup +port->commit_end after all dependent decoders have been disabled. In +other words if decoders are allocated 0->1->2 and disabled 1->2->0 then +port->commit_end only decrements from 2 after 2 has been disabled, and +it decrements all the way to zero since 1 was disabled previously. + +Link: http://lore.kernel.org/20241004212504.1246-1-gourry@gourry.net [1] +Cc: stable@vger.kernel.org +Fixes: 176baefb2eb5 ("cxl/hdm: Commit decoder state to hardware") +Reviewed-by: Jonathan Cameron +Cc: Greg Kroah-Hartman +Cc: Davidlohr Bueso +Cc: Dave Jiang +Cc: Alison Schofield +Cc: Ira Weiny +Cc: Zijun Hu +Signed-off-by: Dan Williams +Reviewed-by: Ira Weiny +Link: https://patch.msgid.link/172964782781.81806.17902885593105284330.stgit@dwillia2-xfh.jf.intel.com +Signed-off-by: Ira Weiny +Signed-off-by: Greg Kroah-Hartman +--- + drivers/base/core.c | 35 ++++++++++++++++++++++++++++++ + drivers/cxl/core/hdm.c | 50 ++++++++++++++++++++++++++++++++++++------- + drivers/cxl/core/region.c | 48 +++++++++++------------------------------ + drivers/cxl/cxl.h | 3 +- + include/linux/device.h | 3 ++ + tools/testing/cxl/test/cxl.c | 14 ++++-------- + 6 files changed, 100 insertions(+), 53 deletions(-) + +--- a/drivers/base/core.c ++++ b/drivers/base/core.c +@@ -4012,6 +4012,41 @@ int device_for_each_child_reverse(struct + EXPORT_SYMBOL_GPL(device_for_each_child_reverse); + + /** ++ * device_for_each_child_reverse_from - device child iterator in reversed order. ++ * @parent: parent struct device. ++ * @from: optional starting point in child list ++ * @fn: function to be called for each device. ++ * @data: data for the callback. ++ * ++ * Iterate over @parent's child devices, starting at @from, and call @fn ++ * for each, passing it @data. This helper is identical to ++ * device_for_each_child_reverse() when @from is NULL. ++ * ++ * @fn is checked each iteration. If it returns anything other than 0, ++ * iteration stop and that value is returned to the caller of ++ * device_for_each_child_reverse_from(); ++ */ ++int device_for_each_child_reverse_from(struct device *parent, ++ struct device *from, const void *data, ++ int (*fn)(struct device *, const void *)) ++{ ++ struct klist_iter i; ++ struct device *child; ++ int error = 0; ++ ++ if (!parent->p) ++ return 0; ++ ++ klist_iter_init_node(&parent->p->klist_children, &i, ++ (from ? &from->p->knode_parent : NULL)); ++ while ((child = prev_device(&i)) && !error) ++ error = fn(child, data); ++ klist_iter_exit(&i); ++ return error; ++} ++EXPORT_SYMBOL_GPL(device_for_each_child_reverse_from); ++ ++/** + * device_find_child - device iterator for locating a particular device. + * @parent: parent struct device + * @match: Callback function to check device +--- a/drivers/cxl/core/hdm.c ++++ b/drivers/cxl/core/hdm.c +@@ -723,7 +723,44 @@ static int cxl_decoder_commit(struct cxl + return 0; + } + +-static int cxl_decoder_reset(struct cxl_decoder *cxld) ++static int commit_reap(struct device *dev, const void *data) ++{ ++ struct cxl_port *port = to_cxl_port(dev->parent); ++ struct cxl_decoder *cxld; ++ ++ if (!is_switch_decoder(dev) && !is_endpoint_decoder(dev)) ++ return 0; ++ ++ cxld = to_cxl_decoder(dev); ++ if (port->commit_end == cxld->id && ++ ((cxld->flags & CXL_DECODER_F_ENABLE) == 0)) { ++ port->commit_end--; ++ dev_dbg(&port->dev, "reap: %s commit_end: %d\n", ++ dev_name(&cxld->dev), port->commit_end); ++ } ++ ++ return 0; ++} ++ ++void cxl_port_commit_reap(struct cxl_decoder *cxld) ++{ ++ struct cxl_port *port = to_cxl_port(cxld->dev.parent); ++ ++ lockdep_assert_held_write(&cxl_region_rwsem); ++ ++ /* ++ * Once the highest committed decoder is disabled, free any other ++ * decoders that were pinned allocated by out-of-order release. ++ */ ++ port->commit_end--; ++ dev_dbg(&port->dev, "reap: %s commit_end: %d\n", dev_name(&cxld->dev), ++ port->commit_end); ++ device_for_each_child_reverse_from(&port->dev, &cxld->dev, NULL, ++ commit_reap); ++} ++EXPORT_SYMBOL_NS_GPL(cxl_port_commit_reap, CXL); ++ ++static void cxl_decoder_reset(struct cxl_decoder *cxld) + { + struct cxl_port *port = to_cxl_port(cxld->dev.parent); + struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev); +@@ -732,14 +769,14 @@ static int cxl_decoder_reset(struct cxl_ + u32 ctrl; + + if ((cxld->flags & CXL_DECODER_F_ENABLE) == 0) +- return 0; ++ return; + +- if (port->commit_end != id) { ++ if (port->commit_end == id) ++ cxl_port_commit_reap(cxld); ++ else + dev_dbg(&port->dev, + "%s: out of order reset, expected decoder%d.%d\n", + dev_name(&cxld->dev), port->id, port->commit_end); +- return -EBUSY; +- } + + down_read(&cxl_dpa_rwsem); + ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(id)); +@@ -752,7 +789,6 @@ static int cxl_decoder_reset(struct cxl_ + writel(0, hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(id)); + up_read(&cxl_dpa_rwsem); + +- port->commit_end--; + cxld->flags &= ~CXL_DECODER_F_ENABLE; + + /* Userspace is now responsible for reconfiguring this decoder */ +@@ -762,8 +798,6 @@ static int cxl_decoder_reset(struct cxl_ + cxled = to_cxl_endpoint_decoder(&cxld->dev); + cxled->state = CXL_DECODER_STATE_MANUAL; + } +- +- return 0; + } + + static int cxl_setup_hdm_decoder_from_dvsec( +--- a/drivers/cxl/core/region.c ++++ b/drivers/cxl/core/region.c +@@ -128,8 +128,8 @@ static int cxl_region_invalidate_memregi + "Bypassing cpu_cache_invalidate_memregion() for testing!\n"); + return 0; + } else { +- dev_err(&cxlr->dev, +- "Failed to synchronize CPU cache state\n"); ++ dev_WARN(&cxlr->dev, ++ "Failed to synchronize CPU cache state\n"); + return -ENXIO; + } + } +@@ -138,19 +138,17 @@ static int cxl_region_invalidate_memregi + return 0; + } + +-static int cxl_region_decode_reset(struct cxl_region *cxlr, int count) ++static void cxl_region_decode_reset(struct cxl_region *cxlr, int count) + { + struct cxl_region_params *p = &cxlr->params; +- int i, rc = 0; ++ int i; + + /* +- * Before region teardown attempt to flush, and if the flush +- * fails cancel the region teardown for data consistency +- * concerns ++ * Before region teardown attempt to flush, evict any data cached for ++ * this region, or scream loudly about missing arch / platform support ++ * for CXL teardown. + */ +- rc = cxl_region_invalidate_memregion(cxlr); +- if (rc) +- return rc; ++ cxl_region_invalidate_memregion(cxlr); + + for (i = count - 1; i >= 0; i--) { + struct cxl_endpoint_decoder *cxled = p->targets[i]; +@@ -173,23 +171,17 @@ static int cxl_region_decode_reset(struc + cxl_rr = cxl_rr_load(iter, cxlr); + cxld = cxl_rr->decoder; + if (cxld->reset) +- rc = cxld->reset(cxld); +- if (rc) +- return rc; ++ cxld->reset(cxld); + set_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags); + } + + endpoint_reset: +- rc = cxled->cxld.reset(&cxled->cxld); +- if (rc) +- return rc; ++ cxled->cxld.reset(&cxled->cxld); + set_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags); + } + + /* all decoders associated with this region have been torn down */ + clear_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags); +- +- return 0; + } + + static int commit_decoder(struct cxl_decoder *cxld) +@@ -305,16 +297,8 @@ static ssize_t commit_store(struct devic + * still pending. + */ + if (p->state == CXL_CONFIG_RESET_PENDING) { +- rc = cxl_region_decode_reset(cxlr, p->interleave_ways); +- /* +- * Revert to committed since there may still be active +- * decoders associated with this region, or move forward +- * to active to mark the reset successful +- */ +- if (rc) +- p->state = CXL_CONFIG_COMMIT; +- else +- p->state = CXL_CONFIG_ACTIVE; ++ cxl_region_decode_reset(cxlr, p->interleave_ways); ++ p->state = CXL_CONFIG_ACTIVE; + } + } + +@@ -1945,13 +1929,7 @@ static int cxl_region_detach(struct cxl_ + get_device(&cxlr->dev); + + if (p->state > CXL_CONFIG_ACTIVE) { +- /* +- * TODO: tear down all impacted regions if a device is +- * removed out of order +- */ +- rc = cxl_region_decode_reset(cxlr, p->interleave_ways); +- if (rc) +- goto out; ++ cxl_region_decode_reset(cxlr, p->interleave_ways); + p->state = CXL_CONFIG_ACTIVE; + } + +--- a/drivers/cxl/cxl.h ++++ b/drivers/cxl/cxl.h +@@ -347,7 +347,7 @@ struct cxl_decoder { + struct cxl_region *region; + unsigned long flags; + int (*commit)(struct cxl_decoder *cxld); +- int (*reset)(struct cxl_decoder *cxld); ++ void (*reset)(struct cxl_decoder *cxld); + }; + + /* +@@ -682,6 +682,7 @@ static inline bool is_cxl_root(struct cx + int cxl_num_decoders_committed(struct cxl_port *port); + bool is_cxl_port(const struct device *dev); + struct cxl_port *to_cxl_port(const struct device *dev); ++void cxl_port_commit_reap(struct cxl_decoder *cxld); + struct pci_bus; + int devm_cxl_register_pci_bus(struct device *host, struct device *uport_dev, + struct pci_bus *bus); +--- a/include/linux/device.h ++++ b/include/linux/device.h +@@ -1063,6 +1063,9 @@ int device_for_each_child(struct device + int (*fn)(struct device *dev, void *data)); + int device_for_each_child_reverse(struct device *dev, void *data, + int (*fn)(struct device *dev, void *data)); ++int device_for_each_child_reverse_from(struct device *parent, ++ struct device *from, const void *data, ++ int (*fn)(struct device *, const void *)); + struct device *device_find_child(struct device *dev, void *data, + int (*match)(struct device *dev, void *data)); + struct device *device_find_child_by_name(struct device *parent, +--- a/tools/testing/cxl/test/cxl.c ++++ b/tools/testing/cxl/test/cxl.c +@@ -687,26 +687,22 @@ static int mock_decoder_commit(struct cx + return 0; + } + +-static int mock_decoder_reset(struct cxl_decoder *cxld) ++static void mock_decoder_reset(struct cxl_decoder *cxld) + { + struct cxl_port *port = to_cxl_port(cxld->dev.parent); + int id = cxld->id; + + if ((cxld->flags & CXL_DECODER_F_ENABLE) == 0) +- return 0; ++ return; + + dev_dbg(&port->dev, "%s reset\n", dev_name(&cxld->dev)); +- if (port->commit_end != id) { ++ if (port->commit_end == id) ++ cxl_port_commit_reap(cxld); ++ else + dev_dbg(&port->dev, + "%s: out of order reset, expected decoder%d.%d\n", + dev_name(&cxld->dev), port->id, port->commit_end); +- return -EBUSY; +- } +- +- port->commit_end--; + cxld->flags &= ~CXL_DECODER_F_ENABLE; +- +- return 0; + } + + static void default_mock_decoder(struct cxl_decoder *cxld) diff --git a/queue-6.6/series b/queue-6.6/series index b20cefd262f..fc5c7c5cd59 100644 --- a/queue-6.6/series +++ b/queue-6.6/series @@ -103,3 +103,4 @@ risc-v-acpi-fix-early_ioremap-to-early_memremap.patch mm-shmem-fix-data-race-in-shmem_getattr.patch tools-mm-werror-fixes-in-page-types-slabinfo.patch thunderbolt-honor-tmu-requirements-in-the-domain-when-setting-tmu-mode.patch +cxl-port-fix-use-after-free-permit-out-of-order-decoder-shutdown.patch