git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
dax/hmem, cxl: Defer and resolve Soft Reserved ownership
author: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
Sun, 22 Mar 2026 19:53:41 +0000 (19:53 +0000)
committer: Dave Jiang <dave.jiang@intel.com>
Fri, 27 Mar 2026 17:25:47 +0000 (10:25 -0700)
The current probe time ownership check for Soft Reserved memory based
solely on CXL window intersection is insufficient. dax_hmem probing is not
always guaranteed to run after CXL enumeration and region assembly, which
can lead to incorrect ownership decisions before the CXL stack has
finished publishing windows and assembling committed regions.

Introduce deferred ownership handling for Soft Reserved ranges that
intersect CXL windows. When such a range is encountered during the
initial dax_hmem probe, schedule deferred work to wait for the CXL stack
to complete enumeration and region assembly before deciding ownership.

Once the deferred work runs, evaluate each Soft Reserved range
individually: if a CXL region fully contains the range, skip it and let
dax_cxl bind. Otherwise, register it with dax_hmem. This per-range
ownership model avoids the need for CXL region teardown, and
alloc_dax_region() resource exclusion prevents double claiming.

Introduce a boolean flag dax_hmem_initial_probe to live inside device.c
so it survives module reload. Ensure dax_cxl defers driver registration
until dax_hmem has completed ownership resolution. dax_cxl calls
dax_hmem_flush_work() before cxl_driver_register(), which both waits for
the deferred work to complete and creates a module symbol dependency that
forces dax_hmem.ko to load before dax_cxl.

Co-developed-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Link: https://patch.msgid.link/20260322195343.206900-9-Smita.KoralahalliChannabasappa@amd.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
drivers/dax/bus.h
drivers/dax/cxl.c
drivers/dax/hmem/device.c
drivers/dax/hmem/hmem.c

index cbbf64443098c08d944878a190a0da69eccbfbf4..ebbfe2d6da1461eac6a4adc0cede6e9b1c382dcd 100644 (file)
@@ -49,6 +49,13 @@ void dax_driver_unregister(struct dax_device_driver *dax_drv);
 void kill_dev_dax(struct dev_dax *dev_dax);
 bool static_dev_dax(struct dev_dax *dev_dax);
 
+#if IS_ENABLED(CONFIG_DEV_DAX_HMEM)
+extern bool dax_hmem_initial_probe;
+void dax_hmem_flush_work(void);
+#else
+static inline void dax_hmem_flush_work(void) { }
+#endif
+
 #define MODULE_ALIAS_DAX_DEVICE(type) \
        MODULE_ALIAS("dax:t" __stringify(type) "*")
 #define DAX_DEVICE_MODALIAS_FMT "dax:t%d"
index a2136adfa186e4d0ad6ea9db0208d33e0ba55ed1..3ab39b77843d5e9a0b5ae1d8bc15e64f88bebd08 100644 (file)
@@ -44,6 +44,7 @@ static struct cxl_driver cxl_dax_region_driver = {
 
 static void cxl_dax_region_driver_register(struct work_struct *work)
 {
+       dax_hmem_flush_work();
        cxl_driver_register(&cxl_dax_region_driver);
 }
 
index 56e3cbd181b5c2ac20dab0d62a260c2d09aa175a..991a4bf7d96920de98e3d12ba1c479e6f2b71d68 100644 (file)
@@ -8,6 +8,9 @@
 static bool nohmem;
 module_param_named(disable, nohmem, bool, 0444);
 
+bool dax_hmem_initial_probe;
+EXPORT_SYMBOL_GPL(dax_hmem_initial_probe);
+
 static bool platform_initialized;
 static DEFINE_MUTEX(hmem_resource_lock);
 static struct resource hmem_active = {
index ca752db03201cf16ee4ace4f55a45872682897f9..9ceda6b5cadf35fb3843f95434f7d9d0ec106838 100644 (file)
@@ -3,6 +3,7 @@
 #include <linux/memregion.h>
 #include <linux/module.h>
 #include <linux/dax.h>
+#include <cxl/cxl.h>
 #include "../bus.h"
 
 static bool region_idle;
@@ -58,6 +59,23 @@ static void release_hmem(void *pdev)
        platform_device_unregister(pdev);
 }
 
+struct dax_defer_work {
+       struct platform_device *pdev;
+       struct work_struct work;
+};
+
+static void process_defer_work(struct work_struct *w);
+
+static struct dax_defer_work dax_hmem_work = {
+       .work = __WORK_INITIALIZER(dax_hmem_work.work, process_defer_work),
+};
+
+void dax_hmem_flush_work(void)
+{
+       flush_work(&dax_hmem_work.work);
+}
+EXPORT_SYMBOL_GPL(dax_hmem_flush_work);
+
 static int __hmem_register_device(struct device *host, int target_nid,
                                  const struct resource *res)
 {
@@ -122,6 +140,11 @@ static int hmem_register_device(struct device *host, int target_nid,
        if (IS_ENABLED(CONFIG_DEV_DAX_CXL) &&
            region_intersects(res->start, resource_size(res), IORESOURCE_MEM,
                              IORES_DESC_CXL) != REGION_DISJOINT) {
+               if (!dax_hmem_initial_probe) {
+                       dev_dbg(host, "await CXL initial probe: %pr\n", res);
+                       queue_work(system_long_wq, &dax_hmem_work.work);
+                       return 0;
+               }
                dev_dbg(host, "deferring range to CXL: %pr\n", res);
                return 0;
        }
@@ -129,8 +152,54 @@ static int hmem_register_device(struct device *host, int target_nid,
        return __hmem_register_device(host, target_nid, res);
 }
 
+static int hmem_register_cxl_device(struct device *host, int target_nid,
+                                   const struct resource *res)
+{
+       if (region_intersects(res->start, resource_size(res), IORESOURCE_MEM,
+                             IORES_DESC_CXL) == REGION_DISJOINT)
+               return 0;
+
+       if (cxl_region_contains_resource((struct resource *)res)) {
+               dev_dbg(host, "CXL claims resource, dropping: %pr\n", res);
+               return 0;
+       }
+
+       dev_dbg(host, "CXL did not claim resource, registering: %pr\n", res);
+       return __hmem_register_device(host, target_nid, res);
+}
+
+static void process_defer_work(struct work_struct *w)
+{
+       struct dax_defer_work *work = container_of(w, typeof(*work), work);
+       struct platform_device *pdev;
+
+       if (!work->pdev)
+               return;
+
+       pdev = work->pdev;
+
+       /* Relies on cxl_acpi and cxl_pci having had a chance to load */
+       wait_for_device_probe();
+
+       guard(device)(&pdev->dev);
+       if (!pdev->dev.driver)
+               return;
+
+       if (!dax_hmem_initial_probe) {
+               dax_hmem_initial_probe = true;
+               walk_hmem_resources(&pdev->dev, hmem_register_cxl_device);
+       }
+}
+
 static int dax_hmem_platform_probe(struct platform_device *pdev)
 {
+       if (work_pending(&dax_hmem_work.work))
+               return -EBUSY;
+
+       if (!dax_hmem_work.pdev)
+               dax_hmem_work.pdev =
+                       to_platform_device(get_device(&pdev->dev));
+
        return walk_hmem_resources(&pdev->dev, hmem_register_device);
 }
 
@@ -168,6 +237,11 @@ static __init int dax_hmem_init(void)
 
 static __exit void dax_hmem_exit(void)
 {
+       if (dax_hmem_work.pdev) {
+               flush_work(&dax_hmem_work.work);
+               put_device(&dax_hmem_work.pdev->dev);
+       }
+
        platform_driver_unregister(&dax_hmem_driver);
        platform_driver_unregister(&dax_hmem_platform_driver);
 }