nvdimm/btt: Handle preemption in BTT lane acquisition

author Alison Schofield <alison.schofield@intel.com>

Thu, 28 May 2026 02:16:22 +0000 (19:16 -0700)

committer Alison Schofield <alison.schofield@intel.com>

Mon, 1 Jun 2026 20:53:24 +0000 (13:53 -0700)
author Alison Schofield <alison.schofield@intel.com>
Thu, 28 May 2026 02:16:22 +0000 (19:16 -0700)
committer Alison Schofield <alison.schofield@intel.com>
Mon, 1 Jun 2026 20:53:24 +0000 (13:53 -0700)
diff --git a/Documentation/driver-api/nvdimm/btt.rst b/Documentation/driver-api/nvdimm/btt.rst

index 2d8269f834bd60696500c94ef6cd070d27cdcef7..d29fab95f149422ea5c40ae3ecd33f9835ceb748 100644 (file)
--- a/Documentation/driver-api/nvdimm/btt.rst
+++ b/Documentation/driver-api/nvdimm/btt.rst
@@ -161,9 +161,8 @@ process::
         nlanes = min(nfree, num_cpus)
  
  A lane number is obtained at the start of any IO, and is used for indexing into
-all the on-disk and in-memory data structures for the duration of the IO. If
-there are more CPUs than the max number of available lanes, than lanes are
-protected by spinlocks.
+all the on-disk and in-memory data structures for the duration of the IO. Each
+lane is protected by its own mutex, so acquiring a lane may sleep.
  
  
  d. In-memory data structure: Read Tracking Table (RTT)
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h

index b199eea3260ef6ea3e6b4aba2a2fdca230121030..197e5368c0a46ba5969376710abd97d98cb94912 100644 (file)
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -365,11 +365,6 @@ unsigned sizeof_namespace_label(struct nvdimm_drvdata *ndd);
         for (res = (ndd)->dpa.child, next = res ? res->sibling : NULL; \
                         res; res = next, next = next ? next->sibling : NULL)
  
-struct nd_percpu_lane {
-       int count;
-       spinlock_t lock;
-};
-
  enum nd_label_flags {
         ND_LABEL_REAP,
  };
@@ -400,6 +395,10 @@ struct nd_mapping {
         struct nvdimm_drvdata *ndd;
  };
  
+struct nd_lane {
+       struct mutex lock; /* serialize lane access */
+} ____cacheline_aligned_in_smp;
+
  struct nd_region {
         struct device dev;
         struct ida ns_ida;
@@ -420,7 +419,7 @@ struct nd_region {
         struct kernfs_node *bb_state;
         struct badblocks bb;
         struct nd_interleave_set *nd_set;
-       struct nd_percpu_lane __percpu *lane;
+       struct nd_lane *lane;
         int (*flush)(struct nd_region *nd_region, struct bio *bio);
         struct nd_mapping mapping[] __counted_by(ndr_mappings);
  };
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c

index e35c2e18518f0c581a59fb47b5df67a7ed7b6c16..5e079d61cbaa32ec0610d6b22c1aefe66ccf76e6 100644 (file)
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -192,7 +192,9 @@ static void nd_region_release(struct device *dev)
  
                 put_device(&nvdimm->dev);
         }
-       free_percpu(nd_region->lane);
+       for (i = 0; i < nd_region->num_lanes; i++)
+               mutex_destroy(&nd_region->lane[i].lock);
+       kfree(nd_region->lane);
         if (!test_bit(ND_REGION_CXL, &nd_region->flags))
                 memregion_free(nd_region->id);
         kfree(nd_region);
@@ -904,52 +906,30 @@ void nd_region_advance_seeds(struct nd_region *nd_region, struct device *dev)
   * nd_region_acquire_lane - allocate and lock a lane
   * @nd_region: region id and number of lanes possible
   *
- * A lane correlates to a BLK-data-window and/or a log slot in the BTT.
- * We optimize for the common case where there are 256 lanes, one
- * per-cpu.  For larger systems we need to lock to share lanes.  For now
- * this implementation assumes the cost of maintaining an allocator for
- * free lanes is on the order of the lock hold time, so it implements a
- * static lane = cpu % num_lanes mapping.
+ * A lane correlates to a log slot in the BTT. The current CPU id is only a
+ * hint for picking lane = cpu % num_lanes; the task may migrate afterwards,
+ * so a per-lane mutex serializes access to the lane.
   *
- * In the case of a BTT instance on top of a BLK namespace a lane may be
- * acquired recursively.  We lock on the first instance.
- *
- * In the case of a BTT instance on top of PMEM, we only acquire a lane
- * for the BTT metadata updates.
+ * Callers must be in sleepable context. The only in-tree caller is
+ * BTT's ->submit_bio handler (btt_read_pg / btt_write_pg).
   */
  unsigned int nd_region_acquire_lane(struct nd_region *nd_region)
+       __acquires(&nd_region->lane[lane].lock)
  {
-       unsigned int cpu, lane;
-
-       migrate_disable();
-       cpu = smp_processor_id();
-       if (nd_region->num_lanes < nr_cpu_ids) {
-               struct nd_percpu_lane *ndl_lock, *ndl_count;
+       unsigned int lane;
  
-               lane = cpu % nd_region->num_lanes;
-               ndl_count = per_cpu_ptr(nd_region->lane, cpu);
-               ndl_lock = per_cpu_ptr(nd_region->lane, lane);
-               if (ndl_count->count++ == 0)
-                       spin_lock(&ndl_lock->lock);
-       } else
-               lane = cpu;
+       might_sleep();
  
+       lane = raw_smp_processor_id() % nd_region->num_lanes;
+       mutex_lock(&nd_region->lane[lane].lock);
         return lane;
  }
  EXPORT_SYMBOL(nd_region_acquire_lane);
  
  void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane)
+       __releases(&nd_region->lane[lane].lock)
  {
-       if (nd_region->num_lanes < nr_cpu_ids) {
-               unsigned int cpu = smp_processor_id();
-               struct nd_percpu_lane *ndl_lock, *ndl_count;
-
-               ndl_count = per_cpu_ptr(nd_region->lane, cpu);
-               ndl_lock = per_cpu_ptr(nd_region->lane, lane);
-               if (--ndl_count->count == 0)
-                       spin_unlock(&ndl_lock->lock);
-       }
-       migrate_enable();
+       mutex_unlock(&nd_region->lane[lane].lock);
  }
  EXPORT_SYMBOL(nd_region_release_lane);
  
@@ -1019,17 +999,16 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
                         goto err_id;
         }
  
-       nd_region->lane = alloc_percpu(struct nd_percpu_lane);
+       nd_region->num_lanes = ndr_desc->num_lanes;
+       if (!nd_region->num_lanes)
+               goto err_percpu;
+       nd_region->lane = kcalloc(nd_region->num_lanes,
+                                 sizeof(*nd_region->lane), GFP_KERNEL);
         if (!nd_region->lane)
                 goto err_percpu;
  
-        for (i = 0; i < nr_cpu_ids; i++) {
-               struct nd_percpu_lane *ndl;
-
-               ndl = per_cpu_ptr(nd_region->lane, i);
-               spin_lock_init(&ndl->lock);
-               ndl->count = 0;
-       }
+       for (i = 0; i < nd_region->num_lanes; i++)
+               mutex_init(&nd_region->lane[i].lock);
  
         for (i = 0; i < ndr_desc->num_mappings; i++) {
                 struct nd_mapping_desc *mapping = &ndr_desc->mapping[i];
@@ -1046,7 +1025,6 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
         }
         nd_region->provider_data = ndr_desc->provider_data;
         nd_region->nd_set = ndr_desc->nd_set;
-       nd_region->num_lanes = ndr_desc->num_lanes;
         nd_region->flags = ndr_desc->flags;
         nd_region->ro = ro;
         nd_region->numa_node = ndr_desc->numa_node;
author	Alison Schofield <alison.schofield@intel.com>
	Thu, 28 May 2026 02:16:22 +0000 (19:16 -0700)
committer	Alison Schofield <alison.schofield@intel.com>
	Mon, 1 Jun 2026 20:53:24 +0000 (13:53 -0700)
Documentation/driver-api/nvdimm/btt.rst		patch \| blob \| blame \| history
drivers/nvdimm/nd.h		patch \| blob \| blame \| history
drivers/nvdimm/region_devs.c		patch \| blob \| blame \| history