bcache: reduce gc latency by processing less nodes and sleep less time

author Coly Li <colyli@fnnas.com>

Thu, 13 Nov 2025 05:36:26 +0000 (13:36 +0800)

committer Jens Axboe <axboe@kernel.dk>

Thu, 13 Nov 2025 16:18:06 +0000 (09:18 -0700)
author Coly Li <colyli@fnnas.com>
Thu, 13 Nov 2025 05:36:26 +0000 (13:36 +0800)
committer Jens Axboe <axboe@kernel.dk>
Thu, 13 Nov 2025 16:18:06 +0000 (09:18 -0700)
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c

index db3684819e38e281975ed7de435bbbf34cb97c23..7708d92df23e870884dd62ba4f5fd4ddc95790ee 100644 (file)
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -399,7 +399,11 @@ long bch_bucket_alloc(struct cache *ca, unsigned int reserve, bool wait)
                                 TASK_UNINTERRUPTIBLE);
  
                 mutex_unlock(&ca->set->bucket_lock);
+
+               atomic_inc(&ca->set->bucket_wait_cnt);
                 schedule();
+               atomic_dec(&ca->set->bucket_wait_cnt);
+
                 mutex_lock(&ca->set->bucket_lock);
         } while (!fifo_pop(&ca->free[RESERVE_NONE], r) &&
                  !fifo_pop(&ca->free[reserve], r));
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h

index b8bd6d4a4298659b5771eedc6728994de1a8ad63..8ccacba8554756273aced13d25a06b9bd0de21c8 100644 (file)
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -604,6 +604,7 @@ struct cache_set {
          */
         atomic_t                prio_blocked;
         wait_queue_head_t       bucket_wait;
+       atomic_t                bucket_wait_cnt;
  
         /*
          * For any bio we don't skip we subtract the number of sectors from
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c

index 210b59007d98d806558be15063b879af18ed54ac..5d922d301ab6c9fd95d3988a331939302aceb7af 100644 (file)
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -89,8 +89,9 @@
   * Test module load/unload
   */
  
-#define MAX_GC_TIMES           100
-#define MIN_GC_NODES           100
+#define MAX_GC_TIMES_SHIFT     7  /* 128 loops */
+#define GC_NODES_MIN           10
+#define GC_SLEEP_MS_MIN                10
  #define GC_SLEEP_MS            100
  
  #define PTR_DIRTY_BIT          (((uint64_t) 1 << 36))
@@ -1578,29 +1579,29 @@ static unsigned int btree_gc_count_keys(struct btree *b)
  
  static size_t btree_gc_min_nodes(struct cache_set *c)
  {
-       size_t min_nodes;
+       size_t min_nodes = GC_NODES_MIN;
  
-       /*
-        * Since incremental GC would stop 100ms when front
-        * side I/O comes, so when there are many btree nodes,
-        * if GC only processes constant (100) nodes each time,
-        * GC would last a long time, and the front side I/Os
-        * would run out of the buckets (since no new bucket
-        * can be allocated during GC), and be blocked again.
-        * So GC should not process constant nodes, but varied
-        * nodes according to the number of btree nodes, which
-        * realized by dividing GC into constant(100) times,
-        * so when there are many btree nodes, GC can process
-        * more nodes each time, otherwise, GC will process less
-        * nodes each time (but no less than MIN_GC_NODES)
-        */
-       min_nodes = c->gc_stats.nodes / MAX_GC_TIMES;
-       if (min_nodes < MIN_GC_NODES)
-               min_nodes = MIN_GC_NODES;
+       if (atomic_read(&c->search_inflight) == 0) {
+               size_t n = c->gc_stats.nodes >> MAX_GC_TIMES_SHIFT;
+
+               if (min_nodes < n)
+                       min_nodes = n;
+       }
  
         return min_nodes;
  }
  
+static uint64_t btree_gc_sleep_ms(struct cache_set *c)
+{
+       uint64_t sleep_ms;
+
+       if (atomic_read(&c->bucket_wait_cnt) > 0)
+               sleep_ms = GC_SLEEP_MS_MIN;
+       else
+               sleep_ms = GC_SLEEP_MS;
+
+       return sleep_ms;
+}
  
  static int btree_gc_recurse(struct btree *b, struct btree_op *op,
                             struct closure *writes, struct gc_stat *gc)
@@ -1668,8 +1669,7 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op,
                 memmove(r + 1, r, sizeof(r[0]) * (GC_MERGE_NODES - 1));
                 r->b = NULL;
  
-               if (atomic_read(&b->c->search_inflight) &&
-                   gc->nodes >= gc->nodes_pre + btree_gc_min_nodes(b->c)) {
+               if (gc->nodes >= (gc->nodes_pre + btree_gc_min_nodes(b->c))) {
                         gc->nodes_pre =  gc->nodes;
                         ret = -EAGAIN;
                         break;
@@ -1846,8 +1846,8 @@ static void bch_btree_gc(struct cache_set *c)
                 cond_resched();
  
                 if (ret == -EAGAIN)
-                       schedule_timeout_interruptible(msecs_to_jiffies
-                                                      (GC_SLEEP_MS));
+                       schedule_timeout_interruptible(
+                               msecs_to_jiffies(btree_gc_sleep_ms(c)));
                 else if (ret)
                         pr_warn("gc failed!\n");
         } while (ret && !test_bit(CACHE_SET_IO_DISABLE, &c->flags));
author	Coly Li <colyli@fnnas.com>
	Thu, 13 Nov 2025 05:36:26 +0000 (13:36 +0800)
committer	Jens Axboe <axboe@kernel.dk>
	Thu, 13 Nov 2025 16:18:06 +0000 (09:18 -0700)
drivers/md/bcache/alloc.c		patch \| blob \| blame \| history
drivers/md/bcache/bcache.h		patch \| blob \| blame \| history
drivers/md/bcache/btree.c		patch \| blob \| blame \| history