From 9d64d5f18313b1063696c438af5f6f827a483ab6 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 27 Oct 2025 17:56:14 +0000 Subject: [PATCH] searchidx: split shards at 100000 docs by default Testing on a busy btrfs system with indexlevel=medium reveals another ~15% speedup compared to the previous 450000 value since shards are smaller and less prone to slowdown. The smaller splits should also work better with indexlevel=full (the default) since full indexing with positions takes up the bulk of the space. --- Documentation/public-inbox-index.pod | 2 +- lib/PublicInbox/SearchIdx.pm | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/public-inbox-index.pod b/Documentation/public-inbox-index.pod index 35ec8cb7b..9d6c6a2ff 100644 --- a/Documentation/public-inbox-index.pod +++ b/Documentation/public-inbox-index.pod @@ -233,7 +233,7 @@ Implies and used for tuning C<--split-shards>. The default is likely fine. Tiny values may overflow system file/command-line limits while giant values negate the performance benefit. -Default: 450000 +Default: 100000 =item --multipass diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index a9a0e505d..13bf4544c 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -41,7 +41,7 @@ our $DB_NO_SYNC = 0; our $DB_DANGEROUS = 0; our $CHECKPOINT_INTVL = 15; # seconds our $DEFRAG_NR = 100000; # document count -our $SHARD_SPLIT_AT = 450000; # document count +our $SHARD_SPLIT_AT = 100000; # document count our $BATCH_BYTES = $ENV{XAPIAN_FLUSH_THRESHOLD} ? 0x7fffffff : # assume a typical 64-bit system has 8x more RAM than a # typical 32-bit system: -- 2.47.3