From e16ffc2bee661cbc12b7837a2d03c50aa87ac08b Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 26 Aug 2025 19:50:41 +0000 Subject: [PATCH] extindex|v2: defrag SQLite and Xapian DBs on btrfs Doing periodic defrags ought to improve performance and perhaps allow CoW to be usable with btrfs. The autodefrag mount option of btrfs(5) doesn't seem recommended by btrfs developers since it's too aggressive, defragments too often, and wears out devices. Performing defrag on our end should allow users to tune a more ideal defrag interval to maintain performance while avoiding excessive device wear. --- devel/sysdefs-list | 16 ++++++++++++- lib/PublicInbox/OverIdx.pm | 1 + lib/PublicInbox/SearchIdx.pm | 20 +++++++++++++++- lib/PublicInbox/Syscall.pm | 45 +++++++++++++++++++++++++---------- lib/PublicInbox/V2Writable.pm | 30 +++++++++++++++++++++++ script/public-inbox-extindex | 2 +- script/public-inbox-index | 2 +- 7 files changed, 99 insertions(+), 17 deletions(-) diff --git a/devel/sysdefs-list b/devel/sysdefs-list index 2f4ac587e..f84befe3e 100755 --- a/devel/sysdefs-list +++ b/devel/sysdefs-list @@ -33,7 +33,17 @@ for (qw(sys/ioctl sys/filio)) { ($Config{"i_$cfg_name"} // '') eq 'define' and push @cflags, "-DHAVE_${cpp_name}_H"; } -system($cc, '-o', $x, $f, @cflags) == 0 or die "$cc failed \$?=$?"; +my @cc_cmd = ($cc, '-o', $x, $f, @cflags); +if ($^O eq 'linux') { + if (system @cc_cmd, '-DHAVE_LINUX_BTRFS_H=1') { + warn "W: `@cc_cmd' failed w/ linux/btrfs.h, trying w/o ...\n"; + } else { + @cc_cmd = (); + } +} +if (@cc_cmd) { + system(@cc_cmd) == 0 or die "`@cc_cmd' failed \$?=$?"; +} print STDERR '# %Config', (map { " $_=$Config{$_}" } qw(ptrsize sizesize lseeksize)), "\n"; exit(system($x)); # exit is to ensure File::Temp::Dir->DESTROY fires @@ -57,6 +67,9 @@ __DATA__ # include # include # include +# ifdef HAVE_LINUX_BTRFS_H +# include +# endif #endif #include #include @@ -134,6 +147,7 @@ int main(void) MAYBE X(FS_IOC_GETFLAGS); MAYBE X(FS_IOC_SETFLAGS); + MAYBE X(BTRFS_IOC_DEFRAG); MAYBE D(SYS_renameat2); diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm index 93f2f11b2..c9cd44646 100644 --- a/lib/PublicInbox/OverIdx.pm +++ b/lib/PublicInbox/OverIdx.pm @@ -340,6 +340,7 @@ INSERT INTO id2num (id, num) VALUES (?,?) my $id = mid2id($self, $mid); $sth->execute($id, $num); } + $self->{-art_max} = $num if $num > ($self->{-art_max} // 0); } sub _remove_oid { diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index bbae2e015..1b8f0f612 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -14,6 +14,7 @@ use parent qw(PublicInbox::Search PublicInbox::Lock PublicInbox::Umask use PublicInbox::Eml; use PublicInbox::DS qw(now); use PublicInbox::Search qw(xap_terms); +use PublicInbox::Syscall qw(defrag_file); use PublicInbox::InboxWritable; use PublicInbox::MID qw(mids_for_index mids); use PublicInbox::MsgIter; @@ -36,6 +37,7 @@ our ($DB_CREATE_OR_OPEN, $DB_OPEN); our $DB_NO_SYNC = 0; our $DB_DANGEROUS = 0; our $CHECKPOINT_INTVL = 5; # seconds +our $DEFRAG_NR = 100000; # document count our $BATCH_BYTES = $ENV{XAPIAN_FLUSH_THRESHOLD} ? 0x7fffffff : # assume a typical 64-bit system has 8x more RAM than a # typical 32-bit system: @@ -148,7 +150,6 @@ sub idx_acquire { if (!-d $dir && (!$is_shard || ($is_shard && need_xapian($self)))) { File::Path::mkpath($dir); - require PublicInbox::Syscall; PublicInbox::Syscall::nodatacow_dir($dir); # owner == self for CodeSearchIdx $self->{-set_has_threadid_once} = 1 if $owner != $self; @@ -1215,4 +1216,21 @@ sub eidx_shard_new { $self; } +# calculate the next article number to defrag at +sub next_defrag ($$) { + my ($num, $opt) = @_; + my $nr = ($opt->{defrag} // $DEFRAG_NR) || return; + $num ||= 1; # num == 0 on new DB + $num + $nr - ($num % $nr); +} + +sub defrag_xdir { + my ($self) = @_; + # e.g. xap15/[0123]/*.{glass,honey}, skip flintlock+iam{glass,*} + for (glob($self->xdir.'/*.*')) { + next if /\.sqlite3/; # v1 has over.sqlite3* + last unless defrag_file $_ + } +} + 1; diff --git a/lib/PublicInbox/Syscall.pm b/lib/PublicInbox/Syscall.pm index b1cb4e688..1045394ac 100644 --- a/lib/PublicInbox/Syscall.pm +++ b/lib/PublicInbox/Syscall.pm @@ -32,7 +32,7 @@ our @EXPORT_OK = qw(epoll_create EPOLLIN EPOLLOUT EPOLLET EPOLL_CTL_ADD EPOLL_CTL_DEL EPOLL_CTL_MOD EPOLLONESHOT EPOLLEXCLUSIVE - rename_noreplace %SIGNUM $F_SETPIPE_SZ); + rename_noreplace %SIGNUM $F_SETPIPE_SZ defrag_file); use constant { EPOLLIN => 1, EPOLLOUT => 4, @@ -62,7 +62,8 @@ our ($SYS_epoll_create, $SYS_recvmsg); my $SYS_fstatfs; # don't need fstatfs64, just statfs.f_type -my ($FS_IOC_GETFLAGS, $FS_IOC_SETFLAGS, $SYS_writev); +my ($FS_IOC_GETFLAGS, $FS_IOC_SETFLAGS, $SYS_writev, + $BTRFS_IOC_DEFRAG); my $SFD_CLOEXEC = 02000000; # Perl does not expose O_CLOEXEC our $no_deprecated = 0; @@ -105,6 +106,7 @@ if ($^O eq "linux") { }; $FS_IOC_GETFLAGS = 0x80046601; $FS_IOC_SETFLAGS = 0x40046602; + $BTRFS_IOC_DEFRAG = 0x50009402; } elsif ($machine eq "x86_64") { $SYS_epoll_create = 213; $SYS_epoll_ctl = 233; @@ -121,6 +123,7 @@ if ($^O eq "linux") { }; $FS_IOC_GETFLAGS = 0x80086601; $FS_IOC_SETFLAGS = 0x40086602; + $BTRFS_IOC_DEFRAG = 0x50009402; } elsif ($machine eq 'x32') { $SYS_epoll_create = 1073742037; $SYS_epoll_ctl = 1073742057; @@ -435,23 +438,40 @@ sub rename_noreplace ($$) { } } -# returns "0 but true" on success, undef or -sub nodatacow_fh ($) { +sub is_btrfs ($) { my ($fh) = @_; my $buf = "\0" x 120; - syscall($SYS_fstatfs // return, fileno($fh), $buf) == 0 or - return warn("fstatfs: $!\n"); + if (syscall($SYS_fstatfs // return, fileno($fh), $buf) != 0) { + warn "fstatfs: $!\n"; + return; + } my $f_type = unpack($FSWORD_T, $buf); - return if $f_type != 0x9123683E; # BTRFS_SUPER_MAGIC + $f_type == 0x9123683E; # BTRFS_SUPER_MAGIC +} + +# returns "0 but true" on success, undef on noop, true != 0 on failure +sub defrag_file ($) { + my ($file) = @_; + open my $fh, '+<', $file or return; + is_btrfs $fh or return; + $BTRFS_IOC_DEFRAG // + return warn 'BTRFS_IOC_DEFRAG undefined for architecture'; + ioctl $fh, $BTRFS_IOC_DEFRAG, 0; +} + +# returns "0 but true" on success, undef on noop, true != 0 on failure +sub nodatacow_fh ($) { + my ($fh) = @_; + return unless is_btrfs $fh; $FS_IOC_GETFLAGS // - return warn('FS_IOC_GETFLAGS undefined for platform'); - ioctl($fh, $FS_IOC_GETFLAGS, $buf) // - return warn("FS_IOC_GETFLAGS: $!\n"); + return (undef, warn 'FS_IOC_GETFLAGS undefined for platform'); + ioctl($fh, $FS_IOC_GETFLAGS, my $buf = "\0\0\0\0") // + return (undef, warn "FS_IOC_GETFLAGS: $!"); my $attr = unpack('l!', $buf); return if ($attr & 0x00800000); # FS_NOCOW_FL; ioctl($fh, $FS_IOC_SETFLAGS, pack('l', $attr | 0x00800000)) // - warn("FS_IOC_SETFLAGS: $!\n"); + return (undef, warn "FS_IOC_SETFLAGS: $!"); } sub nodatacow_dir ($) { @@ -461,8 +481,7 @@ sub nodatacow_dir ($) { $rc && $rc == 0 and warn <autoflush use POSIX (); @@ -220,6 +221,8 @@ sub _idx_init { # with_umask callback $self->{shards} = $nshards if $nshards && $nshards != $self->{shards}; $self->{batch_bytes} = $opt->{batch_size} // $PublicInbox::SearchIdx::BATCH_BYTES; + $self->{defrag_at} = + PublicInbox::SearchIdx::next_defrag $self->{oidx}->max, $opt; # need to create all shards before initializing msgmap FD # idx_shards must be visible to all forked processes @@ -510,6 +513,29 @@ sub set_last_commits ($) { # this is NOT for ExtSearchIdx } } +sub do_defrag ($) { + my ($self) = @_; + my ($pr, $t0) = ($self->{-opt}->{-progress}, now); + + # parallel shards, but each *.{glass,honey,etc.} is synchronous + $_->ipc_do('defrag_xdir') for @{$self->{idx_shards} // []}; + + # TODO: parallelize SQLite defrags? + if (my $df_ok = defrag_file $self->{oidx}->dbh->sqlite_db_filename) { + $self->{mm} and # v2 only, not -extindex + defrag_file $self->{mm}->{dbh}->sqlite_db_filename; + $self->{defrag_at} = PublicInbox::SearchIdx::next_defrag + $self->{oidx}->{-art_max}, + $self->{-opt}; + $pr->('defrag took ', + sprintf('%ums', now - $t0), + ", next defrag: >=#$self->{defrag_at} ", + "(cur: $self->{oidx}->{-art_max})\n"); + } else { # defrag not supported (or needed, maybe) + delete $self->{defrag_at}; + } +} + # public sub checkpoint ($;$) { my ($self, $wait) = @_; @@ -529,6 +555,10 @@ sub checkpoint ($;$) { # (non-parallel waits here) $_->ipc_do('commit_txn_lazy') for @$shards; + defined($self->{defrag_at}) and + ($self->{oidx}->{-art_max}//0) >= $self->{defrag_at} and + do_defrag $self; + # transactions started on parallel shards, # wait for them by issuing an echo command (echo can only # run after commit_txn_lazy is done) diff --git a/script/public-inbox-extindex b/script/public-inbox-extindex index 6b1b06c72..b49577ef5 100755 --- a/script/public-inbox-extindex +++ b/script/public-inbox-extindex @@ -28,7 +28,7 @@ See public-inbox-extindex(1) man page for full documentation. EOF my $opt = { quiet => -1, compact => 0, fsync => 1, scan => 1 }; GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i - fsync|sync! fast dangerous wal + fsync|sync! fast dangerous wal defrag=i indexlevel|index-level|L=s max_size|max-size=s batch_size|batch-size=s dedupe:s@ gc commit-interval=i watch scan! dry-run|n diff --git a/script/public-inbox-index b/script/public-inbox-index index 72f561381..e9832f4d8 100755 --- a/script/public-inbox-index +++ b/script/public-inbox-index @@ -38,7 +38,7 @@ my $opt = { GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i prune fsync|sync! xapian_only|xapian-only dangerous wal indexlevel|index-level|L=s max_size|max-size=s - batch_size|batch-size=s + defrag=i batch_size|batch-size=s since|after=s until|before=s sequential-shard|seq-shard multi-pack-index! -- 2.47.3