($Config{"i_$cfg_name"} // '') eq 'define' and
push @cflags, "-DHAVE_${cpp_name}_H";
}
-system($cc, '-o', $x, $f, @cflags) == 0 or die "$cc failed \$?=$?";
+# Build the probe binary.  On Linux, first try with HAVE_LINUX_BTRFS_H
+# defined so that <linux/btrfs.h> gets included; when that compile
+# fails, warn and fall through to a plain compile without it.
+my @cc_cmd = ($cc, '-o', $x, $f, @cflags);
+if ($^O eq 'linux') {
+	if (system(@cc_cmd, '-DHAVE_LINUX_BTRFS_H=1') == 0) {
+		@cc_cmd = (); # already built, skip the plain compile
+	} else {
+		warn "W: `@cc_cmd' failed w/ linux/btrfs.h, trying w/o ...\n";
+	}
+}
+@cc_cmd and (system(@cc_cmd) == 0 or die "`@cc_cmd' failed \$?=$?");
print STDERR '# %Config',
(map { " $_=$Config{$_}" } qw(ptrsize sizesize lseeksize)), "\n";
exit(system($x)); # exit is to ensure File::Temp::Dir->DESTROY fires
# include <sys/epoll.h>
# include <sys/inotify.h>
# include <sys/vfs.h>
+# ifdef HAVE_LINUX_BTRFS_H
+# include <linux/btrfs.h>
+# endif
#endif
#include <sys/types.h>
#include <fcntl.h>
MAYBE X(FS_IOC_GETFLAGS);
MAYBE X(FS_IOC_SETFLAGS);
+ MAYBE X(BTRFS_IOC_DEFRAG);
MAYBE D(SYS_renameat2);
my $id = mid2id($self, $mid);
$sth->execute($id, $num);
}
+ $self->{-art_max} = $num if $num > ($self->{-art_max} // 0);
}
sub _remove_oid {
use PublicInbox::Eml;
use PublicInbox::DS qw(now);
use PublicInbox::Search qw(xap_terms);
+use PublicInbox::Syscall qw(defrag_file);
use PublicInbox::InboxWritable;
use PublicInbox::MID qw(mids_for_index mids);
use PublicInbox::MsgIter;
our $DB_NO_SYNC = 0;
our $DB_DANGEROUS = 0;
our $CHECKPOINT_INTVL = 5; # seconds
+our $DEFRAG_NR = 100000; # document count
our $BATCH_BYTES = $ENV{XAPIAN_FLUSH_THRESHOLD} ? 0x7fffffff :
# assume a typical 64-bit system has 8x more RAM than a
# typical 32-bit system:
if (!-d $dir && (!$is_shard ||
($is_shard && need_xapian($self)))) {
File::Path::mkpath($dir);
- require PublicInbox::Syscall;
PublicInbox::Syscall::nodatacow_dir($dir);
# owner == self for CodeSearchIdx
$self->{-set_has_threadid_once} = 1 if $owner != $self;
$self;
}
+# Calculate the next article number to defrag at.
+#   $num - current max article number (0 on a new DB)
+#   $opt - option hashref; $opt->{defrag} overrides $DEFRAG_NR
+# Returns the next multiple of the interval strictly above $num, or
+# nothing (undef in scalar context) when defrag is disabled by a zero
+# or negative interval.
+sub next_defrag ($$) {
+	my ($num, $opt) = @_;
+	my $nr = $opt->{defrag} // $DEFRAG_NR;
+	# `%' takes the sign of its right operand, so a negative interval
+	# would yield a target at or below $num and trigger a defrag on
+	# every checkpoint; treat it like 0 (disabled) instead
+	return if !$nr || $nr < 0;
+	$num ||= 1; # num == 0 on new DB
+	$num + $nr - ($num % $nr);
+}
+
+# Defragment the Xapian data files inside this object's xdir.
+# Stops at the first file for which defrag_file gives a false value;
+# presumably the remaining files in the same directory would be no
+# different (e.g. not on btrfs).
+sub defrag_xdir {
+	my ($self) = @_;
+	# core glob() splits its pattern on whitespace, which breaks
+	# when xdir contains a space; bsd_glob takes the pattern as-is
+	require File::Glob;
+	# e.g. xap15/[0123]/*.{glass,honey}, skip flintlock+iam{glass,*}
+	for my $f (File::Glob::bsd_glob($self->xdir.'/*.*')) {
+		next if $f =~ /\.sqlite3/; # v1 has over.sqlite3*
+		defrag_file($f) or last;
+	}
+}
+
1;
EPOLLIN EPOLLOUT EPOLLET
EPOLL_CTL_ADD EPOLL_CTL_DEL EPOLL_CTL_MOD
EPOLLONESHOT EPOLLEXCLUSIVE
- rename_noreplace %SIGNUM $F_SETPIPE_SZ);
+ rename_noreplace %SIGNUM $F_SETPIPE_SZ defrag_file);
use constant {
EPOLLIN => 1,
EPOLLOUT => 4,
$SYS_recvmsg);
my $SYS_fstatfs; # don't need fstatfs64, just statfs.f_type
-my ($FS_IOC_GETFLAGS, $FS_IOC_SETFLAGS, $SYS_writev);
+my ($FS_IOC_GETFLAGS, $FS_IOC_SETFLAGS, $SYS_writev,
+ $BTRFS_IOC_DEFRAG);
my $SFD_CLOEXEC = 02000000; # Perl does not expose O_CLOEXEC
our $no_deprecated = 0;
};
$FS_IOC_GETFLAGS = 0x80046601;
$FS_IOC_SETFLAGS = 0x40046602;
+ $BTRFS_IOC_DEFRAG = 0x50009402;
} elsif ($machine eq "x86_64") {
$SYS_epoll_create = 213;
$SYS_epoll_ctl = 233;
};
$FS_IOC_GETFLAGS = 0x80086601;
$FS_IOC_SETFLAGS = 0x40086602;
+ $BTRFS_IOC_DEFRAG = 0x50009402;
} elsif ($machine eq 'x32') {
$SYS_epoll_create = 1073742037;
$SYS_epoll_ctl = 1073742057;
}
}
-# returns "0 but true" on success, undef or
-sub nodatacow_fh ($) {
+sub is_btrfs ($) { # true iff fstatfs reports BTRFS_SUPER_MAGIC for $fh
my ($fh) = @_;
my $buf = "\0" x 120;
- syscall($SYS_fstatfs // return, fileno($fh), $buf) == 0 or
- return warn("fstatfs: $!\n");
+ if (syscall($SYS_fstatfs // return, fileno($fh), $buf) != 0) { # `// return' bails out when $SYS_fstatfs is undefined
+ warn "fstatfs: $!\n";
+ return; # empty/undef: failure was already reported by the warn above
+ }
my $f_type = unpack($FSWORD_T, $buf);
- return if $f_type != 0x9123683E; # BTRFS_SUPER_MAGIC
+ $f_type == 0x9123683E; # BTRFS_SUPER_MAGIC
+}
+
+# Ask btrfs to defragment $file via the BTRFS_IOC_DEFRAG ioctl.
+# Returns "0 but true" on success; undef on noop ($file cannot be
+# opened read-write, or is_btrfs is false for it) and also undef,
+# after warning, when the ioctl itself fails; true != 0 (the return
+# value of warn) when BTRFS_IOC_DEFRAG is undefined for architecture.
+sub defrag_file ($) {
+	my ($file) = @_;
+	open my $fh, '+<', $file or return;
+	is_btrfs($fh) or return;
+	$BTRFS_IOC_DEFRAG //
+		return warn 'BTRFS_IOC_DEFRAG undefined for architecture';
+	my $ret = ioctl($fh, $BTRFS_IOC_DEFRAG, 0);
+	# ioctl gives undef on error; report it instead of letting it
+	# pass silently as a noop, but keep the undef return for callers
+	warn "BTRFS_IOC_DEFRAG($file): $!\n" unless defined $ret;
+	$ret;
+}
+
+# sets FS_NOCOW_FL on $fh; returns "0 but true" on success, undef on noop, true != 0 on failure
+sub nodatacow_fh ($) {
+ my ($fh) = @_;
+ return unless is_btrfs $fh; # noop whenever is_btrfs is false for $fh
$FS_IOC_GETFLAGS //
- return warn('FS_IOC_GETFLAGS undefined for platform');
- ioctl($fh, $FS_IOC_GETFLAGS, $buf) //
- return warn("FS_IOC_GETFLAGS: $!\n");
+ return (undef, warn 'FS_IOC_GETFLAGS undefined for platform'); # scalar callers get warn's value, list callers get (undef, that value)
+ ioctl($fh, $FS_IOC_GETFLAGS, my $buf = "\0\0\0\0") // # ioctl yields undef on error
+ return (undef, warn "FS_IOC_GETFLAGS: $!");
my $attr = unpack('l!', $buf);
return if ($attr & 0x00800000); # FS_NOCOW_FL;
ioctl($fh, $FS_IOC_SETFLAGS, pack('l', $attr | 0x00800000)) //
- warn("FS_IOC_SETFLAGS: $!\n");
+ return (undef, warn "FS_IOC_SETFLAGS: $!"); # same failure convention as the GETFLAGS paths above
}
sub nodatacow_dir ($) {
$rc && $rc == 0 and warn <<EOM;
W: Disabling copy-on-write (CoW) on `$f'
W: to avoid pathological slowdowns. Data corruption may occur on unclean
-W: shutdowns, especially if using any form of BTRFS RAID. Periodic defrag
-W: is recommended for *.sqlite3 and *.glass files to maintain performance.
+W: shutdowns, especially if using any form of BTRFS RAID.
EOM
}
}
use PublicInbox::Search;
use PublicInbox::SearchIdx qw(log2stack is_ancestor check_size is_bad_blob
update_checkpoint);
+use PublicInbox::Syscall qw(defrag_file);
use PublicInbox::DS qw(now);
use IO::Handle; # ->autoflush
use POSIX ();
$self->{shards} = $nshards if $nshards && $nshards != $self->{shards};
$self->{batch_bytes} = $opt->{batch_size} //
$PublicInbox::SearchIdx::BATCH_BYTES;
+ $self->{defrag_at} =
+ PublicInbox::SearchIdx::next_defrag $self->{oidx}->max, $opt;
# need to create all shards before initializing msgmap FD
# idx_shards must be visible to all forked processes
}
}
+# Defragment every Xapian shard and the SQLite DB(s), then compute the
+# article number at which the next defrag is due.  When defrag_file
+# gives a false value for the oidx DB, {defrag_at} is deleted so the
+# defined() check in checkpoint stops calling us.
+sub do_defrag ($) {
+	my ($self) = @_;
+	my ($pr, $t0) = ($self->{-opt}->{-progress}, now());
+
+	# parallel shards, but each *.{glass,honey,etc.} is synchronous
+	$_->ipc_do('defrag_xdir') for @{$self->{idx_shards} // []};
+
+	# TODO: parallelize SQLite defrags?
+	my $oidx = $self->{oidx};
+	if (defrag_file($oidx->dbh->sqlite_db_filename)) {
+		$self->{mm} and # v2 only, not -extindex
+			defrag_file($self->{mm}->{dbh}->sqlite_db_filename);
+		$self->{defrag_at} = PublicInbox::SearchIdx::next_defrag(
+						$oidx->{-art_max},
+						$self->{-opt});
+		# -progress may be unset (presumably when running quiet),
+		# so never invoke it unconditionally
+		# NOTE(review): assumes now() returns (fractional) seconds,
+		# hence the conversion to milliseconds -- TODO confirm
+		$pr and $pr->('defrag took ',
+			sprintf('%ums', (now() - $t0) * 1000),
+			", next defrag: >=#$self->{defrag_at} ",
+			"(cur: $oidx->{-art_max})\n");
+	} else { # defrag not supported (or needed, maybe)
+		delete $self->{defrag_at};
+	}
+}
+
# public
sub checkpoint ($;$) {
my ($self, $wait) = @_;
# (non-parallel waits here)
$_->ipc_do('commit_txn_lazy') for @$shards;
+ defined($self->{defrag_at}) and
+ ($self->{oidx}->{-art_max}//0) >= $self->{defrag_at} and
+ do_defrag $self;
+
# transactions started on parallel shards,
# wait for them by issuing an echo command (echo can only
# run after commit_txn_lazy is done)
EOF
my $opt = { quiet => -1, compact => 0, fsync => 1, scan => 1 };
GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i
- fsync|sync! fast dangerous wal
+ fsync|sync! fast dangerous wal defrag=i
indexlevel|index-level|L=s max_size|max-size=s
batch_size|batch-size=s
dedupe:s@ gc commit-interval=i watch scan! dry-run|n
GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i prune
fsync|sync! xapian_only|xapian-only dangerous wal
indexlevel|index-level|L=s max_size|max-size=s
- batch_size|batch-size=s
+ defrag=i batch_size|batch-size=s
since|after=s until|before=s
sequential-shard|seq-shard
multi-pack-index!