From 0789effd55d2f716ff2d03db1e4680fe862f10ef Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 26 Aug 2025 19:50:48 +0000 Subject: [PATCH] overidx: take Getopt::Long options hashref directly Taking the Getopt::Long options hashref directly will allow us to support future options more easily and avoid copying/mapping fields (e.g. {-no_fsync}, {journal_mode}) across different object types). --- lib/PublicInbox/ExtSearchIdx.pm | 6 ++---- lib/PublicInbox/InboxWritable.pm | 1 - lib/PublicInbox/LeiSavedSearch.pm | 5 ++--- lib/PublicInbox/LeiStore.pm | 1 + lib/PublicInbox/Msgmap.pm | 2 +- lib/PublicInbox/Over.pm | 11 +++++------ lib/PublicInbox/OverIdx.pm | 6 +++--- lib/PublicInbox/SearchIdx.pm | 6 ++---- lib/PublicInbox/V2Writable.pm | 3 ++- script/public-inbox-index | 2 +- t/over.t | 6 +++++- 11 files changed, 24 insertions(+), 25 deletions(-) diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm index e3ac80910..cb8a992ae 100644 --- a/lib/PublicInbox/ExtSearchIdx.pm +++ b/lib/PublicInbox/ExtSearchIdx.pm @@ -93,11 +93,9 @@ sub new { $l !~ $PublicInbox::SearchIdx::INDEXLEVELS and die "invalid indexlevel=$l\n"; $self->{indexlevel} = $l; - my $oidx = PublicInbox::OverIdx->new($over_file); - $oidx->{journal_mode} = 'wal' if $opt->{wal}; - $self->{-no_fsync} = $oidx->{-no_fsync} = 1 if !$opt->{fsync}; + $self->{oidx} = PublicInbox::OverIdx->new($over_file, $opt); + $self->{-no_fsync} = 1 if !$opt->{fsync}; $self->{-dangerous} = 1 if $opt->{dangerous}; - $self->{oidx} = $oidx; $self } diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm index 538e32b24..5dcbd8286 100644 --- a/lib/PublicInbox/InboxWritable.pm +++ b/lib/PublicInbox/InboxWritable.pm @@ -37,7 +37,6 @@ sub _init_v1 { require PublicInbox::SearchIdx; require PublicInbox::Msgmap; my $sidx = PublicInbox::SearchIdx->new($self, $opt); - $sidx->{oidx}->{journal_mode} = 'wal' if $opt->{wal}; $sidx->begin_txn_lazy; my $mm = PublicInbox::Msgmap->new_file($self, $opt); if (defined $skip_artnum) { diff --git a/lib/PublicInbox/LeiSavedSearch.pm b/lib/PublicInbox/LeiSavedSearch.pm index 3361f758a..b78e460f7 100644 --- a/lib/PublicInbox/LeiSavedSearch.pm +++ b/lib/PublicInbox/LeiSavedSearch.pm @@ -232,9 +232,8 @@ sub prepare_dedupe { $self->{oidx} // do { my $creat = !-f $self->{-ovf}; my $lk = $self->lock_for_scope; # git-config doesn't wait - my $oidx = PublicInbox::OverIdx->new($self->{-ovf}); - $oidx->{-no_fsync} = 1; - $oidx->{journal_mode} = 'WAL'; + my $oidx = PublicInbox::OverIdx->new($self->{-ovf}, + { wal => 1 }); $oidx->dbh; $oidx->eidx_prep if $creat; # for xref3 $self->{oidx} = $oidx diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm index 9d97ef58a..f7fa4723b 100644 --- a/lib/PublicInbox/LeiStore.pm +++ b/lib/PublicInbox/LeiStore.pm @@ -42,6 +42,7 @@ use PublicInbox::DS qw(add_uniq_timer); sub new { my (undef, $dir, $opt) = @_; + $opt->{wal} = 1; my $eidx = PublicInbox::ExtSearchIdx->new($dir, $opt); my $self = bless { priv_eidx => $eidx }, __PACKAGE__; eidx_init($self)->done if $opt->{creat}; diff --git a/lib/PublicInbox/Msgmap.pm b/lib/PublicInbox/Msgmap.pm index d7eb24673..9ef87d789 100644 --- a/lib/PublicInbox/Msgmap.pm +++ b/lib/PublicInbox/Msgmap.pm @@ -28,7 +28,7 @@ sub new_file { return if !$rw && !-r $f; my $self = bless { filename => $f }, $class; - $self->{journal_mode} = 'wal' if $opt->{wal}; + $self->{-opt} = $opt if $opt; # for WAL my $dbh = $self->{dbh} = PublicInbox::Over::dbh_new($self, $rw); if ($rw) { $dbh->begin_work; diff --git a/lib/PublicInbox/Over.pm b/lib/PublicInbox/Over.pm index 0ecdae3b9..6b24dfdb7 100644 --- a/lib/PublicInbox/Over.pm +++ b/lib/PublicInbox/Over.pm @@ -18,6 +18,7 @@ use PublicInbox::SQLiteUtil; sub dbh_new { my ($self, $rw) = @_; my $f = delete $self->{filename}; + my $opt = $self->{-opt}; if (!-s $f) { if ($rw) { PublicInbox::SQLiteUtil::create_db $f; @@ -59,16 +60,14 @@ sub dbh_new { # If an admin is willing to give read-only daemons R/W # permissions; they can enable WAL manually and we will # respect that by not clobbering it. - my $jm = $self->{journal_mode}; # set by lei - if (defined $jm) { - $dbh->do('PRAGMA journal_mode = '.$jm); + if ($opt->{wal}) { + $dbh->do('PRAGMA journal_mode = wal'); } else { - $jm = $dbh->selectrow_array('PRAGMA journal_mode'); + my $jm = $dbh->selectrow_array('PRAGMA journal_mode'); $jm eq 'wal' or $dbh->do('PRAGMA journal_mode = TRUNCATE'); } - - $dbh->do('PRAGMA synchronous = OFF') if $rw > 1; + $opt->{fsync} or $dbh->do('PRAGMA synchronous = OFF') } $dbh; } diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm index c9cd44646..6e36531a9 100644 --- a/lib/PublicInbox/OverIdx.pm +++ b/lib/PublicInbox/OverIdx.pm @@ -21,7 +21,7 @@ use bytes (); # length sub dbh_new { my ($self) = @_; - my $dbh = $self->SUPER::dbh_new($self->{-no_fsync} ? 2 : 1); + my $dbh = $self->SUPER::dbh_new(1); # 80000 pages (80MiB on SQLite <3.12.0, 320MiB on 3.12.0+) # was found to be good in 2018 during the large LKML import @@ -34,8 +34,9 @@ sub dbh_new { } sub new { - my ($class, $f) = @_; + my ($class, $f, $opt) = @_; my $self = $class->SUPER::new($f); + $self->{-opt} = $opt; $self->{min_tid} = 0; $self; } @@ -474,7 +475,6 @@ sub create { my ($dir) = ($fn =~ m!(.*?/)[^/]+\z!); File::Path::mkpath($dir); } - $self->{journal_mode} = 'wal' if $opt->{wal}; # create the DB: PublicInbox::Over::dbh($self); $self->dbh_close; diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 046e12390..6e05f9567 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -81,9 +81,8 @@ sub new { } if ($version == 1) { $self->{lock_path} = "$inboxdir/ssoma.lock"; - my $dir = $self->xdir; - $self->{oidx} = PublicInbox::OverIdx->new("$dir/over.sqlite3"); - $self->{oidx}->{-no_fsync} = 1 if $ibx->{-no_fsync}; + $self->{oidx} = PublicInbox::OverIdx->new( + $self->xdir.'/over.sqlite3', $creat_opt); } elsif ($version == 2) { defined $shard or die "shard is required for v2\n"; # shard is a number @@ -1115,7 +1114,6 @@ sub _index_sync { local $SIG{QUIT} = $quit; local $SIG{INT} = $quit; local $SIG{TERM} = $quit; - $self->{oidx}->{journal_mode} = 'wal' if $opt->{wal}; my $xdb = $self->begin_txn_lazy; $self->{oidx}->rethread_prepare($opt); my $mm = v1_mm_init $self; diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index 2f869817b..09e2777ce 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -64,7 +64,8 @@ sub new { total_bytes => 0, current_info => '', xpfx => $xpfx, - oidx => PublicInbox::OverIdx->new("$xpfx/over.sqlite3"), + oidx => PublicInbox::OverIdx->new("$xpfx/over.sqlite3", + $creat_opt), lock_path => "$dir/inbox.lock", # limit each git repo (epoch) to 1GB or so rotate_bytes => int((1024 * 1024 * 1024) / $PACKING_FACTOR), diff --git a/script/public-inbox-index b/script/public-inbox-index index e9832f4d8..b4965ec95 100755 --- a/script/public-inbox-index +++ b/script/public-inbox-index @@ -95,7 +95,7 @@ for my $ei_name (@$update_extindex) { my $mods = {}; my @eidx_unconfigured; foreach my $ibx (@ibxs) { - $ibx = PublicInbox::InboxWritable->new($ibx); + $ibx = PublicInbox::InboxWritable->new($ibx, $opt); # detect_indexlevel may also set $ibx->{-skip_docdata} my $detected = $ibx->detect_indexlevel; # XXX: users can shoot themselves in the foot, with opt->{indexlevel} diff --git a/t/over.t b/t/over.t index 1f2df7cf0..6802cc791 100644 --- a/t/over.t +++ b/t/over.t @@ -27,9 +27,13 @@ $over->dbh_close; $over = PublicInbox::Over->new("$tmpdir/over.sqlite3"); ok($over->dbh->{ReadOnly}, 'Over is ReadOnly'); +my $jm = $over->dbh->selectrow_array('PRAGMA journal_mode'); +isnt $jm, 'wal', 'journal_mode is not WAL by default'; -$over = PublicInbox::OverIdx->new("$tmpdir/over.sqlite3"); +$over = PublicInbox::OverIdx->new("$tmpdir/over.sqlite3", { wal => 1 }); $over->dbh; +$jm = $over->dbh->selectrow_array('PRAGMA journal_mode'); +is $jm, 'wal', "`wal' option respected"; is($over->sid('hello-world'), $x, 'idempotent across reopen'); $over->each_by_mid('never', sub { fail('should not be called') }); -- 2.47.3