From db671788b33d606b9014479c4530194c6759d7b5 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 26 Aug 2025 19:50:52 +0000 Subject: [PATCH] support --cow switch to preserve CoW on btrfs We currently unconditionally disable CoW on btrfs to reduce fragmentation. Unfortunately, disabling CoW may cause data corruption on all btrfs RAID levels, so provide an option to keep it enabled. In the future, CoW may become the default on btrfs (matching the FS default) even if fragmentation is awful. --- MANIFEST | 1 + lib/PublicInbox/ExtSearchIdx.pm | 1 + lib/PublicInbox/MiscIdx.pm | 5 +- lib/PublicInbox/Over.pm | 2 +- lib/PublicInbox/SQLiteUtil.pm | 10 ++-- lib/PublicInbox/SearchIdx.pm | 3 +- lib/PublicInbox/Syscall.pm | 14 ++++++ lib/PublicInbox/Xapcmd.pm | 30 ++++++++---- script/public-inbox-cindex | 2 +- script/public-inbox-convert | 2 +- script/public-inbox-extindex | 2 +- script/public-inbox-index | 2 +- script/public-inbox-init | 2 +- script/public-inbox-xcpdb | 2 +- t/cow.t | 82 +++++++++++++++++++++++++++++++++ t/extsearch.t | 11 +++++ t/nodatacow.t | 4 ++ 17 files changed, 151 insertions(+), 24 deletions(-) create mode 100644 t/cow.t diff --git a/MANIFEST b/MANIFEST index aa4d8465e..f36eb53de 100644 --- a/MANIFEST +++ b/MANIFEST @@ -458,6 +458,7 @@ t/config.t t/config_limiter.t t/content_hash.t t/convert-compact.t +t/cow.t t/daemon.t t/data-gen/.gitignore t/data/0001.patch diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm index 911a900f5..4719c8805 100644 --- a/lib/PublicInbox/ExtSearchIdx.pm +++ b/lib/PublicInbox/ExtSearchIdx.pm @@ -75,6 +75,7 @@ sub new { my $self = bless { xpfx => "$dir/ei".PublicInbox::Search::SCHEMA_VERSION, topdir => $dir, + -opt => $opt, creat => $opt->{creat}, ibx_map => {}, # (newsgroup//inboxdir) => $ibx ibx_active => [], # by config section order diff --git a/lib/PublicInbox/MiscIdx.pm b/lib/PublicInbox/MiscIdx.pm index 257bd7a83..b528902c3 100644 --- a/lib/PublicInbox/MiscIdx.pm +++ b/lib/PublicInbox/MiscIdx.pm @@ -28,9 +28,10 @@ sub new { PublicInbox::SearchIdx::load_xapian_writable(); my $mi_dir = "$eidx->{xpfx}/misc"; File::Path::mkpath($mi_dir); - PublicInbox::Syscall::nodatacow_dir($mi_dir); - my $flags = $PublicInbox::SearchIdx::DB_CREATE_OR_OPEN; my $opt = $eidx->{-opt}; + $opt->{cow} or + PublicInbox::Syscall::nodatacow_dir($mi_dir); + my $flags = $PublicInbox::SearchIdx::DB_CREATE_OR_OPEN; $flags |= $PublicInbox::SearchIdx::DB_NO_SYNC if !$opt->{fsync}; $flags |= $PublicInbox::SearchIdx::DB_DANGEROUS if $opt->{dangerous}; $json //= PublicInbox::Config::json(); diff --git a/lib/PublicInbox/Over.pm b/lib/PublicInbox/Over.pm index 6b24dfdb7..deac8dc9f 100644 --- a/lib/PublicInbox/Over.pm +++ b/lib/PublicInbox/Over.pm @@ -21,7 +21,7 @@ sub dbh_new { my $opt = $self->{-opt}; if (!-s $f) { if ($rw) { - PublicInbox::SQLiteUtil::create_db $f; + PublicInbox::SQLiteUtil::create_db $f, $opt; } else { $self->{filename} = $f; # die on stat() below: } diff --git a/lib/PublicInbox/SQLiteUtil.pm b/lib/PublicInbox/SQLiteUtil.pm index fcec9e4cc..086627999 100644 --- a/lib/PublicInbox/SQLiteUtil.pm +++ b/lib/PublicInbox/SQLiteUtil.pm @@ -28,11 +28,13 @@ sub mk_sqlite_re ($$) { : ($anywhere ? '.*' : '^')."\Q$pfx\E.*"; } -sub create_db ($) { - my ($f) = @_; - require PublicInbox::Syscall; +sub create_db ($;$) { + my ($f, $opt) = @_; my ($dir) = ($f =~ m!(.+)/[^/]+\z!); - PublicInbox::Syscall::nodatacow_dir($dir); # for journal/shm/wal + unless ($opt->{cow}) { + require PublicInbox::Syscall; + PublicInbox::Syscall::nodatacow_dir($dir); # for journal/shm/wal + } # SQLite defaults mode to 0644, we want 0666 to respect umask open my $fh, '+>>', $f; } diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index c0056ee5f..8252af281 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -150,7 +150,8 @@ sub idx_acquire { if (!-d $dir && (!$is_shard || ($is_shard && need_xapian($self)))) { File::Path::mkpath($dir); - PublicInbox::Syscall::nodatacow_dir($dir); + $self->{-opt}->{cow} or + PublicInbox::Syscall::nodatacow_dir($dir); # owner == self for CodeSearchIdx $self->{-set_has_threadid_once} = 1 if $owner != $self; $flag |= $DB_DANGEROUS if $self->{-opt}->{dangerous}; diff --git a/lib/PublicInbox/Syscall.pm b/lib/PublicInbox/Syscall.pm index 1045394ac..e65c3301d 100644 --- a/lib/PublicInbox/Syscall.pm +++ b/lib/PublicInbox/Syscall.pm @@ -474,6 +474,20 @@ sub nodatacow_fh ($) { return (undef, warn "FS_IOC_SETFLAGS: $!"); } +# returns "0 but true" on success, undef on noop, true != 0 on failure +sub yesdatacow_fh ($) { + my ($fh) = @_; + return unless is_btrfs $fh; + $FS_IOC_GETFLAGS // + return (undef, warn 'FS_IOC_GETFLAGS undefined for platform'); + ioctl($fh, $FS_IOC_GETFLAGS, my $buf = "\0\0\0\0") // + return (undef, warn "FS_IOC_GETFLAGS: $!"); + my $attr = unpack('l!', $buf); + return unless ($attr & 0x00800000); # FS_NOCOW_FL; + ioctl($fh, $FS_IOC_SETFLAGS, pack('l', $attr & ~0x00800000)) // + return (undef, warn "FS_IOC_SETFLAGS: $!"); +} + sub nodatacow_dir ($) { my ($f) = @_; if (open my $fh, '<', $f) { diff --git a/lib/PublicInbox/Xapcmd.pm b/lib/PublicInbox/Xapcmd.pm index 2ec0c073c..02b16a045 100644 --- a/lib/PublicInbox/Xapcmd.pm +++ b/lib/PublicInbox/Xapcmd.pm @@ -2,7 +2,7 @@ # License: AGPL-3.0+ package PublicInbox::Xapcmd; use v5.12; -use autodie qw(chmod opendir rename syswrite); +use autodie qw(chmod closedir open opendir rename syswrite); use PublicInbox::Spawn qw(which popen_rd); use PublicInbox::Syscall; use PublicInbox::Admin qw(setup_signals); @@ -11,7 +11,7 @@ use PublicInbox::Search qw(xap_terms); use PublicInbox::SearchIdx; use File::Temp 0.19 (); # ->newdir use File::Path qw(remove_tree); -use POSIX qw(WNOHANG _exit); +use POSIX qw(WNOHANG dup _exit); use PublicInbox::DS; # support testing with dev versions of Xapian which installs @@ -214,7 +214,8 @@ sub prepare_run { my $v = PublicInbox::Search::SCHEMA_VERSION(); my $wip = File::Temp->newdir("xapian$v-XXXX", DIR => $dir); $tmp->{$old} = $wip; - PublicInbox::Syscall::nodatacow_dir($wip->dirname); + $opt->{cow} or + PublicInbox::Syscall::nodatacow_dir($wip->dirname); push @queue, [ $old, $wip ]; } elsif (defined $old) { opendir(my $dh, $old); @@ -229,6 +230,12 @@ sub prepare_run { warn "W: skipping unknown dir: $old/$dn\n" } } + if ($opt->{cow}) { # make existing $DIR/{xap,ei}* CoW + my $dfd = dup(fileno($dh)) // die "dup: $!"; + open my $fh, '<&='.$dfd; + closedir $dh; + PublicInbox::Syscall::yesdatacow_fh($fh); + } die "No Xapian shards found in $old\n" unless @old_shards; @old_shards = sort { $a <=> $b } @old_shards; my ($src, $max_shard); @@ -242,10 +249,12 @@ sub prepare_run { } foreach my $dn (0..$max_shard) { my $wip = File::Temp->newdir("$dn-XXXX", DIR => $old); - same_fs_or_die($old, $wip->dirname); + my $wip_dn = $wip->dirname; + same_fs_or_die($old, $wip_dn); my $cur = "$old/$dn"; push @queue, [ $src // $cur , $wip ]; - PublicInbox::Syscall::nodatacow_dir($wip->dirname); + $opt->{cow} or + PublicInbox::Syscall::nodatacow_dir($wip_dn); $tmp->{$cur} = $wip; } # mark old shards to be unlinked @@ -418,13 +427,13 @@ sub xapian_write_prep ($) { (\%PublicInbox::Search::X, $flag); } -sub compact_tmp_shard ($) { - my ($wip) = @_; +sub compact_tmp_shard ($$) { + my ($wip, $opt) = @_; my $new = $wip->dirname; my ($dir) = ($new =~ m!(.*?/)[^/]+/*\z!); same_fs_or_die($dir, $new); my $ft = File::Temp->newdir("$new.compact-XXXX", DIR => $dir); - PublicInbox::Syscall::nodatacow_dir($ft->dirname); + PublicInbox::Syscall::nodatacow_dir($ft->dirname) if !$opt->{cow}; $ft; } @@ -444,7 +453,8 @@ sub cidx_reshard { # not docid based my @tmp; my @dst = map { my $wip = $_->[1]; - my $tmp = $opt->{compact} ? compact_tmp_shard($wip) : $wip; + my $tmp = $opt->{compact} ? + compact_tmp_shard($wip, $opt) : $wip; push @tmp, $tmp; $X->{WritableDatabase}->new($tmp->dirname, $flag); } @$queue; @@ -520,7 +530,7 @@ sub cpdb ($$) { # cb_spawn callback my $tmp = $wip; local @SIG{keys %SIG} = values %SIG; if ($opt->{compact}) { - $tmp = compact_tmp_shard($wip); + $tmp = compact_tmp_shard($wip, $opt); setup_signals(); } diff --git a/script/public-inbox-cindex b/script/public-inbox-cindex index dd00623a1..e6b1e67b5 100755 --- a/script/public-inbox-cindex +++ b/script/public-inbox-cindex @@ -26,7 +26,7 @@ See public-inbox-cindex(1) man page for full documentation. EOF my $opt = { fsync => 1, scan => 1 }; # --no-scan is hidden GetOptions($opt, qw(quiet|q verbose|v+ reindex jobs|j=i fsync|sync! dangerous - indexlevel|index-level|L=s join:s@ + cow! indexlevel|index-level|L=s join:s@ batch_size|batch-size=s max_size|max-size=s include|I=s@ only=s@ all show:s@ project-list=s exclude=s@ project-root|r=s diff --git a/script/public-inbox-convert b/script/public-inbox-convert index 78defa935..598636c94 100755 --- a/script/public-inbox-convert +++ b/script/public-inbox-convert @@ -37,7 +37,7 @@ GetOptions($opt, qw(jobs|j=i index! help|h C=s@), # index options qw(verbose|v+ rethread compact|c+ fsync|sync! indexlevel|index-level|L=s max_size|max-size=s - batch_size|batch-size=s wal + batch_size|batch-size=s cow! wal sequential-shard|seq-shard )) or die $help; if ($opt->{help}) { print $help; exit 0 }; diff --git a/script/public-inbox-extindex b/script/public-inbox-extindex index b49577ef5..2ba0a607f 100755 --- a/script/public-inbox-extindex +++ b/script/public-inbox-extindex @@ -28,7 +28,7 @@ See public-inbox-extindex(1) man page for full documentation. EOF my $opt = { quiet => -1, compact => 0, fsync => 1, scan => 1 }; GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i - fsync|sync! fast dangerous wal defrag=i + fsync|sync! fast dangerous cow! wal defrag=i indexlevel|index-level|L=s max_size|max-size=s batch_size|batch-size=s dedupe:s@ gc commit-interval=i watch scan! dry-run|n diff --git a/script/public-inbox-index b/script/public-inbox-index index acdec3a9c..0eb88ba14 100755 --- a/script/public-inbox-index +++ b/script/public-inbox-index @@ -36,7 +36,7 @@ my $opt = { 'update-extindex' => [], # ":s@" optional arg sets '' if no arg given }; GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i prune - fsync|sync! xapian_only|xapian-only dangerous wal + fsync|sync! xapian_only|xapian-only dangerous cow! wal indexlevel|index-level|L=s max_size|max-size=s defrag=i batch_size|batch-size=s since|after=s until|before=s diff --git a/script/public-inbox-init b/script/public-inbox-init index f2291b05c..dfb3dbb77 100755 --- a/script/public-inbox-init +++ b/script/public-inbox-init @@ -39,7 +39,7 @@ my $usage_cb = sub { exit 1; }; GetOptions(my $opt = {}, qw(version|V=i - wal indexlevel|index-level|L=s + cow! wal indexlevel|index-level|L=s skip-epoch|skip|S=i skip-artnum=i jobs|j=i newsgroup|ng=s skip-docdata help|h diff --git a/script/public-inbox-xcpdb b/script/public-inbox-xcpdb index fac54559f..e93d41e05 100755 --- a/script/public-inbox-xcpdb +++ b/script/public-inbox-xcpdb @@ -28,7 +28,7 @@ EOF my $opt = { quiet => -1, compact => 0, fsync => 1, -eidx_ok => 1, -cidx_ok => 1 }; GetOptions($opt, qw( - fsync|sync! compact|c reshard|R=i + cow! fsync|sync! compact|c reshard|R=i max_size|max-size=s batch_size|batch-size=s sequential-shard|seq-shard jobs|j=i quiet|q verbose|v diff --git a/t/cow.t b/t/cow.t new file mode 100644 index 000000000..0d9dfd5f5 --- /dev/null +++ b/t/cow.t @@ -0,0 +1,82 @@ +# Copyright (C) all contributors +# License: AGPL-3.0+ +use v5.12; use autodie; use PublicInbox::TestCommon; +use File::Temp 0.19; +use PublicInbox::IO; +my $dir = $ENV{BTRFS_TESTDIR}; +plan skip_all => 'BTRFS_TESTDIR not defined' if !$dir; +plan skip_all => 'test is Linux-only' if $^O ne 'linux'; +require_mods 'v2'; +my $lsattr = require_cmd 'lsattr'; +my $tmp = File::Temp->newdir('cow-XXXX', DIR => $dir); +local $ENV{PI_CONFIG} = "$tmp/pi-cfg"; + +PublicInbox::IO::write_file '>', $ENV{PI_CONFIG}, < 'c@example.com' }, + { 0 => \($eml->as_string) }); + +$lsa = xqx([$lsattr, '-l', "$tmp/b/public-inbox"]); +unlike $lsa, qr/\bNo_COW\b/, 'No_COW not set' or + diag explain($lsa); + +ok run_script([qw(-convert --cow), "$tmp/c", "$tmp/c2"]), + '-convert --cow'; +$lsa = xqx([$lsattr, '-lR', glob("$tmp/c2/xap*/")]); +unlike $lsa, qr/\bNo_COW\b/i, 'CoW preserved w/ -convert --cow'; + +ok run_script([qw(-convert), "$tmp/c", "$tmp/C2"]), + '-convert w/o --cow'; +$lsa = xqx([$lsattr, '-lR', glob("$tmp/C2/xap*/")]); +like $lsa, qr/\bNo_COW\b/i, '-convert unsets CoW w/o --cow'; + +ok run_script([qw(-index --cow -L medium), "$tmp/c2"]), + '-index -V2 --cow + Xapian'; +$lsa = xqx([$lsattr, '-lR', "$tmp/c2/", glob("$tmp/c2/xap*/")]); +unlike $lsa, qr/\bNo_COW\b/i, 'CoW preserved w/ -convert --cow + Xapian'; + +ok run_script([qw(-xcpdb -R1 --cow), "$tmp/c2"]), + 'xcpdb respects --cow'; +$lsa = xqx([$lsattr, '-lR', glob("$tmp/c2/xap*/")]); +unlike $lsa, qr/\bNo_COW\b/i, '-xcpdb --cow works'; + +done_testing; diff --git a/t/extsearch.t b/t/extsearch.t index 5c9b6dbe0..08fdc6148 100644 --- a/t/extsearch.t +++ b/t/extsearch.t @@ -740,5 +740,16 @@ if ('basic') { my $smsg = $es->over->next_by_mid($msgid, \(my $id), \(my $prev)); ok $smsg, 'new message imported into over.sqlite3 w/ basic'; } +SKIP: { + my $bdir = $ENV{BTRFS_TESTDIR} or skip 'BTRFS_TESTDIR not defined', 1; + my $lsattr = require_cmd 'lsattr', 1; + my $tmp = File::Temp->newdir('eidx-cow-XXXX', DIR => $bdir); + local $ENV{DUMP} = 1; + ok run_script([qw(-extindex --cow --all), "$tmp/eidx"], undef, + { 2 => \(my $err = '') }), 'extindexed w/ --cow'; + diag $err; + my $lsa = xqx([$lsattr, '-Rl', glob("$tmp/eidx/ei*")]); + unlike $lsa, qr/No_COW/i, '--cow respected'; +} done_testing; diff --git a/t/nodatacow.t b/t/nodatacow.t index b482a0ec6..5e5c1e3ba 100644 --- a/t/nodatacow.t +++ b/t/nodatacow.t @@ -50,6 +50,10 @@ SKIP: { PublicInbox::Syscall::nodatacow_dir($name); is_deeply \@w, [], 'no warnings if CoW already disabled'; } + open $fh, '<', $name or BAIL_OUT "open($name): $!"; + PublicInbox::Syscall::yesdatacow_fh($fh); + $res = xqx([$lsattr, '-d', $name]); + like $res, qr/^-+ \Q$name\E/, "`C' attribute cleared"; }; done_testing; -- 2.47.3