From 2c472f1f571ae55155c78bcbc4d420d06266ba63 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 15 Nov 2023 09:21:43 +0000 Subject: [PATCH] lei convert: fix repeat and idempotent v2 output We should be able to treat v2 outputs just like any other mail format, with the exception that content dedupe is always enforced by the v2 format. This allows users hosting v2 public-inboxes to recover from broken synchronization by importing from alternate archives such as the mbox archives hosted by https://lists.gnu.org/ Link: https://public-inbox.org/meta/20231114-hypersonic-papaya-starling-e1cfc8@nitro/ --- lib/PublicInbox/LeiConvert.pm | 8 ++++++-- lib/PublicInbox/LeiOverview.pm | 4 ++-- lib/PublicInbox/LeiToMail.pm | 3 +-- lib/PublicInbox/LeiXSearch.pm | 4 ++-- lib/PublicInbox/V2Writable.pm | 3 ++- t/lei-convert.t | 31 ++++++++++++++++++++++++++++++- 6 files changed, 43 insertions(+), 10 deletions(-) diff --git a/lib/PublicInbox/LeiConvert.pm b/lib/PublicInbox/LeiConvert.pm index 22aba81a0..4a1f83233 100644 --- a/lib/PublicInbox/LeiConvert.pm +++ b/lib/PublicInbox/LeiConvert.pm @@ -34,9 +34,13 @@ sub process_inputs { # via wq_do $self->SUPER::process_inputs; my $lei = $self->{lei}; delete $lei->{1}; - my $l2m = delete $self->{l2m}; - delete $self->{wcb}; # commit + my $l2m = delete $lei->{l2m}; my $nr_w = delete($l2m->{-nr_write}) // 0; + delete $self->{wcb}; # commit + if (my $v2w = delete $lei->{v2w}) { + $nr_w = $v2w->wq_do('done'); # may die + $v2w->wq_close; + } my $d = (delete($l2m->{-nr_seen}) // 0) - $nr_w; $d = $d ? " ($d duplicates)" : ''; $lei->qerr("# converted $nr_w messages$d"); diff --git a/lib/PublicInbox/LeiOverview.pm b/lib/PublicInbox/LeiOverview.pm index 129dabf8c..0529bbe40 100644 --- a/lib/PublicInbox/LeiOverview.pm +++ b/lib/PublicInbox/LeiOverview.pm @@ -41,8 +41,8 @@ sub detect_fmt ($) { my ($dst) = @_; if ($dst =~ m!\A([:/]+://)!) { die "$1 support not implemented, yet\n"; - } elsif (!-e $dst || -d _) { - 'maildir'; # the default TODO: MH? 
+ } elsif (!-e $dst || -d _) { # maildir is the default TODO: MH + -e "$dst/inbox.lock" ? 'v2' : 'maildir'; } elsif (-f _ || -p _) { die "unable to determine mbox family of $dst\n"; } else { diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm index 2928be453..2d9b7061f 100644 --- a/lib/PublicInbox/LeiToMail.pm +++ b/lib/PublicInbox/LeiToMail.pm @@ -375,7 +375,6 @@ sub _v2_write_cb ($$) { ++$self->{-nr_seen}; return if $dedupe && $dedupe->is_dup($eml, $smsg); $lei->{v2w}->wq_do('add', $eml); # V2Writable->add - ++$self->{-nr_write}; } } @@ -435,7 +434,7 @@ sub new { ($lei->{opt}->{dedupe}//'') eq 'oid'; $self->{base_type} = 'v2'; $self->{-wq_nr_workers} = 1; # v2 has shards - $lei->{opt}->{save} = \1; + $lei->{opt}->{save} //= \1 if $lei->{cmd} eq 'q'; $dst = $lei->{ovv}->{dst} = $lei->abs_path($dst); @conflict = qw(mua sort); } else { diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm index e85fd3c4a..7eda6f9e5 100644 --- a/lib/PublicInbox/LeiXSearch.pm +++ b/lib/PublicInbox/LeiXSearch.pm @@ -391,8 +391,9 @@ sub query_done { # EOF callback for main daemon ($lei->{opt}->{'mail-sync'} && !$lei->{sto}) and warn "BUG: {sto} missing with --mail-sync"; $lei->sto_done_request; + my $nr_w = delete($lei->{-nr_write}) // 0; if (my $v2w = delete $lei->{v2w}) { - my $wait = $v2w->wq_do('done'); # may die + $nr_w = $v2w->wq_do('done'); # may die $v2w->wq_close; } $lei->{ovv}->ovv_end($lei); @@ -412,7 +413,6 @@ Error closing $lei->{ovv}->{dst}: \$!=$! \$?=$? 
delete $l2m->{mbl}; # drop dotlock } } - my $nr_w = delete($lei->{-nr_write}) // 0; my $nr_dup = (delete($lei->{-nr_seen}) // 0) - $nr_w; if ($lei->{-progress}) { my $tot = $lei->{-mset_total} // 0; diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index 4d606dfef..231ed5169 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -135,7 +135,7 @@ sub add { if (do_idx($self, $mime, $smsg)) { $self->checkpoint; } - + ++$self->{-nr_add}; # for lei convert $cmt; } @@ -611,6 +611,7 @@ sub done { $self->lock_release(!!$nbytes) if $shards; $self->git->cleanup; die $err if $err; + delete $self->{-nr_add}; # for lei-convert } sub importer { diff --git a/t/lei-convert.t b/t/lei-convert.t index 84b57f81d..6aff80bbb 100644 --- a/t/lei-convert.t +++ b/t/lei-convert.t @@ -8,7 +8,8 @@ use PublicInbox::NetReader; use PublicInbox::Eml; use IO::Uncompress::Gunzip; use File::Path qw(remove_tree); -use PublicInbox::Spawn qw(which); +use PublicInbox::Spawn qw(which run_qx); +use File::Compare; use autodie qw(open); require_mods(qw(lei -imapd -nntpd Mail::IMAPClient Net::NNTP)); my ($tmpdir, $for_destroy) = tmpdir; @@ -28,8 +29,36 @@ test_lei({ tmpdir => $tmpdir }, sub { my $d = $ENV{HOME}; lei_ok('convert', '-o', "mboxrd:$d/foo.mboxrd", "imap://$imap_host_port/t.v2.0"); + my ($nc0) = ($lei_err =~ /converted (\d+) messages/); ok(-f "$d/foo.mboxrd", 'mboxrd created from imap://'); + lei_ok qw(convert -o), "v2:$d/v2-test", "mboxrd:$d/foo.mboxrd"; + my ($nc) = ($lei_err =~ /converted (\d+) messages/); + is $nc, $nc0, 'converted all messages messages'; + lei_ok qw(q z:0.. 
-f jsonl --only), "$d/v2-test"; + is(scalar(split(/^/sm, $lei_out)), $nc, 'got all messages in v2-test'); + + lei_ok qw(convert -o), "mboxrd:$d/from-v2.mboxrd", "$d/v2-test"; + like $lei_err, qr/converted $nc messages/; + is(compare("$d/foo.mboxrd", "$d/from-v2.mboxrd"), 0, + 'convert mboxrd -> v2 ->mboxrd roundtrip') or + diag run_qx([qw(git diff --no-index), + "$d/foo.mboxrd", "$d/from-v2.mboxrd"]); + + lei_ok [qw(convert -F eml -o), "$d/v2-test"], undef, + { 0 => \<<'EOM', %$lei_opt }; +From: f@example.com +To: t@example.com +Subject: append-to-v2-on-convert +Message-ID: +Date: Fri, 02 Oct 1993 00:00:00 +0000 +EOM + like $lei_err, qr/converted 1 messages/, 'only one message added'; + lei_ok qw(q z:0.. -f jsonl --only), "$d/v2-test"; + is(scalar(split(/^/sm, $lei_out)), $nc + 1, + 'got expected number of messages after append convert'); + like $lei_out, qr/append-to-v2-on-convert/; + lei_ok('convert', '-o', "mboxrd:$d/nntp.mboxrd", "nntp://$nntp_host_port/t.v2"); ok(-f "$d/nntp.mboxrd", 'mboxrd created from nntp://'); -- 2.47.2