#!/usr/bin/perl -w
-# Copyright (C) 2014-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# Used for training spam (via SpamAssassin) and removing messages from a
options:
--all scan all inboxes on `rm'
+ -k keep going after errors (for read-only inboxes)
See public-inbox-learn(1) man page for full documentation.
EOF
use PublicInbox::Spamcheck::Spamc;
use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
my %opt = (all => 0);
-GetOptions(\%opt, qw(all help|h)) or die $help;
+GetOptions(\%opt, qw(all keep-going|k help|h)) or die $help;
use PublicInbox::Import;
my $train = shift or die $help;
my $pi_cfg = PublicInbox::Config->new;
local $PublicInbox::Import::DROP_UNIQUE_UNSUB;
PublicInbox::Import::load_config($pi_cfg);
-my $err;
+my ($err, @fail_ibx);
my $mime = PublicInbox::Eml->new(do{
my $data = PublicInbox::IO::read_all \*STDIN;
PublicInbox::Eml::strip_from($data);
} elsif ($train eq 'spam') {
$spamc->spamlearn(\$data);
}
- die "spamc failed with: $?\n" if $?;
+ die "E: spamc failed with: $?\n" if $?;
};
$err = $@;
}
\$data
});
-sub remove_or_add ($$$$) {
+# Report a failure for $ibx using the error currently held in $@.
+# Without --keep-going/-k this dies immediately.  With -k it warns,
+# records $ibx in @fail_ibx and returns so the caller can move on to
+# the next inbox.
+my $ibx_fail = sub {
+	my ($ibx) = @_;
+	# die messages end in a newline; strip it from a stringified copy
+	# so the inboxdir stays on the same line as the error it belongs to
+	chomp(my $e = "$@");
+	my $m = "E: $e ($ibx->{inboxdir})\n";
+	die $m if !$opt{'keep-going'};
+	warn $m;
+	push @fail_ibx, $ibx;
+};
+
+my $remove_or_add = sub { # per-inbox: "rm" removes $mime, "ham" adds it
my ($ibx, $train, $mime, $addr) = @_;
+ eval { # trap per-inbox errors; they are handed to $ibx_fail below
+ # We do not touch GIT_COMMITTER_* env here so we can track
+ # who trained the message.
+ $ibx->{name} = $ENV{GIT_COMMITTER_NAME} // $ibx->{name};
+ $ibx->{-primary_address} = $ENV{GIT_COMMITTER_EMAIL} // $addr;
+ $ibx = PublicInbox::InboxWritable->new($ibx);
+ $ibx->{indexlevel} = $ibx->detect_indexlevel;
+ my $im = $ibx->importer(0);
- # We do not touch GIT_COMMITTER_* env here so we can track
- # who trained the message.
- $ibx->{name} = $ENV{GIT_COMMITTER_NAME} // $ibx->{name};
- $ibx->{-primary_address} = $ENV{GIT_COMMITTER_EMAIL} // $addr;
- $ibx = PublicInbox::InboxWritable->new($ibx);
- $ibx->{indexlevel} = $ibx->detect_indexlevel;
- my $im = $ibx->importer(0);
-
- if ($train eq "rm") {
- # This needs to be idempotent, as my inotify trainer
- # may train for each cross-posted message, and this
- # script already learns for every list in
- # ~/.public-inbox/config
- $im->remove($mime, $train);
- } elsif ($train eq "ham") {
- # no checking for spam here, we assume the message has
- # been reviewed by a human at this point:
- PublicInbox::MDA->set_list_headers($mime, $ibx);
-
- # Ham messages are trained when they're marked into
- # a SEEN state, so this is idempotent:
- $im->add($mime);
- }
- $im->done;
-}
+ if ($train eq "rm") {
+ # This needs to be idempotent, as my inotify trainer
+ # may train for each cross-posted message, and this
+ # script already learns for every list in
+ # ~/.public-inbox/config
+ $im->remove($mime, $train);
+ } elsif ($train eq "ham") {
+ # no checking for spam here, we assume the message has
+ # been reviewed by a human at this point:
+ PublicInbox::MDA->set_list_headers($mime, $ibx);
+
+ # Ham messages are trained when they're marked into
+ # a SEEN state, so this is idempotent:
+ $im->add($mime);
+ }
+ $im->done;
+ };
+ $ibx_fail->($ibx) if $@; # dies unless -k was given, else records $ibx
+};
-# spam is removed from all known inboxes since it is often Bcc:-ed
-if ($train eq 'spam' || ($train eq 'rm' && $opt{all})) {
- $pi_cfg->each_inbox(sub {
- my ($ibx) = @_;
+my $remove_all = sub { # each_inbox cb: remove $mime from a single inbox
+ my ($ibx) = @_;
+ eval { # trap per-inbox errors; they are handed to $ibx_fail below
$ibx = PublicInbox::InboxWritable->new($ibx);
my $im = $ibx->importer(0);
$im->remove($mime, $train);
$im->done;
- });
+ };
+ $ibx_fail->($ibx) if $@; # dies unless -k was given, else records $ibx
+};
+
+# spam is removed from all known inboxes since it is often Bcc:-ed
+if ($train eq 'spam' || ($train eq 'rm' && $opt{all})) {
+ $pi_cfg->each_inbox($remove_all);
} else {
require PublicInbox::MDA;
while (my ($addr, $ibx) = each %dests) {
next unless ref($ibx); # $ibx may be 0
next if $seen{0 + $ibx}++;
- remove_or_add($ibx, $train, $mime, $addr);
+ $remove_or_add->($ibx, $train, $mime, $addr);
}
my $dests = PublicInbox::MDA->inboxes_for_list_id($pi_cfg, $mime);
for my $ibx (@$dests) {
next if $seen{0 + $ibx}++;
- remove_or_add($ibx, $train, $mime, $ibx->{-primary_address});
+ $remove_or_add->($ibx, $train, $mime, $ibx->{-primary_address});
}
}
-if ($err) {
-	warn $err;
+# Exit non-zero when spamc training failed ($err) or when at least one
+# inbox could not be updated under --keep-going (@fail_ibx).
+if ($err || @fail_ibx) {
+	warn $err if $err;
+	# only print the inbox count when inboxes actually failed, so a
+	# spamc-only failure does not report "E: 0 inbox(es) failed"
+	warn 'E: ', scalar(@fail_ibx), " inbox(es) failed\n" if @fail_ibx;
exit 1;
}
--- /dev/null
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# Checks that `-learn rm --all -k' keeps going past inboxes it cannot
+# write to, still removes the message from the writable inboxes, and
+# exits non-zero while noting the number of failed inboxes on stderr.
+use v5.12;
+use autodie;
+use PublicInbox::TestCommon;
+# root is not restricted by the permission bits used below to make
+# inboxes read-only, so the failure path cannot be triggered as root
+plan skip_all => "cannot test $0 as root" if $> == 0;
+require_mods qw(DBD::SQLite);
+my ($v2, $v2ro, @v2cfg);
+my $tmpdir = tmpdir;
+my $eml = eml_load 't/plack-qp.eml';
+
+# writable v1 inbox holding the single test message
+my $v1 = create_inbox 'v1', indexlevel => 'basic', tmpdir => "$tmpdir/v1", sub {
+	my ($im) = @_;
+	$im->add($eml);
+};
+
+# same content, but made read-only after creation
+my $v1ro = create_inbox 'v1ro', indexlevel => 'basic',
+		tmpdir => "$tmpdir/v1ro", sub {
+	my ($im) = @_;
+	$im->add($eml);
+};
+chmod 0500, $v1ro->{inboxdir};
+chmod 0400, glob("$v1ro->{inboxdir}/public-inbox/xapian*/over.sqlite3"),
+	glob("$v1ro->{inboxdir}/public-inbox/msgmap.sqlite3");
+
+SKIP: {
+	require_git v2.6, 1;
+	$v2 = create_inbox 'v2', indexlevel => 'basic', version => 2,
+			tmpdir => "$tmpdir/v2", sub {
+		my ($im, $ibx) = @_;
+		$im->add($eml);
+	};
+	# NOTE(review): this reuses the 'v2' identifier for the read-only
+	# inbox (the v1 pair uses 'v1' and 'v1ro') -- confirm that sharing
+	# the identifier is intended
+	$v2ro = create_inbox 'v2', indexlevel => 'basic', version => 2,
+			tmpdir => "$tmpdir/v2ro", sub {
+		my ($im, $ibx) = @_;
+		$im->add($eml);
+	};
+	chmod 0500, $v2ro->{inboxdir}, "$v2ro->{inboxdir}/git/0.git";
+	chmod 0400, glob("$v2ro->{inboxdir}/xap*/over.sqlite3"),
+		glob("$v2ro->{inboxdir}/msgmap.sqlite3");
+	# no trailing `;' after the paths: it would only be tolerated as
+	# the start of a config comment and differs from the v1 entries
+	@v2cfg = (<<EOM);
+[publicinbox "v2ro"]
+	inboxdir = $v2ro->{inboxdir}
+	address = v2ro\@example.com
+	indexlevel = basic
+[publicinbox "v2"]
+	inboxdir = $v2->{inboxdir}
+	address = v2\@example.com
+	indexlevel = basic
+EOM
+}
+
+# read-only inboxes are listed before their writable counterparts
+my $cfg = cfg_new $tmpdir, <<EOM, @v2cfg;
+[publicinbox "v1ro"]
+	inboxdir = $v1ro->{inboxdir}
+	address = v1ro\@example.com
+	indexlevel = basic
+[publicinbox "v1"]
+	inboxdir = $v1->{inboxdir}
+	address = v1\@example.com
+	indexlevel = basic
+EOM
+
+# feed the message on stdin; capture stdout and stderr separately
+my $opt = {
+	0 => \($eml->as_string),
+	1 => \(my $out),
+	2 => \(my $err),
+};
+my $env = { PI_CONFIG => $cfg->{-f} };
+
+run_script [ qw(-learn rm --all -k) ], $env, $opt;
+isnt $?, 0, 'learn $? is non-zero';
+# is() takes (got, expected) so failure diagnostics are labelled correctly
+is $v1->over->max, 0, 'removed from r/w v1';
+is $v1ro->over->max, 1, 'not removed from r/o v1';
+my $nr = 1; # number of read-only inboxes expected to fail
+SKIP: {
+	require_git v2.6, 1;
+	is $v2->over->max, 0, 'removed from r/w v2';
+	is $v2ro->over->max, 1, 'not removed from r/o v2';
+	$nr = 2;
+}
+
+like $err, qr/E: $nr inbox\(es\) failed/, 'failures noted in stderr';
+is $out, '', 'stdout is empty';
+
+done_testing;