From: Eric Wong Date: Thu, 26 Dec 2024 21:48:51 +0000 (+0000) Subject: watch: don't count invalid paths against batch limit X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=84b4a02f57080e28854a1698372cd987ab2f2b58;p=thirdparty%2Fpublic-inbox.git watch: don't count invalid paths against batch limit Invalid paths such as `.', `..', `.mh_sequences', and perhaps other implementation-specific files may throw off the count and cause premature commits. While the premature commit isn't too harmful in the common case, it's possible a pathological case of having too many non-mail entries in a directory can cause noticeable slowdowns and storage wear. So have _try_path() and _remove_spam() return a true value if a file was actually read. We'll also simplify the $inboxes check by relying simply on `eq', since the `ref' check isn't necessary as the `eq' against a ref will never match the "watchspam" literal. --- diff --git a/lib/PublicInbox/Watch.pm b/lib/PublicInbox/Watch.pm index 0520967f6..5cde1d80d 100644 --- a/lib/PublicInbox/Watch.pm +++ b/lib/PublicInbox/Watch.pm @@ -207,6 +207,7 @@ sub remove_eml_i { # each_inbox callback } } +# returns true if a file was read sub _remove_spam { my ($self, $path) = @_; # path must be marked as (S)een @@ -214,6 +215,7 @@ sub _remove_spam { my $eml = eml_from_path($path) or return; local $SIG{__WARN__} = PublicInbox::Eml::warn_ignore_cb(); $self->{pi_cfg}->each_inbox(\&remove_eml_i, $self, $eml, $path); + 1; } sub import_eml ($$$) { @@ -243,13 +245,18 @@ sub import_eml ($$$) { } } -sub _try_path { +# returns true if a file was read +sub _try_path ($$) { my ($self, $path) = @_; - $path =~ $self->{d_re} or - return warn("BUG? unrecognized path: $path\n"); + if ($path !~ $self->{d_re}) { + warn "BUG? 
unrecognized path: $path\n"; + return; + } my $dir = $1; - my $inboxes = $self->{d_map}->{$dir} // - return warn("W: unmappable dir: $dir\n"); + my $inboxes = $self->{d_map}->{$dir} // do { + warn "W: unmappable dir: $dir\n"; + return; + }; my ($md_fl, $mh_seq); if ($self->{d_type}->{$dir} & D_MH) { $path =~ m!/([0-9]+)\z! ? ($mh_seq = $1) : return; @@ -267,13 +274,12 @@ sub _try_path { my $pfx = ($_[0] // '') =~ /^([A-Z]: )/g ? $1 : ''; $warn_cb->($pfx, "path: $path\n", @_); }; - if (!ref($inboxes) && $inboxes eq 'watchspam') { - return _remove_spam($self, $path); - } - foreach my $ibx (@$inboxes) { + return _remove_spam($self, $path) if $inboxes eq 'watchspam'; + for my $ibx (@$inboxes) { my $eml = eml_from_path($path) or next; - import_eml($self, $ibx, $eml); + import_eml($self, $ibx, $eml); # $eml may be scrubbed } + 1; } sub quit_done ($) { @@ -598,8 +604,7 @@ sub fs_scan_step { } my $n = $self->{max_batch}; while (my $fn = readdir($dh)) { - _try_path($self, "$dir/$fn"); - last if --$n < 0; + last if _try_path($self, "$dir/$fn") and --$n < 0; } if ($n < 0) { unshift @{$self->{scan_q}}, [ $dir, $dh ];