]> git.ipfire.org Git - thirdparty/public-inbox.git/commitdiff
watch: only open one directory at a time when scanning
authorEric Wong <e@80x24.org>
Fri, 26 Jul 2024 21:59:25 +0000 (21:59 +0000)
committerEric Wong <e@80x24.org>
Tue, 30 Jul 2024 23:26:28 +0000 (23:26 +0000)
This avoids EMFILE/ENFILE for large setups with many Maildir
watch directives.  It also makes adding per-directory scanning
messages easier in the next commit.

lib/PublicInbox/Watch.pm
script/public-inbox-watch

index 74cc599c7c70a6c86a30f5a83eafb0bf37ff1a69..f793d0d8c4b14a904d6e753daa0f720b02964cd7 100644 (file)
@@ -154,7 +154,7 @@ sub new {
                imap_order => scalar(@imap) ? \@imap : undef,
                nntp_order => scalar(@nntp) ? \@nntp: undef,
                importers => {},
-               opendirs => {}, # dirname => dirhandle (in progress scans)
+               scan_q => [], # [ [dirname,dirhandle] || dirname, ...]
                ops => [], # 'quit', 'full'
        }, $class;
 }
@@ -289,7 +289,9 @@ sub quit_done ($) {
 sub quit { # may be called in IMAP/NNTP children
        my ($self) = @_;
        $self->{quit} = 1;
-       %{$self->{opendirs}} = ();
+       if (my $scan_q = $self->{scan_q}) {
+               @$scan_q = ();
+       }
        _done_for_now($self);
        quit_done($self);
        if (my $dir_idle = delete $self->{dir_idle}) {
@@ -417,7 +419,7 @@ sub watch_imap_idle_1 ($$$) {
 
 sub watch_atfork_child ($) {
        my ($self) = @_;
-       delete @$self{qw(dir_idle pids opendirs)};
+       delete @$self{qw(dir_idle pids scan_q)};
        my $sig = delete $self->{sig};
        $sig->{CHLD} = $sig->{HUP} = $sig->{USR1} = 'DEFAULT';
        # TERM/QUIT/INT call ->quit, which works in both parent+child
@@ -578,56 +580,44 @@ sub watch { # main entry point
        _done_for_now($self);
 }
 
-sub trigger_scan {
-       my ($self, $op) = @_;
-       push @{$self->{ops}}, $op;
-       PublicInbox::DS::requeue($self);
-}
-
 sub fs_scan_step {
        my ($self) = @_;
        return if $self->{quit};
-       my $op = shift @{$self->{ops}};
        local $PublicInbox::DS::in_loop = 0; # waitpid() synchronously
 
        # continue existing scan
-       my $opendirs = $self->{opendirs};
-       my @dirnames = keys %$opendirs;
-       foreach my $dir (@dirnames) {
-               my $dh = delete $opendirs->{$dir};
+       while (defined(my $dir = shift @{$self->{scan_q}})) {
+               my $dh;
+               if (ref($dir) eq 'ARRAY') { # continue existing
+                       ($dir, $dh) = @$dir;
+               } elsif (!opendir($dh, $dir)) {
+                       warn "W: failed to open $dir: $! (non-fatal)\n";
+                       next;
+               }
                my $n = $self->{max_batch};
                while (my $fn = readdir($dh)) {
                        _try_path($self, "$dir/$fn");
                        last if --$n < 0;
                }
-               $opendirs->{$dir} = $dh if $n < 0;
-       }
-       if ($op && $op eq 'full') {
-               foreach my $dir (keys %{$self->{d_map}}) {
-                       next if $opendirs->{$dir}; # already in progress
-                       my $ok = opendir(my $dh, $dir);
-                       unless ($ok) {
-                               warn "failed to open $dir: $!\n";
-                               next;
-                       }
-                       my $n = $self->{max_batch};
-                       while (my $fn = readdir($dh)) {
-                               _try_path($self, "$dir/$fn");
-                               last if --$n < 0;
-                       }
-                       $opendirs->{$dir} = $dh if $n < 0;
+               if ($n < 0) {
+                       unshift @{$self->{scan_q}}, [ $dir, $dh ];
+                       last;
                }
        }
        _done_for_now($self);
        # do we have more work to do?
-       keys(%$opendirs) ? PublicInbox::DS::requeue($self)
+       @{$self->{scan_q}} ? PublicInbox::DS::requeue($self)
                : warn("# full scan complete\n");
 }
 
 sub scan {
        my ($self, $op) = @_;
-       push @{$self->{ops}}, $op;
-       fs_scan_step($self);
+       if ($op eq 'full') {
+               push @{$self->{scan_q}}, keys %{$self->{d_map}};
+               fs_scan_step($self);
+       } else {
+               warn "BUG? unknown scan op: `$op' (non-fatal)\n";
+       }
 }
 
 sub _importer_for {
index 64300f09539e056e1e86da274e5a13e15870ef5c..80dd5e07a5463862ff692951da7e500c893d6a49 100755 (executable)
@@ -42,7 +42,7 @@ if ($watch) {
                return if !$watch;
                my ($s) = @_;
                warn "# scanning (full) ", ($s ? "on $s" : 'at startup'), "\n";
-               $watch->trigger_scan('full');
+               $watch->scan('full');
        };
        my $quit = sub { # may be called in IMAP/NNTP children
                $watch->quit if $watch;