git.ipfire.org Git - thirdparty/public-inbox.git/commitdiff
cindex: add --show-roots switch
author: Eric Wong <e@80x24.org>
Thu, 24 Aug 2023 01:22:32 +0000 (01:22 +0000)
committer: Eric Wong <e@80x24.org>
Thu, 24 Aug 2023 07:47:50 +0000 (07:47 +0000)
This aids in development, but I'm not sure it's going to stay
or be moved into another interface.

lib/PublicInbox/CodeSearchIdx.pm
script/public-inbox-cindex

index 2480dbd20564ccd41847ca53fdeae5e74a01b6d7..e795c2b38ca2a994a2e1245da97d890e2f9fd644 100644 (file)
@@ -1058,6 +1058,37 @@ sub _prep_ibx { # each_inbox callback
                push @{$self->{IBX}}, $ibx;
 }
 
+# Diagnostic dump: for every 'G'-prefixed term in the index, print it as a
+# commit, then the 'P' terms of each document matching 'Tr' and that term.
+sub show_roots { # for diagnostics
+       my ($self) = @_;
+       local $self->{xdb}; # restored on return; ->xdb presumably refills it
+       my $cur = $self->xdb->allterms_begin('G');
+       my $end = $self->{xdb}->allterms_end('G');
+       my $qrepo = $PublicInbox::Search::X{Query}->new('T'.'r');
+       my $enq = $PublicInbox::Search::X{Enquire}->new($self->{xdb});
+       $enq->set_weighting_scheme($PublicInbox::Search::X{BoolWeight}->new);
+       $enq->set_docid_order($PublicInbox::Search::ENQ_ASCENDING);
+       for (; $cur != $end; $cur++) {
+               my $G_oidhex = $cur->get_termname;
+               my $qry = $PublicInbox::Search::X{Query}->new(
+                       PublicInbox::Search::OP_FILTER(), $qrepo, $G_oidhex);
+               $enq->set_query($qry);
+               my ($off, $lim) = (0, 10000); # fetch matches $lim at a time
+               say 'commit ',substr($G_oidhex, 1), ' appears in:';
+               while (1) {
+                       my $mset = $enq->get_mset($off, $lim);
+                       my $size = $mset->size or last; # empty page: done
+                       for my $x ($mset->items) { # one document fetch each
+                               for (xap_terms('P', $x->get_document)) {
+                                       say '- /', substr($_, 1);
+                               }
+                       }
+                       $off += $size;
+               }
+       }
+}
+
 sub cidx_run { # main entry point
        my ($self) = @_;
        my $restore_umask = prep_umask($self);
@@ -1150,6 +1181,7 @@ sub cidx_run { # main entry point
        PublicInbox::DS::event_loop($MY_SIG, $SIGSET) if shards_active();
        PublicInbox::DS->Reset;
        $self->lock_release(!!$NCHANGE);
+       show_roots($self) if $self->{-opt}->{'show-roots'} # for diagnostics
 }
 
 sub ipc_atfork_child { # @IDX_SHARDS
index 888c8b1030c8872a86bdc0bac81af8fad4a1c5a1..0526434ce1ddb2171e245d77cbcfb7cd279a9803 100755 (executable)
@@ -29,7 +29,7 @@ GetOptions($opt, qw(quiet|q verbose|v+ reindex jobs|j=i fsync|sync! dangerous
                indexlevel|index-level|L=s associate associate-max=i
                associate-date-range=s associate-prefixes=s@
                batch_size|batch-size=s max_size|max-size=s
-               include|I=s@ only=s@ all
+               include|I=s@ only=s@ all show-roots
                project-list=s exclude=s@
                sort-parallel=s sort-compress-program=s sort-buffer-size=s
                d=s update|u scan! prune dry-run|n C=s@ help|h))