our @CODE_NRP;
our @CODE_VMAP = (
[ AT, 'd:' ], # mairix compat
- [ AT, 'dt:' ], # mail compat
+ [ AT, 'dt:' ], # public-inbox mail compat
[ CT, 'ct:' ],
);
sub new {
my ($cls, $dir, $cfg) = @_;
# can't have a PublicInbox::Config here due to circular refs
- bless { xpfx => "$dir/cidx".CIDX_SCHEMA_VER,
+ bless { topdir => $dir, xpfx => "$dir/cidx".CIDX_SCHEMA_VER, # topdir: the $dir given, reported in warnings
-cfg_f => $cfg->{-f} }, $cls;
}
my $cur = $self->xdb->get_metadata($key) or return;
$cur = eval { PublicInbox::Config::json()->decode(uncompress($cur)) };
warn "E: $@ (corrupt metadata in `$key' key?)" if $@;
- $cur;
+ my @m = grep { ref($cur->{$_}) ne 'ARRAY' } qw(ekeys roots ibx2root);
+ if (@m) {
+ warn <<EOM;
+W: $self->{topdir} join data for $self->{-cfg_f} missing: @m
+EOM
+ undef;
+ } elsif (@{$cur->{ekeys}} != @{$cur->{ibx2root}}) {
+ warn <<EOM;
+W: $self->{topdir} join data for $self->{-cfg_f} mismatched ekeys and ibx2root
+EOM
+ undef;
+ } else {
+ $cur;
+ }
}
sub qparse_new ($) {
\%ret;
}
-sub paths2roots { # for diagnostics
- my ($self) = @_;
+sub root_oids ($$) { # sorted, de-duplicated 'G' terms of all docs indexed for $git_dir
+ my ($self, $git_dir) = @_;
+ my @ids = $self->docids_by_postlist('P'.$git_dir); # docids in the postlist of the 'P'-prefixed term
+ @ids or warn <<"";
+BUG? (non-fatal) `$git_dir' not indexed in $self->{topdir}
+
+ warn <<"" if @ids > 1;
+BUG: (non-fatal) $git_dir indexed multiple times in $self->{topdir}
+
my %ret;
- my $tmp = roots2paths($self);
- for my $root_oidhex (keys %$tmp) {
- my $paths = delete $tmp->{$root_oidhex};
- push @{$ret{$_}}, $root_oidhex for @$paths;
+ for my $docid (@ids) {
+ my @oids = xap_terms('G', $self->xdb, $docid);
+ @ret{@oids} = @oids; # hash slice de-duplicates across docids
+ }
+ sort keys %ret; # list return; empty when nothing was found
+}
+
+sub paths2roots { # returns { path => [ sorted root OID hex strings ] }
+ my ($self, $paths) = @_;
+ my %ret;
+ if ($paths) { # only look up the paths which are keys of %$paths
+ for my $p (keys %$paths) { @{$ret{$p}} = root_oids($self, $p) }
+ } else { # no $paths given: invert the whole roots2paths mapping
+ my $tmp = roots2paths($self);
+ for my $root_oidhex (keys %$tmp) {
+ my $paths = delete $tmp->{$root_oidhex};
+ push @{$ret{$_}}, $root_oidhex for @$paths;
+ }
+ @$_ = sort(@$_) for values %ret; # only needed here, root_oids output is already sorted
}
- @$_ = sort(@$_) for values %ret;
\%ret;
}
+sub load_commit_times { # each_cindex callback; sets $rec->[0] from the CT value of the indexed doc
+ my ($self, $todo) = @_; # todo = [ [ time, git ], [ time, git ] ...]
+ my (@pending, $rec, $dir, @ids, $doc); # NOTE(review): $dir appears unused in this sub
+ while ($rec = shift @$todo) {
+ @ids = $self->docids_by_postlist('P'.$rec->[1]->{git_dir});
+ if (@ids) {
+ warn <<EOM if @ids > 1;
+W: $rec->[1]->{git_dir} indexed multiple times in $self->{topdir}
+EOM
+ for (@ids) {
+ $doc = $self->get_doc($_) // next; # skip docids without a document
+ $rec->[0] = int_val($doc, CT);
+ last; # the first document found wins
+ }
+ } else { # may be in another cindex:
+ push @pending, $rec;
+ }
+ }
+ @$todo = @pending; # caller's array keeps only the records not found in this cindex
+}
+
+sub load_coderepos { # each_cindex callback; associates inboxes and coderepos using join data
+ my ($self, $pi_cfg) = @_;
+ my $name = $self->{name};
+ my $cfg_f = $pi_cfg->{-f};
+ my $lpfx = $self->{localprefix} or return warn <<EOM;
+W: cindex.$name.localprefix unset in $cfg_f, ignoring cindex.$name
+EOM
+ my $lre = join('|', map { $_ .= '/'; tr!/!/!s; quotemeta } @$lpfx); # NOTE: modifies @$lpfx in place (appends '/', squeezes repeated '/')
+ $lre = qr!\A(?:$lre)!; # anchored alternation of the quoted prefixes
+ my $coderepos = $pi_cfg->{-coderepos};
+ my $nick_pfx = $name eq '' ? '' : "$name/";
+ my %dir2cr;
+ for my $p ($self->all_terms('P')) {
+ my $nick = $p;
+ $nick =~ s!$lre!$nick_pfx!s or next; # skip dirs which are not under any localprefix
+ $dir2cr{$p} = $coderepos->{$nick} //= do { # reuse the object already registered for this nick, if any
+ my $git = PublicInbox::Git->new($p);
+ $git->{nick} = $nick; # for git->pub_urls
+ $git;
+ };
+ }
+ my $jd = join_data($self) or return warn <<EOM;
+W: cindex.$name.topdir=$self->{topdir} has no usable join data for $cfg_f
+EOM
+ my ($ekeys, $roots, $ibx2root) = @$jd{qw(ekeys roots ibx2root)};
+ my $roots2paths = roots2paths($self);
+ for my $root_offs (@$ibx2root) {
+ my $ekey = shift(@$ekeys) // die 'BUG: {ekeys} empty'; # @$ekeys is consumed in step with @$ibx2root
+ scalar(@$root_offs) or next; # nothing recorded for this inbox
+ my $ibx = $pi_cfg->lookup_eidx_key($ekey) // do {
+ warn "W: `$ekey' gone from $cfg_f\n";
+ next;
+ };
+ my $gits = $ibx->{-repo_objs} //= [];
+ my $cr_score = $ibx->{-cr_score} //= {};
+ my %ibx_p2g = map { $_->{git_dir} => $_ } @$gits; # git_dir => git object already attached to this inbox
+ my $ibx2self; # cindex has an association w/ inbox?
+ for (@$root_offs) { # sorted by $nr descending
+ my ($nr, $root_off) = @$_;
+ my $root_oid = $roots->[$root_off] // do {
+ warn <<EOM;
+BUG: root #$root_off invalid in join data for `$ekey' with $cfg_f
+EOM
+ next;
+ };
+ my $git_dirs = $roots2paths->{$root_oid};
+ my @gits = map { $dir2cr{$_} // () } @$git_dirs;
+ $cr_score->{$_->{nick}} //= $nr for @gits; # //= keeps the first $nr seen for each nick
+ @$git_dirs = grep { !$ibx_p2g{$_} } @$git_dirs; # NOTE: rewrites the array held by $roots2paths; drops dirs already attached
+ # @$git_dirs or warn "W: no matches for $root_oid\n";
+ for (@$git_dirs) {
+ if (my $git = $dir2cr{$_}) {
+ $ibx_p2g{$_} = $git;
+ $ibx2self = 1;
+ $ibx->{-hide}->{www} or # inboxes hidden from www are not added to the git's ibx_score
+ push @{$git->{ibx_score}},
+ [ $nr, $ibx->{name} ];
+ push @$gits, $git;
+ } else {
+ warn <<EOM;
+W: no coderepo available for $_ (localprefix=@$lpfx)
+EOM
+ }
+ }
+ }
+ if (@$gits) {
+ push @{$ibx->{-csrch}}, $self if $ibx2self;
+ } else { # nothing attached: remove the empty fields (possibly created by //= above)
+ delete $ibx->{-repo_objs};
+ delete $ibx->{-cr_score};
+ }
+ }
+ for my $git (values %dir2cr) {
+ my $s = $git->{ibx_score};
+ @$s = sort { $b->[0] <=> $a->[0] } @$s if $s; # largest $nr first
+ }
+}
+
1;
sub update_commit ($$$) {
my ($self, $cmt, $roots) = @_; # fields from @FMT
my $x = 'Q'.$cmt->{H};
- my ($docid, @extra) = sort { $a <=> $b } docids_by_postlist($self, $x);
+ my ($docid, @extra) = sort { $a <=> $b } $self->docids_by_postlist($x);
@extra and warn "W: $cmt->{H} indexed multiple times, pruning ",
join(', ', map { "#$_" } @extra), "\n";
$self->{xdb}->delete_document($_) for @extra;
# used to select the shard for a GIT_DIR
sub git_dir_hash ($) { hex(substr(sha256_hex($_[0]), 0, 8)) }
-sub docids_by_postlist ($$) { # consider moving to PublicInbox::Search
- my ($self, $q) = @_;
- my $cur = $self->{xdb}->postlist_begin($q);
- my $end = $self->{xdb}->postlist_end($q);
- my @ids;
- for (; $cur != $end; $cur++) { push(@ids, $cur->get_docid) };
- @ids;
-}
-
sub _cb { # run_await cb
my ($pid, $cmd, undef, $opt, $cb, $self, $git, @arg) = @_;
return if $DO_QUIT;
sub check_existing { # retry_reopen callback
my ($shard, $self, $git) = @_;
- my @docids = docids_by_postlist($shard, 'P'.$git->{git_dir});
+ my @docids = $shard->docids_by_postlist('P'.$git->{git_dir});
my $docid = shift(@docids) // return get_roots($self, $git);
my $doc = $shard->get_doc($docid) //
die "BUG: no #$docid ($git->{git_dir})";
sub prune_one { # via wq_io_do in IDX_SHARDS
my ($self, $term) = @_;
- my @docids = docids_by_postlist($self, $term);
+ my @docids = $self->docids_by_postlist($term);
for (@docids) {
$TXN_BYTES -= $self->{xdb}->get_doclength($_) * 42;
$self->{xdb}->delete_document($_);
sub score_old_join_data ($$$) {
my ($self, $score, $ekeys_new) = @_;
my $old = ($JOIN{reset} ? undef : current_join_data($self)) or return;
- my @old = @$old{qw(ekeys roots ibx2root)};
- @old == 3 or return warn "W: ekeys/roots missing from old JOIN data\n";
progress($self, 'merging old join data...');
- my ($ekeys_old, $roots_old, $ibx2root_old) = @old;
+ my ($ekeys_old, $roots_old, $ibx2root_old) =
+ @$old{qw(ekeys roots ibx2root)};
# score: "ibx_off root_off" => nr
my $i = -1;
my %root2id_new = map { $_ => ++$i } @OFF2ROOT;
my %ekey2id_new = map { $_ => ++$i } @$ekeys_new;
for my $ibx_off_old (0..$#$ibx2root_old) {
my $root_offs_old = $ibx2root_old->[$ibx_off_old];
- my $ekey = $ekeys_old->[$ibx_off_old] //
- warn "W: no ibx #$ibx_off_old in old JOIN data\n";
- my $ibx_off_new = $ekey2id_new{$ekey // next} //
+ my $ekey = $ekeys_old->[$ibx_off_old] // do {
+ warn "W: no ibx #$ibx_off_old in old join data\n";
+ next;
+ };
+ my $ibx_off_new = $ekey2id_new{$ekey} // do {
warn "W: `$ekey' no longer exists\n";
+ next;
+ };
for (@$root_offs_old) {
my ($nr, $rid_old) = @$_;
- my $root_old = $roots_old->[$rid_old] //
- warn "W: no root #$rid_old in old JOIN data\n";
- my $rid_new = $root2id_new{$root_old // next} //
+ my $root_old = $roots_old->[$rid_old] // do {
+ warn "W: no root #$rid_old in old data\n";
+ next;
+ };
+ my $rid_new = $root2id_new{$root_old} // do {
warn "W: root `$root_old' no longer exists\n";
+ next;
+ };
$score->{"$ibx_off_new $rid_new"} += $nr;
}
}
progress($self, "$ekey => $root has $nr matches");
push @{$new->{ibx2root}->[$ibx_off]}, [ $nr, $root_off ];
}
- for my $ary (values %$new) { # sort by nr
+ for my $ary (values %$new) { # sort by nr (largest first)
for (@$ary) { @$_ = sort { $b->[0] <=> $a->[0] } @$_ }
}
$new->{ekeys} = \@ekeys;
sub repo_objs {
my ($self, $ibxish) = @_;
- my $ibx_coderepos = $ibxish->{coderepo} // return;
$ibxish->{-repo_objs} // do {
+ my $ibx_coderepos = $ibxish->{coderepo} // return;
parse_cgitrc($self, undef, 0);
my $coderepos = $self->{-coderepos};
my @repo_objs;
$es;
}
+sub _fill_csrch ($$) { # build the CodeSearch object for section cindex.$name, or return nothing
+ my ($self, $name) = @_; # "" is a valid name for cindex
+ return if $name ne '' && !valid_foo_name($name, 'cindex');
+ eval { require PublicInbox::CodeSearch } or return; # give up quietly if CodeSearch cannot be loaded
+ my $pfx = "cindex.$name";
+ my $d = $self->{"$pfx.topdir"} // return;
+ -d $d or return; # topdir must be an existing directory
+ if (index($d, "\n") >= 0) {
+ warn "E: `$d' must not contain `\\n'\n";
+ return;
+ }
+ my $csrch = PublicInbox::CodeSearch->new($d, $self);
+ for my $k (qw(localprefix)) { # config keys copied into $csrch via _array
+ my $v = $self->{"$pfx.$k"} // next;
+ $csrch->{$k} = _array($v);
+ }
+ $csrch->{name} = $name;
+ $csrch;
+}
+
+sub lookup_cindex ($$) { # cached wrapper around _fill_csrch, keyed by cindex name
+ my ($self, $name) = @_;
+ $self->{-csrch_by_name}->{$name} //= _fill_csrch($self, $name); # //= calls _fill_csrch again while the cached value is undef
+}
+
+sub each_cindex { # invoke $cb with @arg once for every cindex section which lookup_cindex can load
+ my ($self, $cb, @arg) = @_;
+ my @csrch = map {
+ lookup_cindex($self, substr($_, length('cindex.'))) // () # unusable sections are dropped
+ } grep(m!\Acindex\.[^\./]*\z!, @{$self->{-section_order}}); # name after "cindex." may be empty but has no '.' or '/'
+ if (ref($cb) eq 'CODE') {
+ $cb->($_, @arg) for @csrch; # the CodeSearch object is the first callback argument
+ } else { # string: name of a method called on each CodeSearch object
+ $_->$cb(@arg) for @csrch;
+ }
+}
+
sub config_cmd {
my ($self, $env, $opt) = @_;
my $f = $self->{-f} // default_file();
map { ('-d', $_) } shard_dirs($_[0]);
}
+sub docids_by_postlist ($$) { # returns the list of docids in the postlist of term $q
+ my ($self, $q) = @_;
+ my $cur = $self->xdb->postlist_begin($q); # NOTE(review): ->xdb presumably sets {xdb} on demand so the field access below is safe - confirm
+ my $end = $self->{xdb}->postlist_end($q);
+ my @ids;
+ for (; $cur != $end; $cur++) { push(@ids, $cur->get_docid) };
+ @ids;
+}
+
+sub get_doc ($$) { # returns the document for $docid, or undef when it is not found
+ my ($self, $docid) = @_;
+ eval { $self->{xdb}->get_document($docid) } // do {
+ die $@ if $@ && ref($@) !~ /\bDocNotFoundError\b/; # rethrow anything but a DocNotFoundError exception object
+ undef;
+ }
+}
+
1;
$smsg->{num};
}
-sub get_doc ($$) {
- my ($self, $docid) = @_;
- eval { $self->{xdb}->get_document($docid) } // do {
- die $@ if $@ && ref($@) !~ /\bDocNotFoundError\b/;
- undef;
- }
-}
-
sub _get_doc ($$) {
my ($self, $docid) = @_;
- get_doc($self, $docid) // do {
+ $self->get_doc($docid) // do {
warn "E: #$docid missing in Xapian\n";
undef;
}
# so user_cb never references the SolverGit object
sub new {
my ($class, $ibx, $user_cb, $uarg) = @_;
+ my $gits = $ibx ? $ibx->{-repo_objs} : undef;
+
+ # FIXME: cindex --join= is super-aggressive and may hit too many
+ $gits = [ @$gits[0..2] ] if $gits && @$gits > 3;
bless { # $ibx is undef if coderepo only (see WwwCoderepo)
- gits => $ibx ? $ibx->{-repo_objs} : undef,
+ gits => $gits,
user_cb => $user_cb,
uarg => $uarg,
# -cur_di, -qsp_err, -msg => temp fields for Qspawn callbacks
# allow user to easily browse the range around this message if
# they have ->over
$ctx->{-t_max} = $smsg->{ts};
- $ctx->{-spfx} = '../' if $ibx->{coderepo};
+ $ctx->{-spfx} = '../' if $ibx->{-repo_objs};
PublicInbox::WwwStream::aresponse($ctx, \&msg_page_i);
}
my $ibx = $ctx->{ibx};
my ($nr, $msgs) = $ibx->over->get_thread($mid);
return missing_thread($ctx) if $nr == 0;
- $ctx->{-spfx} = '../../' if $ibx->{coderepo};
+ $ctx->{-spfx} = '../../' if $ibx->{-repo_objs};
# link $INBOX_DIR/description text to "index_topics" view around
# the newest message in this thread
<input type=submit value=search
/>\t(<a href=${upfx}_/text/help/#search>help</a>)</pre></form>
EOM
+ # TODO: related codesearch
+ # my $csrchv = $ctx->{ibx}->{-csrch} // [];
+ # push @related, '<pre>'.ascii_html(Dumper($csrchv)).'</pre>';
}
if ($ctx->{ibx}->over) {
my $t = ts2str($ctx->{-t_max});
}
$pi_cfg->ALL and require PublicInbox::Isearch;
$self->cgit;
+ $self->coderepo;
$self->stylesheets_prepare($_) for ('', '../', '../../');
$self->news_www;
}
use PublicInbox::WwwStatic qw(r);
use PublicInbox::GitHTTPBackend;
use PublicInbox::WwwStream;
-use PublicInbox::Hval qw(ascii_html utf8_maybe);
+use PublicInbox::Hval qw(prurl ascii_html utf8_maybe);
use PublicInbox::ViewDiff qw(uri_escape_path);
use PublicInbox::RepoSnapshot;
use PublicInbox::RepoAtom;
use PublicInbox::RepoTree;
use PublicInbox::OnDestroy;
+use URI::Escape qw(uri_escape_utf8);
+use File::Spec;
my @EACH_REF = (qw(git for-each-ref --sort=-creatordate),
"--format=%(HEAD)%00".join('%00', map { "%($_)" }
my $eidx = $pi_cfg->lookup_ei($k) // next;
$pi_cfg->repo_objs($eidx);
}
+ $pi_cfg->each_cindex('load_coderepos', $pi_cfg);
}
sub new {
"</a>$align ", ascii_html($s), " ($cd)", @snap_fmt, "\n");
}
+sub emit_joined_inboxes ($) { # print links to the inboxes associated with $ctx->{git} to $ctx->zfh
+ my ($ctx) = @_;
+ my $names = $ctx->{git}->{ibx_names}; # coderepo directives in config
+ my $score = $ctx->{git}->{ibx_score}; # generated w/ cindex --join
+ ($names || $score) or return;
+ my $pi_cfg = $ctx->{wcr}->{pi_cfg};
+ my ($u, $h);
+ my $zfh = $ctx->zfh;
+ print $zfh "\n# associated public inboxes:",
+ "\n# (number on the left is used for dev purposes)";
+ my @ns = map { [ 0, $_ ] } @$names; # names from config get a zero score, so no number is shown
+ my $env = $ctx->{env};
+ for (@ns, @$score) {
+ my ($nr, $name) = @$_;
+ my $ibx = $pi_cfg->lookup_name($name) // do {
+ warn "W: inbox `$name' gone for $ctx->{git}->{git_dir}"; # NOTE(review): no trailing "\n", so perl appends file and line
+ say $zfh '# ', ascii_html($name), ' (missing inbox?)';
+ next;
+ };
+ if (scalar(@{$ibx->{url} // []})) { # inbox has at least one configured URL
+ $u = $h = ascii_html(prurl($env, $ibx->{url}));
+ } else { # no URL: link to the escaped inbox name, show the plain name
+ $h = ascii_html(prurl($env, uri_escape_utf8($name)));
+ $h .= '/';
+ $u = ascii_html($name);
+ }
+ if ($nr) {
+ printf $zfh "\n% 11u", $nr; # right-aligned in an 11 character column
+ } else {
+ print $zfh "\n", ' 'x11; # same width, left blank
+ }
+ print $zfh qq{ <a\nhref="$h">$u</a>};
+ }
+}
+
sub summary_END { # called via OnDestroy
my ($ctx) = @_;
my $wcb = delete($ctx->{-wcb}) or return; # already done
for (@r) { print $zfh _refs_tags_link($_, './', $snap_pfx, @snap_fmt) }
print $zfh $NO_TAGS if !@r;
print $zfh qq(<a href="refs/tags/">...</a>\n) if $last;
+ emit_joined_inboxes $ctx;
$wcb->($ctx->html_done('</pre>'));
}
use v5.10.1;
use PublicInbox::Linkify;
use PublicInbox::WwwStream;
-use PublicInbox::Hval qw(ascii_html prurl);
+use PublicInbox::Hval qw(ascii_html prurl fmt_ts);
use HTTP::Date qw(time2str);
use URI::Escape qw(uri_escape_utf8);
use PublicInbox::GzipFilter qw(gzf_maybe);
sub coderepos_raw ($$) {
my ($ctx, $top_url) = @_;
- my $cr = $ctx->{ibx}->{coderepo} // return ();
my $cfg = $ctx->{www}->{pi_cfg};
+ my $cr = $cfg->repo_objs($ctx->{ibx}) or return ();
my $buf = 'Code repositories for project(s) associated with this '.
- $ctx->{ibx}->thing_type . "\n";
- for my $git (@{$ctx->{www}->{pi_cfg}->repo_objs($ctx->{ibx})}) {
+ $ctx->{ibx}->thing_type . ":\n";
+ my @recs = map { [ 0, $_ ] } @$cr;
+ my @todo = @recs;
+ $cfg->each_cindex('load_commit_times', \@todo);
+ @recs = sort { $b->[0] <=> $a->[0] } @recs;
+ my $cr_score = $ctx->{ibx}->{-cr_score};
+ for (@recs) {
+ my ($t, $git) = @$_;
for ($git->pub_urls($ctx->{env})) {
my $u = m!\A(?:[a-z\+]+:)?//!i ? $_ : $top_url.$_;
- $buf .= "\n\t" . prurl($ctx->{env}, $u);
+ my $nr = $cr_score->{$git->{nick}};
+ $buf .= "\n";
+ $buf .= $nr ? sprintf('% 9u', $nr) : (' 'x9);
+ $buf .= ' '.fmt_ts($t).' '.prurl($ctx->{env}, $u);
}
}
($buf);
use PublicInbox::TestCommon;
use Cwd qw(getcwd abs_path);
use List::Util qw(sum);
-use autodie qw(close open rename);
+use autodie qw(close mkdir open rename);
require_mods(qw(json Xapian +SCM_RIGHTS));
use_ok 'PublicInbox::CodeSearchIdx';
use PublicInbox::Import;
}
File::Path::remove_tree("$tmp/ext");
-ok(mkdir("$tmp/ext", 0707), 'create $tmp/ext with odd permissions');
+mkdir("$tmp/ext", 0707);
ok(run_script([qw(-cindex --dangerous -q -d), "$tmp/ext", $zp]),
'external on existing dir');
{
'non-Xapian-enabled inbox noted');
}
+# we need to support blank sections for top-level repos
+# (e.g. <https://example.com/my-project>).
+# git.kernel.org could use "pub" as section name, though, since all git repos
+# are currently under //git.kernel.org/pub/**/*
+{
+ mkdir(my $d = "$tmp/blanksection");
+ my $cfg = cfg_new($d, <<EOM);
+[cindex ""]
+ topdir = $tmp/ext
+ localprefix = $tmp
+EOM
+ my $csrch = $cfg->lookup_cindex(''); # '' selects the [cindex ""] section
+ is ref($csrch), 'PublicInbox::CodeSearch', 'codesearch w/ blank name';
+ is_deeply $csrch->{localprefix}, [ "$tmp" ], 'localprefix respected';
+ my $nr = 0;
+ $cfg->each_cindex(sub { # coderef form of each_cindex
+ my ($cs, @rest) = @_;
+ is $cs->{topdir}, $csrch->{topdir}, 'each_cindex works';
+ is_deeply \@rest, [ '.' ], 'got expected arg';
+ ++$nr;
+ }, '.'); # '.' is the extra argument checked via @rest above
+ is $nr, 1, 'iterated through cindices';
+}
+
done_testing;
use_ok 'PublicInbox::WWW';
my $cfg = PublicInbox::Config->new;
my $www = PublicInbox::WWW->new($cfg);
+$www->preload;
my $app = sub {
my $env = shift;
$env->{'psgi.errors'} = \*STDERR;
skip(qq{[publicinbox "$ibx_name"] not configured},
scalar(@$urls));
}
- if (!defined($ibx->{coderepo})) {
+ if (!defined($ibx->{-repo_objs})) {
push @gone, $ibx_name;
skip(qq{publicinbox.$ibx_name.coderepo not configured},
scalar(@$urls));