}
}
+# Split every -A argument in $req->{A} into a term prefix and an
+# optional term length given as trailing decimal digits.
+# The digits are stripped from the entries of $req->{A} in place, and
+# $req->{A_len} is filled with one [ $prefix, $length ] pair per entry;
+# $length is undef when the argument had no trailing digits.
+sub term_length_extract ($) {
+	my ($req) = @_;
+	my @pairs;
+	for my $pfx (@{$req->{A}}) { # aliased, so s/// edits $req->{A}
+		my $len;
+		$len = $1 + 0 if $pfx =~ s/([0-9]+)\z//;
+		push @pairs, [ $pfx, $len ];
+	}
+	@{$req->{A_len}} = @pairs;
+}
+
sub dump_ibx_iter ($$$) {
my ($req, $ibx_id, $it) = @_;
my $out = $req->{0};
eval {
my $doc = $it->get_document;
- for my $p (@{$req->{A}}) {
- for (xap_terms($p, $doc)) {
+ for my $pair (@{$req->{A_len}}) {
+ my ($pfx, $len) = @$pair;
+ my @t = xap_terms($pfx, $doc);
+ @t = grep { length == $len } @t if defined($len);
+ for (@t) {
print $out "$_ $ibx_id\n" or die "print: $!";
++$req->{nr_out};
}
my ($req, $ibx_id, $qry_str) = @_;
$qry_str // die 'usage: dump_ibx [OPTIONS] IBX_ID QRY_STR';
$req->{A} or die 'dump_ibx requires -A PREFIX';
+ term_length_extract $req;
my $max = $req->{'m'} // $req->{srch}->{xdb}->get_doccount;
my $opt = { relevance => -1, limit => $max, offset => $req->{o} // 0 };
$opt->{eidx_key} = $req->{O} if defined $req->{O};
eval {
my $doc = $it->get_document;
my $G = join(' ', map { $root2off->{$_} } xap_terms('G', $doc));
- for my $p (@{$req->{A}}) {
- for (xap_terms($p, $doc)) {
+ for my $pair (@{$req->{A_len}}) {
+ my ($pfx, $len) = @$pair;
+ my @t = xap_terms($pfx, $doc);
+ @t = grep { length == $len } @t if defined($len);
+ for (@t) {
$req->{wbuf} .= "$_ $G\n";
++$req->{nr_out};
}
my ($req, $root2off_file, $qry_str) = @_;
$qry_str // die 'usage: dump_roots [OPTIONS] ROOT2ID_FILE QRY_STR';
$req->{A} or die 'dump_roots requires -A PREFIX';
+ term_length_extract $req;
open my $fh, '<', $root2off_file;
my $root2off; # record format: $OIDHEX "\0" uint32_t
my @x = split(/\0/, read_all $fh);
struct req { // argv and pfxv point into global rbuf
char *argv[MY_ARG_MAX];
char *pfxv[MY_ARG_MAX]; // -A <prefix>
+ size_t *lenv; // -A <prefix>LENGTH
struct srch *srch;
char *Pgit_dir;
char *Oeidx_key;
sock_fd = -1; // break out of recv_loop
}
+// Variables declared with CLEANUP_REQ have req_cleanup() called on them
+// (with a pointer to the variable) when they go out of scope.
+#define CLEANUP_REQ __attribute__((__cleanup__(req_cleanup)))
+// Releases the per-request term length array; @ptr is a `struct req *'.
+// free(NULL) is a no-op, so requests which never set lenv are fine.
+static void req_cleanup(void *ptr)
+{
+	free(((struct req *)ptr)->lenv);
+}
+
static void recv_loop(void) // worker process loop
{
static char rbuf[4096 * 33]; // per-process
while (sock_fd == 0) {
size_t len = sizeof(rbuf);
- struct req req = {};
+ CLEANUP_REQ struct req req = {};
+
if (!recv_req(&req, rbuf, &len))
continue;
if (req.fp[1])
// This file is only intended to be included by xap_helper.h
// it implements pieces used by CodeSearchIdx.pm
-static void dump_ibx_term(struct req *req, const char *pfx,
+// Splits each -A argument in req->pfxv into a term prefix and an optional
+// term length given as trailing decimal digits (e.g. "XDFPOST7").
+// On success the digits are cut off the prefix string in place and the
+// parsed length is stored in req->lenv[i]; a length of 0 means no length
+// filter.  req->lenv is allocated here, one zeroed slot per prefix.
+// Arguments which cannot be split are warned about and left untouched.
+static void term_length_extract(struct req *req)
+{
+	req->lenv = (size_t *)calloc(req->pfxc, sizeof(size_t));
+	// calloc(0, ...) may legitimately return NULL
+	if (!req->lenv && req->pfxc > 0)
+		EABORT("lenv = calloc(%d %zu)", req->pfxc, sizeof(size_t));
+	for (int i = 0; i < req->pfxc; i++) {
+		char *pfx = req->pfxv[i];
+		size_t n = strlen(pfx);
+		size_t j = n;
+		// extract trailing digits as length:
+		// $len = s/([0-9]+)\z// ? ($1+0) : 0
+		// Scan backwards so only the final run of digits is
+		// considered; digits in the middle of a prefix are kept.
+		while (j > 0 && pfx[j - 1] >= '0' && pfx[j - 1] <= '9')
+			j--;
+		if (j == n) // no trailing digits: no length filter
+			continue;
+		if (j == 0) { // all digits: nothing left to use as a prefix
+			warnx("W: `%s' not a valid prefix", pfx);
+			continue;
+		}
+		char *end;
+		unsigned long long tmp = strtoull(pfx + j, &end, 10);
+		// strtoull returns ULLONG_MAX on overflow, caught by the
+		// SIZE_MAX check; the prefix is left unmodified in that case
+		if (*end || tmp >= (unsigned long long)SIZE_MAX) {
+			warnx("W: `%s' not recognized", pfx);
+		} else {
+			req->lenv[i] = (size_t)tmp;
+			pfx[j] = 0;
+		}
+	}
+}
+
+static void dump_ibx_term(struct req *req, int p,
Xapian::Document *doc, const char *ibx_id)
{
Xapian::TermIterator cur = doc->termlist_begin();
Xapian::TermIterator end = doc->termlist_end();
+ const char *pfx = req->pfxv[p];
size_t pfx_len = strlen(pfx);
+ size_t term_len = req->lenv[p];
for (cur.skip_to(pfx); cur != end; cur++) {
std::string tn = *cur;
if (!starts_with(&tn, pfx, pfx_len)) break;
+ if (term_len > 0 && (tn.length() - pfx_len) != term_len)
+ continue;
fprintf(req->fp[0], "%s %s\n", tn.c_str() + pfx_len, ibx_id);
++req->nr_out;
}
try {
Xapian::Document doc = i->get_document();
for (int p = 0; p < req->pfxc; p++)
- dump_ibx_term(req, req->pfxv[p], &doc, ibx_id);
+ dump_ibx_term(req, p, &doc, ibx_id);
} catch (const Xapian::DatabaseModifiedError & e) {
req->srch->db->reopen();
return ITER_RETRY;
EABORT("setlinebuf(fp[0])"); // WTF?
req->asc = true;
req->sort_col = -1;
+ term_length_extract(req);
Xapian::MSet mset = mail_mset(req, req->argv[optind + 1]);
// @UNIQ_FOLD in CodeSearchIdx.pm can handle duplicate lines fine
// writes term values matching @pfx for a given @doc, ending the line
// with the contents of @root_offs
-static void dump_roots_term(struct req *req, const char *pfx,
+static void dump_roots_term(struct req *req, int p,
struct dump_roots_tmp *drt,
struct fbuf *root_offs,
Xapian::Document *doc)
{
Xapian::TermIterator cur = doc->termlist_begin();
Xapian::TermIterator end = doc->termlist_end();
+ const char *pfx = req->pfxv[p];
size_t pfx_len = strlen(pfx);
+ size_t term_len = req->lenv[p];
for (cur.skip_to(pfx); cur != end; cur++) {
std::string tn = *cur;
if (!starts_with(&tn, pfx, pfx_len)) break;
+ if (term_len > 0 && (tn.length() - pfx_len) != term_len)
+ continue;
fputs(tn.c_str() + pfx_len, drt->wbuf.fp);
fwrite(root_offs->ptr, root_offs->len, 1, drt->wbuf.fp);
++req->nr_out;
if (!root2offs_str(&root_offs, &doc))
return ITER_ABORT; // bad request, abort
for (int p = 0; p < req->pfxc; p++)
- dump_roots_term(req, req->pfxv[p], drt,
- &root_offs, &doc);
+ dump_roots_term(req, p, drt, &root_offs, &doc);
} catch (const Xapian::DatabaseModifiedError & e) {
req->srch->db->reopen();
return ITER_RETRY;
req->asc = true;
req->sort_col = -1;
Xapian::MSet mset = commit_mset(req, req->argv[optind + 1]);
+ term_length_extract(req);
fbuf_init(&drt.wbuf);
"#$docid $pfx as expected ($xhc->{impl})";
}
}
+ # dump_roots with a length-suffixed -A prefix (XDFPOST7, XDFPOST8, ...):
+ # each request should only emit OIDs of exactly $i hex characters.
+ # $nr holds the result line count of the first length; every other
+ # length is expected to produce the same number of lines.
+ my $nr;
+ for my $i (7, 8, 39, 40) {
+ pipe($err_r, $err_w);
+ $r = $xhc->mkreq([ undef, $err_w ], qw(dump_roots -c -A),
+ "XDFPOST$i", (map { ('-d', $_) } @int),
+ $root2id_file, 'dt:19700101'.'000000..');
+ # drop our copy of the write end before reading $err_r
+ # (presumably so the read below can see EOF)
+ close $err_w;
+ @res = <$r>;
+ my @err = <$err_r>;
+ if (defined $nr) {
+ is scalar(@res), $nr,
+ "got expected results ($xhc->{impl})";
+ } else {
+ $nr //= scalar @res;
+ ok $nr, "got initial results ($xhc->{impl})";
+ }
+ # the OID is the leading hex word of each output line
+ my @oids = (join('', @res) =~ /^([a-f0-9]+) /gms);
+ is_deeply [grep { length == $i } @oids], \@oids,
+ "all OIDs match expected length ($xhc->{impl})";
+ # nr_out=N is parsed from what was written to the $err_w pipe and
+ # must agree with the number of OIDs actually seen in the output
+ my ($nr_out) = ("@err" =~ /nr_out=(\d+)/);
+ is $nr_out, scalar(@oids), "output count matches $xhc->{impl}"
+ or diag explain(\@res, \@err);
+ }
+ # same nr_out consistency check for dump_ibx with a 7-character length;
+ # only leading hex words of exactly 7 characters are counted
+ pipe($err_r, $err_w);
+ $r = $xhc->mkreq([ undef, $err_w ], qw(dump_ibx -A XDFPOST7),
+ @ibx_shard_args, qw(13 rt:0..));
+ close $err_w;
+ @res = <$r>;
+ my @err = <$err_r>;
+ my ($nr_out) = ("@err" =~ /nr_out=(\d+)/);
+ my @oids = (join('', @res) =~ /^([a-f0-9]{7}) /gms);
+ is $nr_out, scalar(@oids), "output count matches $xhc->{impl}" or
+ diag explain(\@res, \@err);
}
done_testing;