From: Eric Wong
Date: Tue, 21 Mar 2023 23:07:35 +0000 (+0000)
Subject: cindex: attempt to give oldest commits lowest docids
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e8a6b99a45dfa8ae36bb6f95bc9aed4577014f29;p=thirdparty%2Fpublic-inbox.git

cindex: attempt to give oldest commits lowest docids

Monotonically increasing docids may help us avoid sorting output
for the web and CLI, since recent commits are generally the most
desired search results.

`git log --reverse' incurs no extra overhead in this case, since
`--stdin' will mean git buffers the commit list in memory before
attempting to emit anything.
---

diff --git a/lib/PublicInbox/CodeSearchIdx.pm b/lib/PublicInbox/CodeSearchIdx.pm
index 176422d0d..f0b506da0 100644
--- a/lib/PublicInbox/CodeSearchIdx.pm
+++ b/lib/PublicInbox/CodeSearchIdx.pm
@@ -52,8 +52,12 @@ our $SEEN_MAX = 100000;
 
 # TODO: do we care about committer name + email? or tree OID?
 my @FMT = qw(H P ct an ae at s b); # (b)ody must be last
+
+# git log --stdin buffers all commits before emitting, thus --reverse
+# doesn't incur extra overhead. We use --reverse to keep Xapian docids
+# increasing so we may be able to avoid sorting results in some cases
 my @LOG_STDIN = (qw(log --no-decorate --no-color --no-notes -p --stat -M
-	--stdin --no-walk=unsorted), '--pretty=format:%n%x00'.
+	--reverse --stdin --no-walk=unsorted), '--pretty=format:%n%x00'.
 	join('%n', map { "%$_" } @FMT));
 
 sub new {