my $op_p = delete($self->{1}) // die 'BUG: no {1} op_p';
my $batch_bytes = $self->{-opt}->{batch_size} //
$PublicInbox::SearchIdx::BATCH_BYTES;
+ my $max_size = $self->{-opt}->{max_size};
# local-ized in parent before fork
$TXN_BYTES = $batch_bytes;
local $self->{git} = $git; # for patchid
$self->begin_txn_lazy;
while (defined($buf = <$rd>)) {
chomp($buf);
+ if ($max_size && length($buf) >= $max_size) {
+ my ($H, undef) = split(/\n/, $buf, 2);
+ warn "W: skipping $H (", length($buf)," >= $max_size)\n";
+ next;
+ }
$TXN_BYTES -= length($buf);
@$cmt{@FMT} = split(/\n/, $buf, scalar(@FMT));
$/ = "\n";
--update | -u update previously-indexed code repos with `-d'
--jobs=NUM set or disable parallelization (NUM=0)
--batch-size=BYTES flush changes to OS after a given number of bytes
+ --max-size=BYTES do not index commit diffs larger than the given size
--prune prune old repos and commits
--reindex reindex previously indexed repos
--verbose | -v increase verbosity (may be repeated)
EOF
# Option defaults: fsync and scan start out enabled. Both are declared as
# negatable specs (`fsync|sync!`, `scan!`) in the list below, so the
# corresponding --no-* switches can turn them off.
my $opt = { fsync => 1, scan => 1 }; # --no-scan is hidden
# Parse the command line into %$opt (GetOptions is presumably
# Getopt::Long's, used in its hash-storage form -- the import is not
# visible here).  Each value is stored under the first name of its spec,
# so --batch-size and --max-size land in $opt->{batch_size} and
# $opt->{max_size}.
# Both size options are declared `=s`, i.e. they are accepted as plain
# strings with no validation at this point.
# NOTE(review): presumably they are converted to byte counts before being
# compared numerically -- confirm against the code that consumes $opt.
# A false return from GetOptions dies with the $help text.
GetOptions($opt, qw(quiet|q verbose|v+ reindex jobs|j=i fsync|sync! dangerous
- indexlevel|index-level|L=s batch_size|batch-size=s
+ indexlevel|index-level|L=s
+ batch_size|batch-size=s max_size|max-size=s
project-list=s exclude=s@
d=s update|u scan! prune dry-run|n C=s@ help|h))
or die $help;