From 299c705fa81e1d171d602e0bf359c4f277e17eaa Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 18 Mar 2025 08:30:27 +0000 Subject: [PATCH] lg2: disable strict hash verification Unlike git(1), libgit2 verifies the SHA-(1|256) of objects it reads by default. This verification results in a large (nearly 100% w/ SHA1DC) performance penalty for us. Since our libgit2 code only reads (and never writes) objects, just follow git(1) and skip verification for normal reads. This brings our libgit2-based Gcf2 batch loop performance closer to that of the `git cat-file --batch-command' as shown in the new xt/lg2_cmp.t developer test. However, Gcf2Client still uses a more verbose (but more flexible) input format and the Perl gcf2_loop still incurs normal Perl method dispatch overheads. --- MANIFEST | 1 + lib/PublicInbox/Lg2.pm | 13 ++++++----- xt/lg2_cmp.t | 51 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 60 insertions(+), 5 deletions(-) create mode 100644 xt/lg2_cmp.t diff --git a/MANIFEST b/MANIFEST index 321c652dc..93407a46b 100644 --- a/MANIFEST +++ b/MANIFEST @@ -670,6 +670,7 @@ xt/imapd-mbsync-oimap.t xt/imapd-validate.t xt/lei-auth-fail.t xt/lei-onion-convert.t +xt/lg2_cmp.t xt/mem-imapd-tls.t xt/mem-msgview.t xt/mem-nntpd-tls.t diff --git a/lib/PublicInbox/Lg2.pm b/lib/PublicInbox/Lg2.pm index 0ee9b3544..a4ea4b763 100644 --- a/lib/PublicInbox/Lg2.pm +++ b/lib/PublicInbox/Lg2.pm @@ -48,11 +48,14 @@ BEGIN { STDOUT->autoflush(1); $CFG{CCFLAGSEX} = $vals->{cflags}; $CFG{LIBS} = $vals->{libs}; - - # we use Capitalized and ALLCAPS for compatibility with old Inline::C - CORE::eval <<'EOM'; -use Inline C => Config => %CFG, BOOT => q[git_libgit2_init();]; -use Inline C => $c_src, BUILD_NOISY => 1; + my $boot = 'git_libgit2_init();'; + eval("v$vals->{modversion}") ge v0.26 and + $boot .= <<EOM; +git_libgit2_opts(GIT_OPT_ENABLE_STRICT_HASH_VERIFICATION, 0); +EOM + CORE::eval <<EOM; +use Inline C => Config => \%CFG, BOOT => \$boot; +use Inline C => \$c_src . 
"/* boot: $boot */\n", BUILD_NOISY => 1; EOM $err = $@; open(STDERR, '>&', $olderr); diff --git a/xt/lg2_cmp.t b/xt/lg2_cmp.t new file mode 100644 index 000000000..968e390d2 --- /dev/null +++ b/xt/lg2_cmp.t @@ -0,0 +1,51 @@ +#!perl -w +# Copyright (C) all contributors +# License: AGPL-3.0+ +use v5.12; +use Benchmark qw(:all); +use PublicInbox::TestCommon; +require_mods 'PublicInbox::Lg2'; +require_git v2.19; +require PublicInbox::Gcf2Client; +my $git_dir = $ENV{GIANT_GIT_DIR} // + plan skip_all => "GIANT_GIT_DIR not defined for $0"; +my $git = PublicInbox::Git->new($git_dir); +my @cat = qw[cat-file --buffer --batch-check=%(objectname) + --batch-all-objects --unordered]; +my $nr = $ENV{NR} || 100; +diag "NR=$nr"; +my $n = 0; +my $count = sub { ++$n }; + +my $gcf2c = PublicInbox::Gcf2Client::new(); +my $repo = " $git_dir\n"; +my ($lg2_total, $git_total); +my $lg2_async = timeit($nr, sub { + my $cat = $git->popen(@cat); + while (<$cat>) { + chomp; + $gcf2c->gcf2_async($_.$repo, $count); + } + $cat->close or xbail "cat: $?"; + $gcf2c->event_step while PublicInbox::Git::cat_active($gcf2c); + $lg2_total += $n; + $n = 0; +}); + +my $git_async = timeit($nr, sub { + my $cat = $git->popen(@cat); + while (<$cat>) { + chomp; + $git->cat_async($_, $count); + } + $cat->close or xbail "cat: $?"; + $git->async_wait_all; + $git_total += $n; + $n = 0; +}); + +diag 'git '.timestr($git_async); +diag 'lg2 '.timestr($lg2_async); +is $lg2_total, $git_total, 'libgit2 and git saw same number of requests'; + +done_testing; -- 2.47.3