git.ipfire.org Git - thirdparty/public-inbox.git/commitdiff
lei_mirror: handle UTF-8 from manifest.js.gz properly
author: Eric Wong <e@80x24.org>
Mon, 13 Mar 2023 12:00:23 +0000 (12:00 +0000)
committer: Eric Wong <e@80x24.org>
Mon, 13 Mar 2023 12:45:58 +0000 (12:45 +0000)
This should ensure we display the "git config gitweb.owner
$OWNER" command invocation properly and also ensure we set the
description properly without triggering wide character warnings.

Also tested with a smallish iproute2 repo
(/pub/scm/linux/kernel/git/toke/iproute2.git) using my mirror:

  public-inbox-clone --remote-manifest=pub/manifest.js.gz \
    --include='*/toke/iproute2.git' --inbox-config=never \
    https://80x24.org/lore $DST

Anyways, I'm fairly certain this change and its tests are
correct; but I still struggle to understand Perl's approach to
Unicode and its interactions with various JSON implementations.

Fixes: 0830817c132cb105 ("lei_mirror: show non-ASCII owner properly w/ --verbose")
lib/PublicInbox/LeiMirror.pm
t/clone-coderepo.t

index 3ec8170fea09a143c5a881dba2a8cacca875c1fc..18932cf4ca5d2564d1d640dddc183846a473ac0b 100644 (file)
@@ -259,8 +259,7 @@ sub run_reap {
 sub start_cmd {
        my ($self, $cmd, $opt, $fini) = @_;
        do_reap($self);
-       utf8::decode(my $msg = "# @$cmd");
-       $self->{lei}->qerr($msg);
+       $self->{lei}->qerr("# @$cmd");
        return if $self->{dry_run};
        $LIVE->{spawn($cmd, undef, $opt)} = [ \&reap_cmd, $self, $cmd, $fini ]
 }
@@ -633,7 +632,7 @@ sub clone_v1 {
        }
 
        my $d = $self->{-ent} ? $self->{-ent}->{description} : undef;
-       $self->{'txt.description'} = $d if defined $d;
+       utf8::encode($self->{'txt.description'} = $d) if defined $d;
        (!defined($d) && !$end) and
                _get_txt_start($self, 'description', $fini);
 
@@ -823,6 +822,7 @@ sub update_ent {
        $new = $self->{-ent}->{owner} // return;
        $cur = $self->{-local_manifest}->{$key}->{owner} // "\0";
        return if $cur eq $new;
+       utf8::encode($new); # to octets
        my $cmd = [ qw(git config -f), "$dst/config", 'gitweb.owner', $new ];
        start_cmd($self, $cmd, { 2 => $self->{lei}->{2} });
 }
index 1f33a6d723df66fe571ed2920c8520f2c94715a0..3a5997c9abe1220ee627fae938392b06bb4649cb 100644 (file)
@@ -63,11 +63,13 @@ EOM
        my $env = { TEST_DOCROOT => "$tmpdir/src", PI_CONFIG => $pi_config };
        $td = start_script($cmd, $env, { 3 => $tcp });
        my $fp = sha1_hex(my $refs = xqx([@git, 'show-ref']));
+       my $alice = "\x{100}lice";
        $m = {
                '/a.git' => {
                        fingerprint => $fp,
                        modified => 1,
-                       owner => 'Alice',
+                       owner => $alice,
+                       description => "${alice}'s repo",
                },
                '/b.git' => {
                        fingerprint => $fp,
@@ -89,9 +91,11 @@ my $cmd = [qw(-clone --inbox-config=never --manifest= --project-list=
        --objstore= -p -q), $url, "$tmpdir/dst", '--exit-code'];
 ok(run_script($cmd), 'clone');
 is(xqx([qw(git config gitweb.owner)], { GIT_DIR => "$tmpdir/dst/a.git" }),
-       "Alice\n", 'a.git gitweb.owner set');
+       "\xc4\x80lice\n", 'a.git gitweb.owner set');
 is(xqx([qw(git config gitweb.owner)], { GIT_DIR => "$tmpdir/dst/b.git" }),
        "Bob\n", 'b.git gitweb.owner set');
+my $desc = PublicInbox::Git::try_cat("$tmpdir/dst/a.git/description");
+is($desc, "\xc4\x80lice's repo\n", 'description set');
 
 my $dst_pl = "$tmpdir/dst/projects.list";
 my $dst_mf = "$tmpdir/dst/manifest.js.gz";