From: Eric Wong Date: Tue, 31 Mar 2020 08:49:36 +0000 (+0000) Subject: v2writable: index Message-IDs w/ spaces properly X-Git-Tag: v1.4.0~40 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=04e4a5573de1b9ed2f6528a0de568a1693882eea;p=thirdparty%2Fpublic-inbox.git v2writable: index Message-IDs w/ spaces properly Message-IDs can apparently contain spaces and other weird characters. Ensure we pass those properly to shard subprocesses when importing messages in parallel mode. Our NNTP request parser does not deal with spaces in the Message-ID, yet, and I don't expect most NNTP clients to, either. Nor does the Net::NNTP client handle them in responses. --- diff --git a/lib/PublicInbox/SearchIdxShard.pm b/lib/PublicInbox/SearchIdxShard.pm index 1ea01095c..06bcd4035 100644 --- a/lib/PublicInbox/SearchIdxShard.pm +++ b/lib/PublicInbox/SearchIdxShard.pm @@ -69,8 +69,9 @@ sub shard_worker_loop ($$$$$) { $self->remove_by_oid($oid, $mid); } else { chomp $line; - my ($bytes, $num, $blob, $mid, $ds, $ts) = - split(/ /, $line); + # n.b. $mid may contain spaces(!) 
+ my ($bytes, $num, $blob, $ds, $ts, $mid) = + split(/ /, $line, 6); $self->begin_txn_lazy; my $n = read($r, my $msg, $bytes) or die "read: $!\n"; $n == $bytes or die "short read: $n != $bytes\n"; @@ -93,7 +94,8 @@ sub shard_worker_loop ($$$$$) { sub index_raw { my ($self, $msgref, $mime, $smsg) = @_; if (my $w = $self->{w}) { - print $w join(' ', @$smsg{qw(bytes num blob mid ds ts)}), + # mid must be last, it can contain spaces (but not LF) + print $w join(' ', @$smsg{qw(bytes num blob ds ts mid)}), "\n", $$msgref or die "failed to write shard $!\n"; } else { $$msgref = undef; diff --git a/t/v2writable.t b/t/v2writable.t index cdcfe4d06..66d5663e2 100644 --- a/t/v2writable.t +++ b/t/v2writable.t @@ -109,6 +109,11 @@ if ('ensure git configs are correct') { @mids = $mime->header_obj->header_raw('Message-Id'); like($mids[0], $sane_mid, 'mid was generated'); is(scalar(@mids), 1, 'new generated'); + + @warn = (); + $mime->header_set('Message-Id', '<space@ (NXDOMAIN) >'); + ok($im->add($mime), 'message added with space in Message-Id'); + is_deeply([], \@warn); } { @@ -175,8 +180,13 @@ EOF is($uniq{$mid}++, 0, "MID for $num is unique in XOVER"); is_deeply($n->xhdr('Message-ID', $num), { $num => $mid }, "XHDR lookup OK on num $num"); + + # FIXME PublicInbox::NNTP (server) doesn't handle spaces in + # Message-ID, but neither does Net::NNTP (client) + next if $mid =~ / /; + is_deeply($n->xhdr('Message-ID', $mid), - { $mid => $mid }, "XHDR lookup OK on MID $num"); + { $mid => $mid }, "XHDR lookup OK on MID $mid ($num)"); } my %nn; foreach my $mid (@{$n->newnews(0, $group)}) {