use strict;
use v5.10.1;
use URI::Escape qw(uri_escape);
-use PublicInbox::MID qw(MID_ESC);
+use PublicInbox::Hval qw(ascii_html);
our $LIM = 200;
sub new {
}
my $qs = '';
if (defined(my $q = $self->{'q'})) {
- $q = uri_escape($q, MID_ESC);
+ # Deliberately not MID_ESC: that set is for the URL path component,
+ # whereas $q goes in the query component. Unlike MID_ESC, this
+ # percent-escapes [\&\'\+=] and leaves slash [/] unescaped for
+ # nicer-looking dfn: queries
+ $q = uri_escape($q, '^A-Za-z0-9\-\._~!\$\(\)\*,;:@/');
$q =~ s/%20/+/g; # improve URL readability
- $qs .= "q=$q";
+ $qs .= 'q='.ascii_html($q);
}
if (my $o = $self->{o}) { # ignore o == 0
$qs .= "&o=$o";
my $digits = '10010260936330';
my $ua = 'Pine.LNX.4.10';
my $mid = "$ua.$digits.2460-100000\@penguin.transmeta.com";
-my $ibx = create_inbox 'git', indexlevel => 'full', tmpdir => "$tmpdir/1", sub {
+my $ibx = create_inbox '26-git', indexlevel => 'full', tmpdir => "$tmpdir/1",
+sub {
my ($im) = @_;
# n.b. these headers are not properly RFC2047-encoded
$im->add(PublicInbox::Eml->new(<<EOF)) or BAIL_OUT;
From: no subject at all <no-subject-at-all@example.com>
To: git@vger.kernel.org
+EOF
+ $im->add(PublicInbox::Eml->new(<<'EOF')) or BAIL_OUT;
+Message-ID: <ampersand@example.com>
+From: <e@example.com>
+To: git@vger.kernel.org
+Subject: git & ampersand
+
+hi +++ b/foo
+x=y
+s'more
+
EOF
};
is($res->code, 200, 'successful mbox download w/ threads');
gunzip(\($res->content) => \(my $after));
isnt($before, $after);
+
+ $res = $cb->(GET('/test/?q=git+%26+ampersand&x=A'));
+ is $res->code, 200, 'Atom hit with ampersand';
+ unlike $res->content, qr/git\+&\+ampersand/, '& is HTML-escaped';
+
+ $res = $cb->(GET('/test/?q=%22hi+%2b%2b%2b+b/foo%22&x=A'));
+ is $res->code, 200, 'slashes and plusses search hit';
+ like $res->content, qr!q=%22hi\+(?:%2[bB]){3}\+b/foo%22!,
+ '+ and " escaped, but slash not escaped in query';
+
+ $res = $cb->(GET(q{/test/?q=%22s'more%22&x=A}));
+ is $res->code, 200, 'single quote inside phrase';
+ # TODO: more tests and odd cases
});
done_testing();