From 3609786b28b09bf33b3dd396b9bb413ac5bfb9aa Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 5 Mar 2025 07:18:32 +0000 Subject: [PATCH] search: make `d:' search prefix consistently date-only While `d:' previously treated YYYYMMDD and YYYY-MM-DD as low-precision by assuming 00:00:00 for the HH:MM:SS portion, it would not do so for dates passed to git-rev-parse (e.g. "last.year") since the HH:MM:SS of the current time would be used by git. So stop remapping `d:' in queries to `dt:' and continue using `d:' in the YYYYMMDD column. This does break things like `d:2.hours.ago..' by causing too many (or too few) results to be returned due to lack of precision, but I expect small time ranges of less than one day to be of limited use. `dt:' remains a higher-precision field for searching on both date and time from the Date: header, while `d:' is now always date-only. --- lib/PublicInbox/Search.pm | 25 ++++++++++++++++--------- t/search.t | 25 +++++++++++-------------- 2 files changed, 27 insertions(+), 23 deletions(-) diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 52e1f3356..4773808d5 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -341,16 +341,23 @@ sub date_parse_prepare { # expand "dt:2010-10-02" => "dt:2010-10-02..2010-10-03" and like # n.b. git doesn't do YYYYMMDD w/o '-', it needs YYYY-MM-DD - # We upgrade "d:" to "dt:" unconditionally if ($pfx eq 'd') { - $pfx = 'dt'; - # upgrade YYYYMMDD to YYYYMMDDHHMMSS - $_ .= ' 00:00:00' for (grep(m!\A[0-9]{4}[^[:alnum:]] - [0-9]{2}[^[:alnum:]] - [0-9]{2}\z!x, @r)); - $_ .= '000000' for (grep(m!\A[0-9]{8}\z!, @r)); - } - if ($pfx eq 'dt') { + if (!defined($r[1])) { # git needs gaps and not /\d{14}/ + if ($r[0] =~ /\A([0-9]{4})([0-9]{2})([0-9]{2})\z/) { + push @$to_parse, "$1-$2-$3 00:00:00"; + } else { + push @$to_parse, $r[0]; + } + $r[0] = "\0%Y%m%d$#$to_parse\0"; + $r[1] = "\0%Y%m%d+\0"; + } else { + for (@r) { + next if $_ eq '' || /\A[0-9]{8}\z/; + push @$to_parse, $_; + $_ = "\0%Y%m%d$#$to_parse\0"; + } + } + } elsif ($pfx eq 'dt') { if (!defined($r[1])) { # git needs gaps and not /\d{14}/ if ($r[0] =~ /\A([0-9]{4})([0-9]{2})([0-9]{2}) ([0-9]{2})([0-9]{2})([0-9]{2})\z/x) { diff --git a/t/search.t b/t/search.t index a0f257699..e793f55b1 100644 --- a/t/search.t +++ b/t/search.t @@ -584,13 +584,10 @@ SKIP: { skip 'too close to midnight, time is tricky', 6; } $q = $s->query_argv_to_string($g, [qw(d:20101002 blah)]); - is($q, 'dt:20101002000000..20101003000000 blah', - 'YYYYMMDD expanded to range'); + is $q, 'd:20101002..20101003 blah', 'YYYYMMDD expanded to range'; $q = $s->query_argv_to_string($g, [qw(d:2010-10-02)]); - is($q, 'dt:20101002000000..20101003000000', - 'YYYY-MM-DD expanded to range'); + is $q, 'd:20101002..20101003', 'YYYY-MM-DD expanded to range'; $q = $s->query_argv_to_string($g, [qw(rt:2010-10-02.. yy)]); - diag "q=$q"; $q =~ /\Art:(\d+)\.\. yy/ or fail("rt: expansion failed: $q"); is(strftime('%Y-%m-%d', gmtime($1//0)), '2010-10-02', 'rt: beg expand'); $q = $s->query_argv_to_string($g, [qw(rt:..2010-10-02 zz)]); @@ -637,8 +634,8 @@ SKIP: { $orig = $qs = qq[f:bob "hello world" d:1993-10-02..2010-10-02]; $s->query_approxidate($g, $qs); - is($qs, qq[f:bob "hello world" dt:19931002000000..20101002000000], - 'post-phrase date corrected'); + is $qs, qq[f:bob "hello world" d:19931002..20101002], + 'post-phrase date corrected'; # Xapian uses "" to escape " inside phrases, we don't explictly # handle that, but are able to pass the result through unchanged @@ -649,28 +646,28 @@ SKIP: { is($qs, $orig, 'phrases unchanged \x'.ord($x).'-\x'.ord($y)); $s->query_approxidate($g, my $tmp = "$qs d:..2010-10-02"); - is($tmp, "$orig dt:..20101002000000", - 'two phrases did not throw off date parsing'); + is $tmp, "$orig d:..20101002", + 'two phrases did not throw off date parsing'; $orig = $qs = qq[${x}hello d:1993-10-02..$y$x world$y]; $s->query_approxidate($g, $qs); is($qs, $orig, 'phrases unchanged \x'.ord($x).'-\x'.ord($y)); $s->query_approxidate($g, $tmp = "$qs d:..2010-10-02"); - is($tmp, "$orig dt:..20101002000000", - 'two phrases did not throw off date parsing'); + is $tmp, "$orig d:..20101002", + 'two phrases did not throw off date parsing'; } my $x_days_ago = strftime('%Y%m%d', gmtime(time - (5 * 86400))); $orig = $qs = qq[broken d:5.days.ago..]; $s->query_approxidate($g, $qs); - like($qs, qr/\Abroken dt:$x_days_ago[0-9]{6}\.\./, + like($qs, qr/\Abroken d:$x_days_ago\.\./, 'date.phrase.with.dots'); $orig = $qs = 'd:20101002..now'; $s->query_approxidate($g, $qs); - like($qs, qr/\Adt:20101002000000\.\.[0-9]{14}\z/, - 'approxidate on range-end only'); + like $qs, qr/\Ad:20101002\.\.[0-9]{8}\z/, + 'approxidate on range-end only'; $ENV{TEST_EXPENSIVE} or skip 'TEST_EXPENSIVE not set for argv overflow check', 1; -- 2.47.3