From 588fd47a842c5c4fbbabefec77860ecb8401af0c Mon Sep 17 00:00:00 2001
From: Tomaz Solc
Date: Sun, 25 Feb 2018 20:25:23 +0100
Subject: [PATCH] Decode RFC 3986 "unreserved chars" in URLs.

This makes awstats treat "/foo" and "/%66%6f%6f" as equivalent.

This change only affects some common characters from the ASCII range. It
doesn't do any kind of utf-8 decoding (as per RFC 3986, see Section 2.3.)
---
 wwwroot/cgi-bin/awstats.pl | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/wwwroot/cgi-bin/awstats.pl b/wwwroot/cgi-bin/awstats.pl
index b04ed6f8..5a7d809e 100755
--- a/wwwroot/cgi-bin/awstats.pl
+++ b/wwwroot/cgi-bin/awstats.pl
@@ -7906,6 +7906,22 @@ sub DecodeEncodedString {
 	return $stringtodecode;
 }
 
+#------------------------------------------------------------------------------
+# Function:		Similar to DecodeEncodedString, but decode only
+#				RFC3986 "unreserved characters"
+# Parameters:	stringtodecode
+# Input:		None
+# Output:		None
+# Return:		decodedstring
+#------------------------------------------------------------------------------
+sub DecodeRFC3986UnreservedString {
+	my $stringtodecode = shift;
+
+	$stringtodecode =~ s/%([46][1-9A-F]|[57][0-9A]|3[0-9]|2D|2E|5F|7E)/pack("C", hex($1))/ieg;
+
+	return $stringtodecode;
+}
+
 #------------------------------------------------------------------------------
 # Function:     Decode a precompiled regex value to a common regex value
 # Parameters:   compiledregextodecode
@@ -18718,6 +18734,14 @@ if ( $UpdateStats && $FrameName ne 'index' && $FrameName ne 'mainleft' )
 			# We keep a clean $field[$pos_url] and
 			# we store original value for urlwithnoquery, tokenquery and standalonequery
 			#---------------------------------------------------------------------------
+
+			# Decode "unreserved characters" - URIs with common ASCII characters
+			# percent-encoded are equivalent to their unencoded versions.
+			#
+			# See section 2.3. of RFC 3986.
+
+			$field[$pos_url] = DecodeRFC3986UnreservedString($field[$pos_url]);
+
 			if ($URLNotCaseSensitive) { $field[$pos_url] = lc( $field[$pos_url] ); }
 
 # Possible URL syntax for $field[$pos_url]: /mydir/mypage.ext?param1=x&param2=y#aaa, /mydir/mypage.ext#aaa, /
-- 
2.47.2