From: Tomaz Solc
Date: Sun, 25 Feb 2018 19:25:23 +0000 (+0100)
Subject: Decode RFC 3986 "unreserved chars" in URLs.
X-Git-Tag: AWSTATS_7_8~28^2
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=refs%2Fpull%2F92%2Fhead;p=thirdparty%2FAWStats.git

Decode RFC 3986 "unreserved chars" in URLs.

This makes awstats treat "/foo" and "/%66%6f%6f" as equivalent.

This change only affects some common characters from the ASCII range. It
doesn't do any kind of utf-8 decoding (as per RFC 3986, see Section 2.3.)
---

diff --git a/wwwroot/cgi-bin/awstats.pl b/wwwroot/cgi-bin/awstats.pl
index b04ed6f8..5a7d809e 100755
--- a/wwwroot/cgi-bin/awstats.pl
+++ b/wwwroot/cgi-bin/awstats.pl
@@ -7906,6 +7906,29 @@ sub DecodeEncodedString {
 	return $stringtodecode;
 }
 
+#------------------------------------------------------------------------------
+# Function: Similar to DecodeEncodedString, but decode only
+#           RFC3986 "unreserved characters"
+# Parameters: stringtodecode
+# Input: None
+# Output: None
+# Return: decodedstring
+#------------------------------------------------------------------------------
+sub DecodeRFC3986UnreservedString {
+	my $stringtodecode = shift;
+
+	# Only escapes of RFC 3986 "unreserved" characters are matched; /i lets
+	# the hex digits be upper or lower case:
+	#   [46][1-9A-F]  -> %41-%4F, %61-%6F  (A-O, a-o)
+	#   [57][0-9A]    -> %50-%5A, %70-%7A  (P-Z, p-z)
+	#   3[0-9]        -> %30-%39           (0-9)
+	#   2D 2E 5F 7E   -> "-", ".", "_", "~"
+	# Any other escape (e.g. %2F, %25) is left percent-encoded.
+	$stringtodecode =~ s/%([46][1-9A-F]|[57][0-9A]|3[0-9]|2D|2E|5F|7E)/pack("C", hex($1))/ieg;
+
+	return $stringtodecode;
+}
+
 #------------------------------------------------------------------------------
 # Function: Decode a precompiled regex value to a common regex value
 # Parameters: compiledregextodecode
@@ -18718,6 +18741,14 @@ if ( $UpdateStats && $FrameName ne 'index' && $FrameName ne 'mainleft' )
 		# We keep a clean $field[$pos_url] and
 		# we store original value for urlwithnoquery, tokenquery and standalonequery
 		#---------------------------------------------------------------------------
+
+		# Decode "unreserved characters" - URIs with common ASCII characters
+		# percent-encoded are equivalent to their unencoded versions.
+		#
+		# See section 2.3. of RFC 3986.
+
+		$field[$pos_url] = DecodeRFC3986UnreservedString($field[$pos_url]);
+
 		if ($URLNotCaseSensitive) { $field[$pos_url] = lc( $field[$pos_url] ); }
 
 # Possible URL syntax for $field[$pos_url]: /mydir/mypage.ext?param1=x&param2=y#aaa, /mydir/mypage.ext#aaa, /