From: J. Nick Koston Date: Wed, 25 Apr 2018 18:53:38 +0000 (-0500) Subject: Optimize OptimizeArray X-Git-Tag: AWSTATS_7_8~23^2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=refs%2Fpull%2F97%2Fhead;p=thirdparty%2FAWStats.git Optimize OptimizeArray This function took 571 seconds to execute with 10k domains in the configuration file. After optimization it now takes about 20ms for the same workload. --- diff --git a/test/testunit/testoptimizearray.pl b/test/testunit/testoptimizearray.pl new file mode 100644 index 00000000..914a85da --- /dev/null +++ b/test/testunit/testoptimizearray.pl @@ -0,0 +1,106 @@ +#!/usr/bin/perl -w +# +use Test::More tests => 4; +use Test::Deep; + +my @arr = ( qr/ABc/, qr/abc/, qr/ccc/, qr/abc/, qr/ABc/ ); + +sub OptimizeArray { + my ( $array, $notcasesensitive ) = @_; + my %seen; + + if ($notcasesensitive) { + + # Case insensitive + my $uncompiled_regex; + return map { + $uncompiled_regex = UnCompileRegex($_); + !$seen{ lc $uncompiled_regex }++ ? qr/$uncompiled_regex/i : () + } @$array; + } + + # Case sensitive + return map { !$seen{$_}++ ? $_ : () } @$array; +} + +sub UnCompileRegex { + shift =~ /\(\?[-^\w]*:(.*)\)/; # Works with all perl + # shift =~ /\(\?[-\w]*:(.*)\)/; < perl 5.14 + return $1; +} + +sub OptimizeArray_old { + my $array = shift; + my @arrayunreg = map { UnCompileRegex($_) } @$array; + my $notcasesensitive = shift; + my $searchlist = 0; + if ($Debug) { + debug( "OptimizeArray (notcasesensitive=$notcasesensitive)", 4 ); + } + while ( $searchlist > -1 && @arrayunreg ) { + my $elemtoremove = -1; + OPTIMIZELOOP: + foreach my $i ( $searchlist .. ( scalar @arrayunreg ) - 1 ) { + + # Search if $i elem is already treated by another elem + foreach my $j ( 0 .. ( scalar @arrayunreg ) - 1 ) { + if ( $i == $j ) { next; } + my $parami = + $notcasesensitive ? lc( $arrayunreg[$i] ) : $arrayunreg[$i]; + my $paramj = + $notcasesensitive ? lc( $arrayunreg[$j] ) : $arrayunreg[$j]; + if ($Debug) { + debug( " Compare $i ($parami) to $j ($paramj)", 4 ); + } + if ( index( $parami, $paramj ) > -1 ) { + if ($Debug) { + debug( + " Elem $i ($arrayunreg[$i]) already treated with elem $j ($arrayunreg[$j])", + 4 + ); + } + $elemtoremove = $i; + last OPTIMIZELOOP; + } + } + } + if ( $elemtoremove > -1 ) { + if ($Debug) { + debug( + " Remove elem $elemtoremove - $arrayunreg[$elemtoremove]", + 4 + ); + } + splice @arrayunreg, $elemtoremove, 1; + $searchlist = $elemtoremove; + } + else { + $searchlist = -1; + } + } + if ($notcasesensitive) { + return map { qr/$_/i } @arrayunreg; + } + return map { qr/$_/ } @arrayunreg; +} + +is_deeply( + [ OptimizeArray( \@arr, 0 ) ], + [ + qr/ABc/, + qr/abc/, + qr/ccc/, + ], + "case senitive OptimizeArray returns the expected results" +); +cmp_bag( [ OptimizeArray( \@arr, 0 ) ], [ OptimizeArray_old( \@arr, 0 ) ], "..and it matches the old method" ); +is_deeply( + [ OptimizeArray( \@arr, 1 ) ], + [ + qr/ABc/i, + qr/ccc/i, + ], + "case insenitive OptimizeArray returns the expected results" +); +cmp_bag( [ OptimizeArray( \@arr, 1 ) ], [ OptimizeArray_old( \@arr, 1 ) ], "..and it matches the old method" ) or diag explain [ OptimizeArray( \@arr, 1 ) ], [ OptimizeArray_old( \@arr, 1 ) ]; + diff --git a/wwwroot/cgi-bin/awstats.pl b/wwwroot/cgi-bin/awstats.pl index b04ed6f8..a30226ed 100755 --- a/wwwroot/cgi-bin/awstats.pl +++ b/wwwroot/cgi-bin/awstats.pl @@ -1351,57 +1351,21 @@ sub debug { # Return: None #------------------------------------------------------------------------------ sub OptimizeArray { - my $array = shift; - my @arrayunreg = map { UnCompileRegex($_) } @$array; - my $notcasesensitive = shift; - my $searchlist = 0; - if ($Debug) { - debug( "OptimizeArray (notcasesensitive=$notcasesensitive)", 4 ); - } - while ( $searchlist > -1 && @arrayunreg ) { - my $elemtoremove = -1; - OPTIMIZELOOP: - foreach my $i ( $searchlist .. ( scalar @arrayunreg ) - 1 ) { - - # Search if $i elem is already treated by another elem - foreach my $j ( 0 .. ( scalar @arrayunreg ) - 1 ) { - if ( $i == $j ) { next; } - my $parami = - $notcasesensitive ? lc( $arrayunreg[$i] ) : $arrayunreg[$i]; - my $paramj = - $notcasesensitive ? lc( $arrayunreg[$j] ) : $arrayunreg[$j]; - if ($Debug) { - debug( " Compare $i ($parami) to $j ($paramj)", 4 ); - } - if ( index( $parami, $paramj ) > -1 ) { - if ($Debug) { - debug( -" Elem $i ($arrayunreg[$i]) already treated with elem $j ($arrayunreg[$j])", - 4 - ); - } - $elemtoremove = $i; - last OPTIMIZELOOP; - } - } - } - if ( $elemtoremove > -1 ) { - if ($Debug) { - debug( - " Remove elem $elemtoremove - $arrayunreg[$elemtoremove]", - 4 ); - } - splice @arrayunreg, $elemtoremove, 1; - $searchlist = $elemtoremove; - } - else { - $searchlist = -1; - } - } - if ($notcasesensitive) { - return map { qr/$_/i } @arrayunreg; - } - return map { qr/$_/ } @arrayunreg; + my ( $array, $notcasesensitive ) = @_; + my %seen; + + if ($notcasesensitive) { + + # Case insensitive + my $uncompiled_regex; + return map { + $uncompiled_regex = UnCompileRegex($_); + !$seen{ lc $uncompiled_regex }++ ? qr/$uncompiled_regex/i : () + } @$array; + } + + # Case sensitive + return map { !$seen{$_}++ ? $_ : () } @$array; } #------------------------------------------------------------------------------