FIX #24

author Laurent Destailleur <eldy@destailleur.fr>

Fri, 29 Apr 2016 09:49:06 +0000 (11:49 +0200)

committer Laurent Destailleur <eldy@destailleur.fr>

Fri, 29 Apr 2016 09:49:06 +0000 (11:49 +0200)
author Laurent Destailleur <eldy@destailleur.fr>
Fri, 29 Apr 2016 09:49:06 +0000 (11:49 +0200)
committer Laurent Destailleur <eldy@destailleur.fr>
Fri, 29 Apr 2016 09:49:06 +0000 (11:49 +0200)
diff --git a/make/fixdosfiles.sh b/make/fixdosfiles.sh

new file mode 100755 (executable)

index 0000000..72f4fca
--- /dev/null
+++ b/make/fixdosfiles.sh
@@ -0,0 +1,45 @@
+#!/bin/sh
+#------------------------------------------------------
+# Script to find files that are not Unix encoded (CR+LF line endings)
+# and optionally convert them with dos2unix.
+#
+# Laurent Destailleur - eldy@users.sourceforge.net
+#------------------------------------------------------
+# Usage: fixdosfiles.sh [list|fix]
+#   list : print the files that file(1) reports with CRLF line terminators
+#   fix  : run dos2unix on each of those files
+#------------------------------------------------------
+
+# Syntax
+if [ "x$1" != "xlist" ] && [ "x$1" != "xfix" ]
+then
+       echo "This script detect or clean files with CR+LF into files with LF only. All source files are included, also files into includes."
+       echo "Usage: fixdosfiles.sh [list|fix]"
+       exit 1
+fi
+
+# Print the file(1) report line ("path: description") of every source file
+# below the current directory whose description mentions CRLF.
+# Paths containing "/test" are skipped.
+list_crlf_files()
+{
+       find . \( -iname "functions" -o -iname "*.md" -o -iname "*.html" -o -iname "*.htm" -o -iname "*.php" -o -iname "*.sh" -o -iname "*.cml" -o -iname "*.css" -o -iname "*.js" -o -iname "*.lang" -o -iname "*.pl" -o -iname "*.txt" -o -iname "*.xml" \) -exec file "{}" + | grep -v '/test' | grep CRLF
+}
+
+# To detect
+if [ "x$1" = "xlist" ]
+then
+       list_crlf_files
+fi
+
+# To convert
+if [ "x$1" = "xfix" ]
+then
+       # Keep only the path (text before the first ':') and read it line by
+       # line, so that file names containing spaces are not split into words.
+       list_crlf_files | awk -F':' '{ print $1 }' | while IFS= read -r fic
+       do
+               echo "Fix file $fic"
+               dos2unix "$fic"
+       done
+fi
diff --git a/make/fixutf8bomfiles.sh b/make/fixutf8bomfiles.sh

new file mode 100755 (executable)

index 0000000..bda503d
--- /dev/null
+++ b/make/fixutf8bomfiles.sh
@@ -0,0 +1,56 @@
+#!/bin/sh
+#
+# Checks for, or fixes, files that contain a UTF-8 BOM in dolibarr source tree,
+# excluding git repository, custom modules and included libraries.
+#
+# Raphaël Doursenaud - rdoursenaud@gpcsolutions.fr
+# Laurent Destailleur  eldy@users.sourceforge.net
+#------------------------------------------------------
+# Usage: fixutf8bomfiles.sh (list|fix) [addincludes]
+#   list        : print the files whose first line starts with a UTF-8 BOM
+#   fix         : strip the BOM from the first line of each such file
+#   addincludes : also scan directories named "includes"
+#------------------------------------------------------
+
+# Syntax
+if [ "x$1" != "xlist" ] && [ "x$1" != "xfix" ]
+then
+       echo "Detect and fix bad UTF8 encoded files (UTF8 must not use BOM char)"
+       echo "Usage: fixutf8bomfiles.sh (list|fix) [addincludes]"
+       exit 1
+fi
+
+# Directories named "includes" are skipped unless addincludes is given.
+# The value is expanded unquoted below, so it must not carry quote
+# characters of its own: they would reach grep literally.
+moreoptions=""
+if [ "x$2" != "xaddincludes" ]
+then
+       moreoptions="--exclude-dir=includes"
+fi
+
+# Print, one per line, the path of every text file selected by the --include
+# patterns whose first line starts with the UTF-8 BOM bytes (EF BB BF).
+list_bom_files()
+{
+       grep -rlIZ --include='*.php' --include='*.sh' --include='*.pl' --include='*.lang' --include='*.txt' --exclude-dir='.git' --exclude-dir='.tx' $moreoptions --exclude-dir='custom' . . | xargs -0 awk '/^\xEF\xBB\xBF/ {print FILENAME} {nextfile}'
+}
+
+# To detect
+if [ "x$1" = "xlist" ]
+then
+       #find . \( -iname '*.php' -print0 -o -iname '*.sh' -print0 -o -iname '*.pl' -print0 -o -iname '*.lang' -print0 -o -iname '*.txt' \) -print0 | xargs -0 awk '/^\xEF\xBB\xBF/ {print FILENAME} {nextfile}'
+       echo "grep -rlIZ --include='*.php' --include='*.sh' --include='*.pl' --include='*.lang' --include='*.txt' --exclude-dir='.git' --exclude-dir='.tx' $moreoptions --exclude-dir='custom' . . | xargs -0 awk '/^\xEF\xBB\xBF/ {print FILENAME} {nextfile}'"
+       list_bom_files
+fi
+
+# To convert
+if [ "x$1" = "xfix" ]
+then
+       # Read one path per line so that file names containing spaces are
+       # not split into several words.
+       list_bom_files | while IFS= read -r fic
+       do
+               echo "Fixing $fic"
+               sed -i '1s/^\xEF\xBB\xBF//' "$fic"
+       done
+fi
diff --git a/wwwroot/cgi-bin/awdownloadcsv.pl b/wwwroot/cgi-bin/awdownloadcsv.pl

index c43acaa72926ece75c52e1c951fcd196acb42ebd..eaa61ad1414913724aadfc50cd25540fc0d80216 100755 (executable)
--- a/wwwroot/cgi-bin/awdownloadcsv.pl
+++ b/wwwroot/cgi-bin/awdownloadcsv.pl
@@ -1,152 +1,152 @@
-#!/usr/bin/perl -w\r
-#------------------------------------------------------------------------------\r
-# Free addition to AWStats Web Log Analyzer. Used to export the contents of\r
-# sections of the Apache server log database to CSV for use in other tools.\r
-# Works from command line or as a CGI. \r
-#\r
-# This program is free software; you can redistribute it and/or modify\r
-# it under the terms of the GNU General Public License as published by\r
-# the Free Software Foundation; either version 2 of the License, or\r
-# (at your option) any later version.\r
-#\r
-# This program is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\r
-# GNU General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU General Public License\r
-# along with this program. If not, see <http://www.gnu.org/licenses/>.\r
-#------------------------------------------------------------------------------\r
-use CGI qw(:standard);\r
-\r
-my $ALLOWDOWNLOAD=0;\r
-\r
-# Disabled by default for security reason\r
-if (! $ALLOWDOWNLOAD) \r
-{\r
-       print("Error: You must first edit script to change ALLOWDOWNLOAD to 1 to allow usage of this script.\n");\r
-       print("Reason is that enabling this script may be a security hole as it allows someone to download/view details of your awstats data files.\n");\r
-       exit;\r
-}\r
-\r
-my $q               = new CGI;\r
-my $outputFile      = "";   # used to write the output to a file\r
-my $inputFile       = "";   # the fully qualified path to the input log database file\r
-my $sectionToReport = "";   # contains the tag to search for in the database file\r
-my $startSearchStr  = "BEGIN_";\r
-my $endSearchStr    = "END_";\r
-my $startPrinting   = 0;    # flag to indicate that the start tag has been found\r
-my $attachFileName  = "";\r
-\r
-# These parameters are used to build the input file name of the awstats log database\r
-my $baseName        = "";\r
-my $month           = "";\r
-my $year            = "";\r
-my $day             = "";\r
-my $siteConfig      = "";\r
-\r
-if ($q->param("outputFile")) {\r
-  if ($outputFile eq '') { $outputFile = $q->param("outputFile"); }\r
-}\r
-\r
-if ($q->param("inputFile")) {\r
-  if ($inputFile eq '') { $inputFile = $q->param("inputFile"); }\r
-}\r
-\r
-if ($q->param("section")) {\r
-  if ($sectionToReport eq '' ) { $sectionToReport = $q->param("section"); }\r
-}\r
-\r
-if ($q->param("baseName")) {\r
-  if ($baseName eq '' ) { $baseName = $q->param("baseName"); }\r
-}\r
-\r
-if ($q->param("month")) {\r
-  if ($month eq '' ) { $month = $q->param("month"); }\r
-}\r
-\r
-if ($q->param("year")) {\r
-  if ($year eq '' ) { $year = $q->param("year"); }\r
-}\r
-\r
-if ($q->param("day")) { $day = $q->param("day"); }\r
-\r
-if ($q->param("siteConfig")) {\r
-  if ($siteConfig eq '' ) { $siteConfig = $q->param("siteConfig"); }\r
-}\r
-\r
-# set the attachment file name to the report section\r
-if ($sectionToReport ne '' ) {\r
-  $attachFileName = $sectionToReport . ".csv";\r
-} else {\r
-  $attachFileName = "exportCSV.csv";\r
-}\r
-print $q->header(-type=> "application/force-download", -attachment=>$attachFileName);\r
-\r
-# Build the start/end search tags\r
-$startSearchStr = $startSearchStr . $sectionToReport;\r
-$endSearchStr   = $endSearchStr . $sectionToReport;\r
-\r
-if ( !$inputFile ) { $inputFile ="$baseName$month$year$day.$siteConfig.txt" };\r
-\r
-open (IN, $inputFile) || die "cannot open $inputFile\n";\r
-\r
-# If there's a parameter for the output, open it here\r
-if ($outputFile ne '') {\r
-  open (OUT,">$outputFile") || die "cannot create $outputFile\n";\r
-  flock (OUT, 2);\r
-}\r
-# Loop through the input file searching for the start string. When\r
-# found, start displaying the input lines (with spaces changed\r
-# to commas) until the end tag is found.\r
-\r
-# Array to store comments for printing once we hit the desired section\r
-my $commentCount = -1;\r
-my %commentArray;\r
-\r
-while (<IN>) {\r
-  chomp;\r
-\r
-  if (/^#\s(.*-)\s/){    # search for comment lines\r
-    s/ - /,/g;   # replace dashes with commas\r
-    s/#//;       # get rid of the comment sign\r
-    $commentArray[++$commentCount] = $_;\r
-  }\r
-\r
-  # put the test to end printing here to eliminate printing\r
-  # the line with the END tag\r
-  if (/^$endSearchStr\b/) {\r
-    $startPrinting = 0;\r
-  }\r
-\r
-  if ($startPrinting) {\r
-    s/ /,/g;\r
-    print "$_\n";\r
-    if ($outputFile ne '') {\r
-      print OUT "$_\n";\r
-    }\r
-  }\r
-  # if we find an END tag and we haven't started printing, reset the\r
-  # comment array to start re-capturing comments for next section\r
-  if ((/^END_/) && ($startPrinting == 0)) {\r
-    $commentCount = -1;\r
-  }\r
-\r
-  # put the start printing test after the first input line\r
-  # to eliminate printing the line with the BEGIN tag...find it\r
-  # here, then start printing on the next input line\r
-  if (/^$startSearchStr\b/) {\r
-    $startPrinting = 1;\r
-    # print the comment array - it provides labels for the columns\r
-    for ($i = 0; $i <= $commentCount; $i++ ) {\r
-    print "$commentArray[$i]\n";\r
-    }\r
-  }\r
-}\r
-\r
-close(IN);\r
-\r
-# Close the output file if there was one used\r
-if ($outputFile ne '') {\r
-  close(OUT);\r
-}\r
+#!/usr/bin/perl -w
+#------------------------------------------------------------------------------
+# Free addition to AWStats Web Log Analyzer. Used to export the contents of
+# sections of the Apache server log database to CSV for use in other tools.
+# Works from command line or as a CGI. 
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#------------------------------------------------------------------------------
+use CGI qw(:standard);
+
+my $ALLOWDOWNLOAD=0;
+
+# Disabled by default for security reason
+if (! $ALLOWDOWNLOAD) 
+{
+       print("Error: You must first edit script to change ALLOWDOWNLOAD to 1 to allow usage of this script.\n");
+       print("Reason is that enabling this script may be a security hole as it allows someone to download/view details of your awstats data files.\n");
+       exit;
+}
+
+my $q               = new CGI;
+my $outputFile      = "";   # used to write the output to a file
+my $inputFile       = "";   # the fully qualified path to the input log database file
+my $sectionToReport = "";   # contains the tag to search for in the database file
+my $startSearchStr  = "BEGIN_";
+my $endSearchStr    = "END_";
+my $startPrinting   = 0;    # flag to indicate that the start tag has been found
+my $attachFileName  = "";
+
+# These parameters are used to build the input file name of the awstats log database
+my $baseName        = "";
+my $month           = "";
+my $year            = "";
+my $day             = "";
+my $siteConfig      = "";
+
+if ($q->param("outputFile")) {
+  if ($outputFile eq '') { $outputFile = $q->param("outputFile"); }
+}
+
+if ($q->param("inputFile")) {
+  if ($inputFile eq '') { $inputFile = $q->param("inputFile"); }
+}
+
+if ($q->param("section")) {
+  if ($sectionToReport eq '' ) { $sectionToReport = $q->param("section"); }
+}
+
+if ($q->param("baseName")) {
+  if ($baseName eq '' ) { $baseName = $q->param("baseName"); }
+}
+
+if ($q->param("month")) {
+  if ($month eq '' ) { $month = $q->param("month"); }
+}
+
+if ($q->param("year")) {
+  if ($year eq '' ) { $year = $q->param("year"); }
+}
+
+if ($q->param("day")) { $day = $q->param("day"); }
+
+if ($q->param("siteConfig")) {
+  if ($siteConfig eq '' ) { $siteConfig = $q->param("siteConfig"); }
+}
+
+# set the attachment file name to the report section
+if ($sectionToReport ne '' ) {
+  $attachFileName = $sectionToReport . ".csv";
+} else {
+  $attachFileName = "exportCSV.csv";
+}
+print $q->header(-type=> "application/force-download", -attachment=>$attachFileName);
+
+# Build the start/end search tags
+$startSearchStr = $startSearchStr . $sectionToReport;
+$endSearchStr   = $endSearchStr . $sectionToReport;
+
+if ( !$inputFile ) { $inputFile ="$baseName$month$year$day.$siteConfig.txt" };
+# Three-argument open: the name is never parsed as a mode or a pipe command
+open (IN, '<', $inputFile) || die "cannot open $inputFile\n";
+
+# If there's a parameter for the output, open it here
+if ($outputFile ne '') {
+  open (OUT, '>', $outputFile) || die "cannot create $outputFile\n";
+  flock (OUT, 2);
+}
+# Loop through the input file searching for the start string. When
+# found, start displaying the input lines (with spaces changed
+# to commas) until the end tag is found.
+
+# Array to store comments for printing once we hit the desired section
+my $commentCount = -1;
+my @commentArray;
+
+while (<IN>) {
+  chomp;
+
+  if (/^#\s(.*-)\s/){    # search for comment lines
+    s/ - /,/g;   # replace dashes with commas
+    s/#//;       # get rid of the comment sign
+    $commentArray[++$commentCount] = $_;
+  }
+
+  # put the test to end printing here to eliminate printing
+  # the line with the END tag
+  if (/^\Q$endSearchStr\E\b/) {    # \Q..\E: match the section name literally
+    $startPrinting = 0;
+  }
+
+  if ($startPrinting) {
+    s/ /,/g;
+    print "$_\n";
+    if ($outputFile ne '') {
+      print OUT "$_\n";
+    }
+  }
+  # if we find an END tag and we haven't started printing, reset the
+  # comment array to start re-capturing comments for next section
+  if ((/^END_/) && ($startPrinting == 0)) {
+    $commentCount = -1;
+  }
+
+  # put the start printing test after the first input line
+  # to eliminate printing the line with the BEGIN tag...find it
+  # here, then start printing on the next input line
+  if (/^\Q$startSearchStr\E\b/) {  # \Q..\E: match the section name literally
+    $startPrinting = 1;
+    # print the comment array - it provides labels for the columns
+    for (my $i = 0; $i <= $commentCount; $i++ ) {
+    print "$commentArray[$i]\n";
+    }
+  }
+}
+
+close(IN);
+
+# Close the output file if there was one used
+if ($outputFile ne '') {
+  close(OUT);
+}
diff --git a/wwwroot/cgi-bin/lang/awstats-cn.txt b/wwwroot/cgi-bin/lang/awstats-cn.txt

index cb6e7bc084fa18fa9b6ded8cd70bcdcb1be18e40..0e797f7e16212c3c9812490224a2d5f7cd41ebbd 100644 (file)
--- a/wwwroot/cgi-bin/lang/awstats-cn.txt
+++ b/wwwroot/cgi-bin/lang/awstats-cn.txt
@@ -1,182 +1,182 @@
-# Chinese (simplified) message file (by Che Dong chedongATgmail.com)\r
-# $Revision$ - $Date$\r
-PageCode=GBK\r
-message0=ÎÞ·¨µÃÖª\r
-message1=ÎÞ·¨µÃÖª£¨²»ÄÜ·´Ïò½âÎöµÄÍøÓòÃû³Æ£©\r
-message2=ÆäËû\r
-message3=²é¿´ÏêÏ¸×ÊÁÏ\r
-message4=ÈÕÆÚ\r
-message5=ÔÂ\r
-message6=Äê\r
-message7=Í³¼ÆÍøÕ¾\r
-message8=Ê×´Î²Î¹ÛÈÕÆÚ\r
-message9=×î½ü²Î¹ÛÈÕÆÚ\r
-message10=²Î¹ÛÈË´Î\r
-message11=²Î¹ÛÕß\r
-message12=²Î¹Û\r
-message13=¸ö¹Ø¼ü×Ö´Ê\r
-message14=ËÑË÷\r
-message15=°Ù·Ö±È\r
-message16=Á÷Á¿Í³¼Æ\r
-message17=ÍøÓò»ò¹ú¼Ò\r
-message18=²Î¹ÛÕß\r
-message19=URL ÍøÖ·\r
-message20=Ã¿Ð¡Ê±ä¯ÀÀ´ÎÊý\r
-message21=ä¯ÀÀÆ÷\r
-message22=HTTP ´íÎó\r
-message23=·´ÏàÁ´½Ó\r
-message24=´ÓÎ´¸üÐÂ£¨Çë²Î¿¼ awstats_setup.htmlÉÏµÄ 'Build/Update'£©\r
-message25=²Î¹ÛÕßµÄÍøÓò»ò¹ú¼Ò\r
-message26=Ö÷»úÊý\r
-message27=ÍøÒ³Êý\r
-message28=¸ö²»Í¬µÄÍøÒ³\r
-message29=´æÈ¡´ÎÊý\r
-message30=²»Í¬µÄ×Ö´Ê\r
-message31=ÕÒ²»µ½µÄÍøÒ³\r
-message32=HTTP ´íÎóÂë\r
-message33=Netscape °æ±¾\r
-message34=IE °æ±¾\r
-message35=×î½ü¸üÐÂ\r
-message36=Á´½ÓÍøÕ¾µÄ·½·¨\r
-message37=À´Ô´ÍøÖ·\r
-message38=ÍøÖ·ÓÉ²Î¹ÛÕß×ÔÐÐÊäÈë»ò´ÓÊéÇ©È¡³ö\r
-message39=ÎÞ·¨µÃÖªÁ¬½áµÄ·½·¨\r
-message40=À´×ÔËÑË÷ÒýÇæ\r
-message41=À´×Ô´ËÍøÕ¾ÍâµÄÆäËûÍøÒ³ (·ÇËÑË÷ÒýÇæ)\r
-message42=´ÓÍøÕ¾ÄÚ²¿Á¬½á\r
-message43=ÍøÕ¾ËÑË÷µÄ¹Ø¼ü×Ö¾ä\r
-message44=ÍøÕ¾ËÑË÷µÄ¹Ø¼ü×Ö´Ê\r
-message45=ÎÞ·¨·´½âÒëµÄIPµØÖ·\r
-message46=ÎÞ·¨µÃÖªµÄ²Ù×÷ÏµÍ³\r
-message47=ÕÒ²»µ½µÄÍøÖ·Á´½Ó (HTTP ´íÎóÂë 404)\r
-message48=IP µØÖ·\r
-message49=´íÎó´ÎÊý\r
-message50=ÎÞ·¨µÃÖªµÄä¯ÀÀÆ÷\r
-message51=¸ö»úÆ÷ÈË\r
-message52=²Î¹ÛÈË´Î/²Î¹ÛÕß\r
-message53=ËÑË÷ÒýÇæÍøÕ¾µÄ»úÆ÷ÈË\r
-message54=ÍøÒ³¼ÍÂ¼·ÖÎöÏµÍ³\r
-message55=¸öì¶\r
-message56=ÍøÒ³Êý\r
-message57=ÎÄ¼þÊý\r
-message58=°æ±¾\r
-message59=²Ù×÷ÏµÍ³\r
-message60=01ÔÂ\r
-message61=02ÔÂ\r
-message62=03ÔÂ\r
-message63=04ÔÂ\r
-message64=05ÔÂ\r
-message65=06ÔÂ\r
-message66=07ÔÂ\r
-message67=08ÔÂ\r
-message68=09ÔÂ\r
-message69=10ÔÂ\r
-message70=11ÔÂ\r
-message71=12ÔÂ\r
-message72=ä¯ÀÀÆ÷Í³¼Æ\r
-message73=ÎÄ¼þÀà±ð\r
-message74=Á¢¼´¸üÐÂ\r
-message75=×Ö½Ú\r
-message76=»Øµ½Ö÷Ò³\r
-message77=ǰ\r
-message78=yyyyÄêmmÔÂddÈÕ HH:MM\r
-message79=¹ýÂË°üº¬\r
-message80=È«²¿ÁÐ³ö\r
-message81=Ö÷»ú\r
-message82=¸ö½âÒë³É¹¦\r
-message83=ËÑË÷ÒýÇæÍøÕ¾\r
-message84=ÈÕ\r
-message85=һ\r
-message86=¶þ\r
-message87=Èý\r
-message88=ËÄ\r
-message89=Îå\r
-message90=Áù\r
-message91=°´ÐÇÆÚ\r
-message92=°´²Î¹ÛÕß\r
-message93=°´²Î¹ÛÊ±¼ä\r
-message94=¼ø±ð³öµÄÓÃ»§\r
-message95=×îÐ¡\r
-message96=Æ½¾ùÊý\r
-message97=×î´ó\r
-message98=ÍøÒ³Ñ¹Ëõ\r
-message99=½ÚÊ¡ÁËµÄ´ø¿í\r
-message100=Ñ¹ËõÇ°\r
-message101=Ñ¹Ëõºó\r
-message102=×ÜÊý\r
-message103=¸ö²»Í¬µÄ¹Ø¼ü×Ö¾ä\r
-message104=ÈëÕ¾´¦\r
-message105=±àÂë\r
-message106=Æ½¾ù´óÐ¡\r
-message107=´ÓÐÂÎÅÈº×éÁ´½Ó\r
-message108=KB\r
-message109=MB\r
-message110=GB\r
-message111=ÀëÏßä¯ÀÀÆ÷£¨ÍøÒ³×¥È¡£©\r
-message112=ÊÇ\r
-message113=·ñ\r
-message114=Whois ÐÅÏ¢\r
-message115=OK\r
-message116=³öÕ¾´¦\r
-message117=Ã¿´Î²Î¹ÛËù»¨Ê±¼ä\r
-message118=¹Ø±Õ´Ë´°¿Ú\r
-message119=Bytes\r
-message120=ÓÃÒÔËÑË÷µÄ¶ÌÓï\r
-message121=ÓÃÒÔËÑË÷µÄ¹Ø¼ü´Ê\r
-message122=¸ö²»Í¬µÄËÑË÷ÒýÇæ×ª½é²Î¹ÛÕßµ½ÕâÕ¾\r
-message123=¸ö²»Í¬µÄÆäËûÍøÕ¾×ª½é²Î¹ÛÕßµ½ÕâÕ¾\r
-message124=ÆäËû¶ÌÓï\r
-message125=ÆäËûµÇÂ¼ (°üÀ¨ÄäÃûµÇÂ¼)\r
-message126=ÓÉÄÇÐ©ËÑË÷ÒýÇæ×ª½é\r
-message127=ÓÉÄÇÐ©ÆäËûÍøÕ¾×ª½é\r
-message128=ժҪ\r
-message129=×÷È«ÄêÍ³¼ÆÊ±£¬ÎÞ·¨×¼È·µÃÖª²Î¹ÛÕßµÄÊýÄ¿\r
-message130=Êý¾ÝÖµÊý×é\r
-message131=·¢ÐÅÈËÓÊÖ·\r
-message132=ÊÕÐÅÈËÓÊ¼þµØÖ·\r
-message133=±¨±íÈÕÆÚ\r
-message134=ÌØ±ð/ÊÐ³¡\r
-message135=ÆÁÄ»·Ö±æÂÊ\r
-message136=Èä³æ/²¡¶¾ ¹¥»÷\r
-message137=¼ÓÈëµ½ÊÕ²Ø¼Ð(¹À¼Æ)\r
-message138=°´ÈÕÆÚÍ³¼Æ\r
-message139=ÆäËû\r
-message140=ä¯ÀÀÆ÷Ö§³Ö Java\r
-message141=ä¯ÀÀÆ÷Ö§³Ö Macromedia Director\r
-message142=ä¯ÀÀÆ÷Ö§³Ö Flash\r
-message143=ä¯ÀÀÆ÷Ö§³Ö Real audio ²¥·Å\r
-message144=ä¯ÀÀÆ÷Ö§³Ö Quicktime audio ²¥·Å\r
-message145=ä¯ÀÀÆ÷Ö§³Ö Windows Media audio ²¥·Å\r
-message146=ä¯ÀÀÆ÷Ö§³Ö PDF\r
-message147=SMTP´íÎó´úÂë\r
-message148=¹ú¼Ò»òµØÇø\r
-message149=ÓÊ¼þ\r
-message150=´óÐ¡\r
-message151=µÚÒ»¸ö\r
-message152=×îÄ©Ò»¸ö\r
-message153=¹ýÂË²»°üº¬\r
-message154=·Çä¯ÀÀÆ÷²úÉúµÄÁ÷Á¿£¨À´×ÔËÑË÷ÒýÇæ»úÆ÷ÈË£¬²¡¶¾Èä³æµÈ£©\r
-message155=¼¯Èº\r
-message156=ÒÔÉÏÁÐ³öµÄËÑË÷ÒýÇæ»úÆ÷ÈË²úÉúµÄ¡°·Çä¯ÀÀÆ÷¡±Á÷Á¿²¢Î´°üº¬ÔÚÆäËûÍ¼±íÖÐ\r
-message157=¡°+¡±ºóµÄÊý×ÖÎª³É¹¦µÄ¡°robots.txt¡±·ÃÎÊ´ÎÊý\r
-message158=ÒÔÉÏÁÐ³öµÄÈä³æ²úÉúµÄ¡°·Çä¯ÀÀÆ÷¡±Á÷Á¿²¢Î´°üº¬ÔÚÆäËûÍ¼±íÖÐ\r
-message159=·Çä¯ÀÀµÄÁ÷Á¿°üÀ¨ËÑË÷ÒýÇæ»úÆ÷ÈË£¬Èä³æ²¡¶¾²úÉúµÄÁ÷Á¿ºÍ·ÇÕý³£µÄHTTPÏàÓ¦\r
-message160=ä¯ÀÀÆ÷Á÷Á¿\r
-message161=·Çä¯ÀÀÆ÷Á÷Á¿\r
-message162=°´ÔÂÀúÊ·Í³¼Æ\r
-message163=Èä³æ\r
-message164=²»Í¬µÄÈä³æ\r
-message165=³É¹¦·¢ËÍÓÊ¼þ\r
-message166=ÓÊ¼þÊ§°Ü»ò¾ÜÊÕ\r
-message167=Ãô¸ÐÄ¿±ê\r
-message168=Javascript½ûÓÃ\r
-message169=´´½¨Õß\r
-message170=²å¼þ\r
-message171=µØÇø\r
-message172=³ÇÊÐ\r
-message173=Opera °æ±¾\r
-message174=Safari °æ±¾\r
-message175=Chrome °æ±¾\r
-message176=Konqueror °æ±¾\r
-message177=,\r
-message178=ÏÂÔØ\r
+# Chinese (simplified) message file (by Che Dong chedongATgmail.com)
+# $Revision$ - $Date$
+PageCode=GBK
+message0=ÎÞ·¨µÃÖª
+message1=ÎÞ·¨µÃÖª£¨²»ÄÜ·´Ïò½âÎöµÄÍøÓòÃû³Æ£©
+message2=ÆäËû
+message3=²é¿´ÏêÏ¸×ÊÁÏ
+message4=ÈÕÆÚ
+message5=ÔÂ
+message6=Äê
+message7=Í³¼ÆÍøÕ¾
+message8=Ê×´Î²Î¹ÛÈÕÆÚ
+message9=×î½ü²Î¹ÛÈÕÆÚ
+message10=²Î¹ÛÈË´Î
+message11=²Î¹ÛÕß
+message12=²Î¹Û
+message13=¸ö¹Ø¼ü×Ö´Ê
+message14=ËÑË÷
+message15=°Ù·Ö±È
+message16=Á÷Á¿Í³¼Æ
+message17=ÍøÓò»ò¹ú¼Ò
+message18=²Î¹ÛÕß
+message19=URL ÍøÖ·
+message20=Ã¿Ð¡Ê±ä¯ÀÀ´ÎÊý
+message21=ä¯ÀÀÆ÷
+message22=HTTP ´íÎó
+message23=·´ÏàÁ´½Ó
+message24=´ÓÎ´¸üÐÂ£¨Çë²Î¿¼ awstats_setup.htmlÉÏµÄ 'Build/Update'£©
+message25=²Î¹ÛÕßµÄÍøÓò»ò¹ú¼Ò
+message26=Ö÷»úÊý
+message27=ÍøÒ³Êý
+message28=¸ö²»Í¬µÄÍøÒ³
+message29=´æÈ¡´ÎÊý
+message30=²»Í¬µÄ×Ö´Ê
+message31=ÕÒ²»µ½µÄÍøÒ³
+message32=HTTP ´íÎóÂë
+message33=Netscape °æ±¾
+message34=IE °æ±¾
+message35=×î½ü¸üÐÂ
+message36=Á´½ÓÍøÕ¾µÄ·½·¨
+message37=À´Ô´ÍøÖ·
+message38=ÍøÖ·ÓÉ²Î¹ÛÕß×ÔÐÐÊäÈë»ò´ÓÊéÇ©È¡³ö
+message39=ÎÞ·¨µÃÖªÁ¬½áµÄ·½·¨
+message40=À´×ÔËÑË÷ÒýÇæ
+message41=À´×Ô´ËÍøÕ¾ÍâµÄÆäËûÍøÒ³ (·ÇËÑË÷ÒýÇæ)
+message42=´ÓÍøÕ¾ÄÚ²¿Á¬½á
+message43=ÍøÕ¾ËÑË÷µÄ¹Ø¼ü×Ö¾ä
+message44=ÍøÕ¾ËÑË÷µÄ¹Ø¼ü×Ö´Ê
+message45=ÎÞ·¨·´½âÒëµÄIPµØÖ·
+message46=ÎÞ·¨µÃÖªµÄ²Ù×÷ÏµÍ³
+message47=ÕÒ²»µ½µÄÍøÖ·Á´½Ó (HTTP ´íÎóÂë 404)
+message48=IP µØÖ·
+message49=´íÎó´ÎÊý
+message50=ÎÞ·¨µÃÖªµÄä¯ÀÀÆ÷
+message51=¸ö»úÆ÷ÈË
+message52=²Î¹ÛÈË´Î/²Î¹ÛÕß
+message53=ËÑË÷ÒýÇæÍøÕ¾µÄ»úÆ÷ÈË
+message54=ÍøÒ³¼ÍÂ¼·ÖÎöÏµÍ³
+message55=¸öì¶
+message56=ÍøÒ³Êý
+message57=ÎÄ¼þÊý
+message58=°æ±¾
+message59=²Ù×÷ÏµÍ³
+message60=01ÔÂ
+message61=02ÔÂ
+message62=03ÔÂ
+message63=04ÔÂ
+message64=05ÔÂ
+message65=06ÔÂ
+message66=07ÔÂ
+message67=08ÔÂ
+message68=09ÔÂ
+message69=10ÔÂ
+message70=11ÔÂ
+message71=12ÔÂ
+message72=ä¯ÀÀÆ÷Í³¼Æ
+message73=ÎÄ¼þÀà±ð
+message74=Á¢¼´¸üÐÂ
+message75=×Ö½Ú
+message76=»Øµ½Ö÷Ò³
+message77=ǰ
+message78=yyyyÄêmmÔÂddÈÕ HH:MM
+message79=¹ýÂË°üº¬
+message80=È«²¿ÁÐ³ö
+message81=Ö÷»ú
+message82=¸ö½âÒë³É¹¦
+message83=ËÑË÷ÒýÇæÍøÕ¾
+message84=ÈÕ
+message85=һ
+message86=¶þ
+message87=Èý
+message88=ËÄ
+message89=Îå
+message90=Áù
+message91=°´ÐÇÆÚ
+message92=°´²Î¹ÛÕß
+message93=°´²Î¹ÛÊ±¼ä
+message94=¼ø±ð³öµÄÓÃ»§
+message95=×îÐ¡
+message96=Æ½¾ùÊý
+message97=×î´ó
+message98=ÍøÒ³Ñ¹Ëõ
+message99=½ÚÊ¡ÁËµÄ´ø¿í
+message100=Ñ¹ËõÇ°
+message101=Ñ¹Ëõºó
+message102=×ÜÊý
+message103=¸ö²»Í¬µÄ¹Ø¼ü×Ö¾ä
+message104=ÈëÕ¾´¦
+message105=±àÂë
+message106=Æ½¾ù´óÐ¡
+message107=´ÓÐÂÎÅÈº×éÁ´½Ó
+message108=KB
+message109=MB
+message110=GB
+message111=ÀëÏßä¯ÀÀÆ÷£¨ÍøÒ³×¥È¡£©
+message112=ÊÇ
+message113=·ñ
+message114=Whois ÐÅÏ¢
+message115=OK
+message116=³öÕ¾´¦
+message117=Ã¿´Î²Î¹ÛËù»¨Ê±¼ä
+message118=¹Ø±Õ´Ë´°¿Ú
+message119=Bytes
+message120=ÓÃÒÔËÑË÷µÄ¶ÌÓï
+message121=ÓÃÒÔËÑË÷µÄ¹Ø¼ü´Ê
+message122=¸ö²»Í¬µÄËÑË÷ÒýÇæ×ª½é²Î¹ÛÕßµ½ÕâÕ¾
+message123=¸ö²»Í¬µÄÆäËûÍøÕ¾×ª½é²Î¹ÛÕßµ½ÕâÕ¾
+message124=ÆäËû¶ÌÓï
+message125=ÆäËûµÇÂ¼ (°üÀ¨ÄäÃûµÇÂ¼)
+message126=ÓÉÄÇÐ©ËÑË÷ÒýÇæ×ª½é
+message127=ÓÉÄÇÐ©ÆäËûÍøÕ¾×ª½é
+message128=ժҪ
+message129=×÷È«ÄêÍ³¼ÆÊ±£¬ÎÞ·¨×¼È·µÃÖª²Î¹ÛÕßµÄÊýÄ¿
+message130=Êý¾ÝÖµÊý×é
+message131=·¢ÐÅÈËÓÊÖ·
+message132=ÊÕÐÅÈËÓÊ¼þµØÖ·
+message133=±¨±íÈÕÆÚ
+message134=ÌØ±ð/ÊÐ³¡
+message135=ÆÁÄ»·Ö±æÂÊ
+message136=Èä³æ/²¡¶¾ ¹¥»÷
+message137=¼ÓÈëµ½ÊÕ²Ø¼Ð(¹À¼Æ)
+message138=°´ÈÕÆÚÍ³¼Æ
+message139=ÆäËû
+message140=ä¯ÀÀÆ÷Ö§³Ö Java
+message141=ä¯ÀÀÆ÷Ö§³Ö Macromedia Director
+message142=ä¯ÀÀÆ÷Ö§³Ö Flash
+message143=ä¯ÀÀÆ÷Ö§³Ö Real audio ²¥·Å
+message144=ä¯ÀÀÆ÷Ö§³Ö Quicktime audio ²¥·Å
+message145=ä¯ÀÀÆ÷Ö§³Ö Windows Media audio ²¥·Å
+message146=ä¯ÀÀÆ÷Ö§³Ö PDF
+message147=SMTP´íÎó´úÂë
+message148=¹ú¼Ò»òµØÇø
+message149=ÓÊ¼þ
+message150=´óÐ¡
+message151=µÚÒ»¸ö
+message152=×îÄ©Ò»¸ö
+message153=¹ýÂË²»°üº¬
+message154=·Çä¯ÀÀÆ÷²úÉúµÄÁ÷Á¿£¨À´×ÔËÑË÷ÒýÇæ»úÆ÷ÈË£¬²¡¶¾Èä³æµÈ£©
+message155=¼¯Èº
+message156=ÒÔÉÏÁÐ³öµÄËÑË÷ÒýÇæ»úÆ÷ÈË²úÉúµÄ¡°·Çä¯ÀÀÆ÷¡±Á÷Á¿²¢Î´°üº¬ÔÚÆäËûÍ¼±íÖÐ
+message157=¡°+¡±ºóµÄÊý×ÖÎª³É¹¦µÄ¡°robots.txt¡±·ÃÎÊ´ÎÊý
+message158=ÒÔÉÏÁÐ³öµÄÈä³æ²úÉúµÄ¡°·Çä¯ÀÀÆ÷¡±Á÷Á¿²¢Î´°üº¬ÔÚÆäËûÍ¼±íÖÐ
+message159=·Çä¯ÀÀµÄÁ÷Á¿°üÀ¨ËÑË÷ÒýÇæ»úÆ÷ÈË£¬Èä³æ²¡¶¾²úÉúµÄÁ÷Á¿ºÍ·ÇÕý³£µÄHTTPÏàÓ¦
+message160=ä¯ÀÀÆ÷Á÷Á¿
+message161=·Çä¯ÀÀÆ÷Á÷Á¿
+message162=°´ÔÂÀúÊ·Í³¼Æ
+message163=Èä³æ
+message164=²»Í¬µÄÈä³æ
+message165=³É¹¦·¢ËÍÓÊ¼þ
+message166=ÓÊ¼þÊ§°Ü»ò¾ÜÊÕ
+message167=Ãô¸ÐÄ¿±ê
+message168=Javascript½ûÓÃ
+message169=´´½¨Õß
+message170=²å¼þ
+message171=µØÇø
+message172=³ÇÊÐ
+message173=Opera °æ±¾
+message174=Safari °æ±¾
+message175=Chrome °æ±¾
+message176=Konqueror °æ±¾
+message177=,
+message178=ÏÂÔØ
diff --git a/wwwroot/cgi-bin/lang/awstats-lv.txt b/wwwroot/cgi-bin/lang/awstats-lv.txt

index 9f3985ae545ad89af7ff4e9bf566581700ab6b5f..fce13900b8b6f40d96a21f4cf15d9f13d185409f 100644 (file)
--- a/wwwroot/cgi-bin/lang/awstats-lv.txt
+++ b/wwwroot/cgi-bin/lang/awstats-lv.txt
@@ -1,178 +1,178 @@
-# Latviešu valodas ziņojumu fails (madmaster@gobbo.caves.lv)\r
-# Updated by edvinsma@inbox.lv 2004/01/24 00:40:00\r
-# $Revision$ - $Date$\r
-PageCode=utf-8\r
-message0=Nezināms\r
-message1=Nezināms (neatpazīts ip)\r
-message2=Citi\r
-message3=Apskatīt izvērsti\r
-message4=Diena\r
-message5=Mēnesis\r
-message6=Gads\r
-message7=Statistika\r
-message8=Pirmais apmeklējums\r
-message9=Pēdējais apmeklējums\r
-message10=Vizīšu skaits\r
-message11=Unikālie apmeklētāji\r
-message12=Apmeklējums\r
-message13=atšķirīgi(s) atslēgvārdi(s)\r
-message14=Meklēt\r
-message15=Procenti\r
-message16=Trafiks\r
-message17=Domaini/Valstis\r
-message18=Apmeklētāji\r
-message19=Lapas-URL\r
-message20=Stundas\r
-message21=Pārlūkprogrammas\r
-message22=HTTP Kļūdas\r
-message23=Norādītāji\r
-message24=Meklēt&nbsp;Atslēgvārdus\r
-message25=Apmeklētāju domaini/valstis\r
-message26=hosti\r
-message27=lapas\r
-message28=atšķirīgas lapas\r
-message29=Skatītas lapas\r
-message30=Citi vārdi\r
-message31=Neatrastas lapas\r
-message32=HTTP Kļūdu kodi\r
-message33=Netscape versijas\r
-message34=IE versijas\r
-message35=Pēdējais jauninājums\r
-message36=Pievienoties saitei no\r
-message37=Oriģināli\r
-message38=Tiešā adrese / Grāmatzīmes\r
-message39=Orģināls nezināms\r
-message40=Norādes no Interneta Meklēšanas Saitēm\r
-message41=Norādes no ārējām lapām (citas web lapas izņemot meklēšanas saites)\r
-message42=Links from an internal page (cita lapa šajā pašā saitē)\r
-message43=Atslēgvārdi kas lietoti meklēšanas saitēs\r
-message44=Kb\r
-message45=Neatpazītas IP Addreses\r
-message46=Nezināms OS (Norādes Lauks)\r
-message47=Pieprasīts bet neatrasts URLs (HTTP kods 404)\r
-message48=IP Addrese\r
-message49=Kļuda&nbsp;Trāpījumi\r
-message50=Nezināmi pārlūki (Norādes lauks)\r
-message51=Apmeklējušie roboti\r
-message52=apmeklējumi/apmeklētāji\r
-message53=Roboti/Zirnekļi apmeklētāji\r
-message54=Brīvs reālā laika logfailu analizators advancētai web statistikai\r
-message55=no\r
-message56=Lapas\r
-message57=Trāpījumi\r
-message58=Versijas\r
-message59=Operētājsistēmas\r
-message60=Jan\r
-message61=Feb\r
-message62=Mar\r
-message63=Apr\r
-message64=Mai\r
-message65=Jūn\r
-message66=Jūl\r
-message67=Aug\r
-message68=Sep\r
-message69=Okt\r
-message70=Nov\r
-message71=Dec\r
-message72=Navigācija\r
-message73=Failu tips\r
-message74=Atjaunot\r
-message75=Baiti\r
-message76=Atpakaļ uz galveno lapu\r
-message77=Augša\r
-message78=dd mmm yyyy - HH:MM\r
-message79=Filtrs\r
-message80=Pilns saraksts\r
-message81=Hosti\r
-message82=Zināms\r
-message83=Roboti\r
-message84=Sv\r
-message85=Pir\r
-message86=Ot\r
-message87=Tr\r
-message88=Ce\r
-message89=Pkt\r
-message90=Se\r
-message91=Nedēļas dienas\r
-message92=Kas\r
-message93=Kad\r
-message94=Autentificētie lietotāji\r
-message95=Min\r
-message96=Vid\r
-message97=Maks\r
-message98=Web salīdzinājums\r
-message99=saglabātais joslas platums\r
-message100=Pirms kompresijas\r
-message101=Pēc kompresijas\r
-message102=Kopā\r
-message103=Atšķirīgi atslēgvārdi\r
-message104=Iejas lapas\r
-message105=Kods\r
-message106=Vidējais izmērs\r
-message107=Saites no Ziņu grupām\r
-message108=KB\r
-message109=MB\r
-message110=GB\r
-message111=Savācējs\r
-message112=Jā\r
-message113=Nē\r
-message114=WhoIs informācija\r
-message115=OK\r
-message116=Izejas pages\r
-message117=Apmeklējuma ilgums\r
-message118=Aizvērt logu\r
-message119=Baiti\r
-message120=Meklēšanas&nbsp;atslēgfrāzes\r
-message121=Meklēšanas&nbsp;atslēgvārdi\r
-message122=Citas meklētāju lapas ar atsaucēm\r
-message123=Citas lapas ar atsaucēm\r
-message124=Citas frāzes\r
-message125=Anonīmie lietotāji\r
-message126=Meklētāju lapas ar atsaucēm\r
-message127=Lapas ar atsaucēm\r
-message128=Kopsavilkums\r
-message129=Precīza vērtība sadaļā &quot;Gads&quot; nav pieejama\r
-message130=Datu vērību kopnes\r
-message131=Sūtītāja adrese\r
-message132=Saņēmēja adrese\r
-message133=Atskaites periods\r
-message134=Papildus/Mārketings\r
-message135=Ekrāna izšķiršanas spēja\r
-message136=Vīrusu uzbrukumi\r
-message137=Pievienots izlasei\r
-message138=Mēneša dienas\r
-message139=Dažādi\r
-message140=Pārlūkprogrammas ar Java atbalstu\r
-message141=Pārlūkprogrammas ar Macromedia Director atbalstu\r
-message142=Pārlūkprogrammas ar Flash atbalstu\r
-message143=Pārlūkprogrammas ar RealAudio atbalstu\r
-message144=Pārlūkprogrammas ar QuickTime atbalstu\r
-message145=Pārlūkprogrammas ar Windows Media atbalstu\r
-message146=Pārlūkprogrammas ar PDF atbalstu\r
-message147=SMTP kļūdu kodi\r
-message148=Valstis\r
-message149=E-pasti\r
-message150=Izmērs\r
-message151=Sākums\r
-message152=Beigas\r
-message153=Izslēgšanas filtrs\r
-message154=Šeit kodi parāda šāvienus vai trafiku, ko nav apskatījuši lietotāji, tāpēc viņi nav iekļauti citās diagrammās.\r
-message155=Puduris\r
-message156=Šeit uzrādītie roboti ir radījuši trāpijumus vai "nepskatīto" trafiku, tāpēc tie nav iekļauti citās diagrammās. \r
-message157=Skaitlis pēc "+" ir veiksmīgo šāvienu skaits robots.txt failam.\r
-message158=Šie ir uzrādīti trāpijumi vai trafiks ko radīja tīkla tārpi vai arī "neapskatītās" lapas, tāpēc tie nav iekļauti citās \r
-diagrammās.\r
-message159="Neapskatīto" trafiku ģenerē roboti, tīkla tārpi, vai arī atbildes ar specialo HTTP statusa kodu.\r
-message160=Apskatīts trafiks\r
-message161=Nav apskatīts trafiks\r
-message162=Mēneša atskaite\r
-message163=Tīkla tārpi\r
-message164=Dažādi tīkla tārpi\r
-message165=Veiksmīgi nosūtīti e-pasti\r
-message166=Neveiksmīgas e-pasta sūtīšanas\r
-message167=Ievainojamība\r
-message168=Atslēgtsw Javascript\r
-message169=Izveidojis\r
-message170=spraudņi\r
-message171=Reģioni\r
+# Latviešu valodas ziņojumu fails (madmaster@gobbo.caves.lv)
+# Updated by edvinsma@inbox.lv 2004/01/24 00:40:00
+# $Revision$ - $Date$
+PageCode=utf-8
+message0=Nezināms
+message1=Nezināms (neatpazīts ip)
+message2=Citi
+message3=Apskatīt izvērsti
+message4=Diena
+message5=Mēnesis
+message6=Gads
+message7=Statistika
+message8=Pirmais apmeklējums
+message9=Pēdējais apmeklējums
+message10=Vizīšu skaits
+message11=Unikālie apmeklētāji
+message12=Apmeklējums
+message13=atšķirīgi(s) atslēgvārdi(s)
+message14=Meklēt
+message15=Procenti
+message16=Trafiks
+message17=Domaini/Valstis
+message18=Apmeklētāji
+message19=Lapas-URL
+message20=Stundas
+message21=Pārlūkprogrammas
+message22=HTTP Kļūdas
+message23=Norādītāji
+message24=Meklēt&nbsp;Atslēgvārdus
+message25=Apmeklētāju domaini/valstis
+message26=hosti
+message27=lapas
+message28=atšķirīgas lapas
+message29=Skatītas lapas
+message30=Citi vārdi
+message31=Neatrastas lapas
+message32=HTTP Kļūdu kodi
+message33=Netscape versijas
+message34=IE versijas
+message35=Pēdējais jauninājums
+message36=Pievienoties saitei no
+message37=Oriģināli
+message38=Tiešā adrese / Grāmatzīmes
+message39=Orģināls nezināms
+message40=Norādes no Interneta Meklēšanas Saitēm
+message41=Norādes no ārējām lapām (citas web lapas izņemot meklēšanas saites)
+message42=Links from an internal page (cita lapa šajā pašā saitē)
+message43=Atslēgvārdi kas lietoti meklēšanas saitēs
+message44=Kb
+message45=Neatpazītas IP Addreses
+message46=Nezināms OS (Norādes Lauks)
+message47=Pieprasīts bet neatrasts URLs (HTTP kods 404)
+message48=IP Addrese
+message49=Kļuda&nbsp;Trāpījumi
+message50=Nezināmi pārlūki (Norādes lauks)
+message51=Apmeklējušie roboti
+message52=apmeklējumi/apmeklētāji
+message53=Roboti/Zirnekļi apmeklētāji
+message54=Brīvs reālā laika logfailu analizators advancētai web statistikai
+message55=no
+message56=Lapas
+message57=Trāpījumi
+message58=Versijas
+message59=Operētājsistēmas
+message60=Jan
+message61=Feb
+message62=Mar
+message63=Apr
+message64=Mai
+message65=Jūn
+message66=Jūl
+message67=Aug
+message68=Sep
+message69=Okt
+message70=Nov
+message71=Dec
+message72=Navigācija
+message73=Failu tips
+message74=Atjaunot
+message75=Baiti
+message76=Atpakaļ uz galveno lapu
+message77=Augša
+message78=dd mmm yyyy - HH:MM
+message79=Filtrs
+message80=Pilns saraksts
+message81=Hosti
+message82=Zināms
+message83=Roboti
+message84=Sv
+message85=Pir
+message86=Ot
+message87=Tr
+message88=Ce
+message89=Pkt
+message90=Se
+message91=Nedēļas dienas
+message92=Kas
+message93=Kad
+message94=Autentificētie lietotāji
+message95=Min
+message96=Vid
+message97=Maks
+message98=Web salīdzinājums
+message99=saglabātais joslas platums
+message100=Pirms kompresijas
+message101=Pēc kompresijas
+message102=Kopā
+message103=Atšķirīgi atslēgvārdi
+message104=Iejas lapas
+message105=Kods
+message106=Vidējais izmērs
+message107=Saites no Ziņu grupām
+message108=KB
+message109=MB
+message110=GB
+message111=Savācējs
+message112=Jā
+message113=Nē
+message114=WhoIs informācija
+message115=OK
+message116=Izejas lapas
+message117=Apmeklējuma ilgums
+message118=Aizvērt logu
+message119=Baiti
+message120=Meklēšanas&nbsp;atslēgfrāzes
+message121=Meklēšanas&nbsp;atslēgvārdi
+message122=Citas meklētāju lapas ar atsaucēm
+message123=Citas lapas ar atsaucēm
+message124=Citas frāzes
+message125=Anonīmie lietotāji
+message126=Meklētāju lapas ar atsaucēm
+message127=Lapas ar atsaucēm
+message128=Kopsavilkums
+message129=Precīza vērtība sadaļā &quot;Gads&quot; nav pieejama
+message130=Datu vērtību kopnes
+message131=Sūtītāja adrese
+message132=Saņēmēja adrese
+message133=Atskaites periods
+message134=Papildus/Mārketings
+message135=Ekrāna izšķiršanas spēja
+message136=Vīrusu uzbrukumi
+message137=Pievienots izlasei
+message138=Mēneša dienas
+message139=Dažādi
+message140=Pārlūkprogrammas ar Java atbalstu
+message141=Pārlūkprogrammas ar Macromedia Director atbalstu
+message142=Pārlūkprogrammas ar Flash atbalstu
+message143=Pārlūkprogrammas ar RealAudio atbalstu
+message144=Pārlūkprogrammas ar QuickTime atbalstu
+message145=Pārlūkprogrammas ar Windows Media atbalstu
+message146=Pārlūkprogrammas ar PDF atbalstu
+message147=SMTP kļūdu kodi
+message148=Valstis
+message149=E-pasti
+message150=Izmērs
+message151=Sākums
+message152=Beigas
+message153=Izslēgšanas filtrs
+message154=Šeit kodi parāda šāvienus vai trafiku, ko nav apskatījuši lietotāji, tāpēc viņi nav iekļauti citās diagrammās.
+message155=Puduris
+message156=Šeit uzrādītie roboti ir radījuši trāpījumus vai "neapskatīto" trafiku, tāpēc tie nav iekļauti citās diagrammās.
+message157=Skaitlis pēc "+" ir veiksmīgo šāvienu skaits robots.txt failam.
+message158=Šie ir uzrādīti trāpījumi vai trafiks ko radīja tīkla tārpi vai arī "neapskatītās" lapas, tāpēc tie nav iekļauti citās 
+diagrammās.
+message159="Neapskatīto" trafiku ģenerē roboti, tīkla tārpi, vai arī atbildes ar speciālo HTTP statusa kodu.
+message160=Apskatīts trafiks
+message161=Nav apskatīts trafiks
+message162=Mēneša atskaite
+message163=Tīkla tārpi
+message164=Dažādi tīkla tārpi
+message165=Veiksmīgi nosūtīti e-pasti
+message166=Neveiksmīgas e-pasta sūtīšanas
+message167=Ievainojamība
+message168=Atslēgts Javascript
+message169=Izveidojis
+message170=spraudņi
+message171=Reģioni
  message172=Pilsētas
 \ No newline at end of file
diff --git a/wwwroot/cgi-bin/lib/robots.pm b/wwwroot/cgi-bin/lib/robots.pm

index c443f66bd2f0afb5c55d2988c5bf60806703f74e..f6124146f2eb27af5e404cbe7132a65e8ba9987b 100644 (file)
--- a/wwwroot/cgi-bin/lib/robots.pm
+++ b/wwwroot/cgi-bin/lib/robots.pm
@@ -1,2219 +1,2219 @@
-# AWSTATS ROBOTS DATABASE\r
-#-------------------------------------------------------\r
-# If you want to add robots to extend AWStats database detection capabilities,\r
-# you must add an entry in RobotsSearchIDOrder_listx and RobotsHashIDLib.\r
-\r
-# The entry in RobotsSearchIDOrder_listx is a Perl regular expression\r
-# (see http://perldoc.perl.org/perlreref.html). AWSTats applies these\r
-# expressions to the user agent string in the order given by the lists. The\r
-# first match specifies the robot.\r
-#\r
-# Note: This regular expression must not contain any whitespace.\r
-# Otherwise AWStats will produce lines in the database that\r
-# will be misinterpreted and as a consequence the corresponding data in the\r
-# generated HTML reports will be wrong. If you want to match whitespace in\r
-# the user agent string, use other constructs like '\s', '[:blank:]',\r
-# '\p{IsSpace}', '\x20' etc.\r
-#\r
-# The corresponding entry in RobotsHashIDLib contains the regular expression\r
-# as key, followed by a string containing HTML-text. AWStats inserts this\r
-# text into reports to describe the bot. If possible the text should contain\r
-# a link to the bot home page. This makes it easier for sysadmins to find\r
-# the information necessary e.g. to adapt the robots.txt file.\r
-#\r
-# An entry in the RobotsAffiliateLib is not necessary. An entry in this list\r
-# contains as first part the regular expression specifying the bot. The\r
-# second part is a string that gives the Company or product managing the bot.\r
-# This information is not used yet.\r
-#\r
-# There are several sorts of bots that AWStats is not able to detect and\r
-# therefore a considerable amount of bot generated traffic counts\r
-# as user traffic:\r
-#\r
-# a) A crawler that identifies itself in the referrer string, but not in\r
-#    the user agent string. An example is the crawler from semalt.semalt.com.\r
-#\r
-# b) Crawlers that correctly access robots.txt but identify themselves in\r
-#    in the user agent string only once or just a few times. Most of the\r
-#    time a user agent string ist used that does not contain hints that\r
-#    a bot is involved. An example is the iCjobs spider.\r
-#    msnbot-UDiscovery/2.0b seems to show this behaviour too.\r
-#\r
-#\r
-#\r
-#-------------------------------------------------------\r
-\r
-# 2005-08-19 Sean Carlos http://www.antezeta.com/awstats.html\r
-#              added dipsie (not tested with real data).\r
-#              added DomainsDB.net http://domainsdb.net/\r
-#              added ia_archiver-web.archive.org (was inadvertently grouped with Alexa traffic)\r
-#              added Nutch (used by looksmart (furl?))\r
-#              added rssImagesBot\r
-#              added Sqworm\r
-#              added t\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e\r
-#              added w3c css-validator\r
-#              added documentation link to bot home pages for above and selected major bots.\r
-#                    In the case of international bots, choose .com page.\r
-#                    Included tool tip (html "title").\r
-#                    To do: parameterize to match both AWStats language and tooltips settings.\r
-#                    To do: add html links for all bots based on current documentation in source\r
-#                           files referenced below.\r
-#              changed '\wbot[\/\-]', to '\wbot[\/\-]' (removed comma)\r
-#              made minor grammar corrections to notes below\r
-# 2005-08-24   added YahooSeeker-Testing\r
-#                      added w3c-checklink\r
-#                      updated url for ask.com\r
-# 2005-08-24           added Girafabot http://www.girafa.com/\r
-# 2005-08-30           added PluckFeedCrawler http://www.pluck.com/\r
-#              added Gaisbot/3.0 (robot05@gais.cs.ccu.edu.tw; )\r
-#              dded geniebot (wgao@genieknows.com)\r
-#              added BecomeBot link http://www.become.com/site_owners.html\r
-#              added topicblogs http://www.topicblogs.com/\r
-#              added Powermarks; seen used by referrer spam\r
-#              added YahooSeeker\r
-#              added NG/2. http://www.exabot.com/\r
-# 2005-09-15   added link for Walhello appie\r
-#              added bender focused_crawler\r
-#              updated YahooSeeker description (blog crawler)\r
-# 2005-09-16   added link for http://linkchecker.sourceforge.net\r
-#              added ConveraCrawler/0.9d ( http://www.authoritativeweb.com/crawl)\r
-#              added Blogslive  info@blogslive.com intelliseek.com\r
-#              added BlogPulse (ISSpider-3.0) intelliseek.com\r
-# 2005-09-26   added Feedfetcher-Google (http://www.google.com/feedfetcher.html)\r
-#              added EverbeeCrawler\r
-#              added Yahoo-Blogs http://help.yahoo.com/help/us/ysearch/crawling/crawling-02.html\r
-#              added link for Bloglines http://www.bloglines.com\r
-# 2005-10-19   fixed Feedfetcher-Google (http://www.google.com/feedfetcher.html)\r
-#              added Blogshares Spiders (Synchronized V1.5.1)\r
-#              added yacy\r
-# 2005-11-21   added Argus www.simpy.com\r
-#              added BlogsSay :: RSS Search Crawler (http://www.blogssay.com/)\r
-#              added MJ12bot http://majestic12.co.uk/bot.php\r
-#              added OpenTaggerBot (http://www.opentagger.com/opentaggerbot.htm)\r
-#              added OutfoxBot/0.3 (For internet experiments; outfox.agent@gmail.com)\r
-#              added RufusBot Rufus Web Miner http://64.124.122.252.webaroo.com/feedback.html\r
-#              added Seekbot (http://www.seekbot.net/bot.html)\r
-#              added Yahoo-MMCrawler/3.x (mms-mmcrawler-support@yahoo-inc.com)\r
-#               added link for BaiDuSpider\r
-#              added link for Blogshares Spider\r
-#              added link for StackRambler http://www.rambler.ru/doc/faq.shtml\r
-#              added link for WISENutbot\r
-#              added link for ZyBorg/1.0 (wn-14.zyborg@looksmart.net; http://www.WISEnutbot.com.  Moved location to above wisenut to avoid classification as wisenut\r
-# 2005-12-15\r
-#              added FAST Enteprise Crawler/6 (www dot fastsearch dot com). Note spelling Enteprise not Enterprise.\r
-#              added findlinks http://wortschatz.uni-leipzig.de/findlinks/\r
-#              added IBM Almaden Research Center WebFountain™ http://www.almaden.ibm.com/cs/crawler [hc3]\r
-#              added INFOMINE/8.0 VLCrawler (http://infomine.ucr.edu/useragents)\r
-#              added lmspider (lmspider@scansoft.com) http://www.nuance.com/\r
-#              added noxtrumbot http://www.noxtrum.com/\r
-#              added SandCrawler (Microsoft)\r
-#              added SBIder http://www.sitesell.com/sbider.html\r
-#              added SeznamBot http://fulltext.seznam.cz/\r
-#              added sohu-search http://corp.sohu.com/ (looked for //robots.txt not /robots.txt)\r
-#              added the ruffle SemanticWeb crawler v0.5 - http://www.unreach.net\r
-#              added WebVulnCrawl/1.0 libwww-perl/5.803 (looked for //robots.txt not /robots.txt)\r
-#              added Yahoo! Japan keyoshid http://www.yahoo.co.jp/\r
-#              added Y!J http://help.yahoo.co.jp/help/jp/search/indexing/indexing-15.html\r
-#              added link for GigaBot\r
-#              added link for MagpieRSS\r
-#              added link for MSIECrawler\r
-# 2005-12-21\r
-#              added aipbot http://www.aipbot.com aipbot@aipbot.com [matthys70 users.sourceforge.net]\r
-#              added Everest-Vulcan Inc./0.1 (R&D project; http://everest.vulcan.com/crawlerhelp)\r
-#              added Fast-Search-Engine http://www.fast-search-engine.com/ [matthys70  users.sourceforge.net]\r
-#              added g2Crawler (nobody@airmail.net) http://crawler.instantnetworks.net/\r
-#              added Jakarta commons-httpclient http://jakarta.apache.org/commons/httpclient/ (hit robots.txt).  May be used as robot or browser - a site may want to remove this entry.\r
-#              added OmniExplorer_Bot http://www.omni-explorer.com/ [matthys70 users.sourceforge.net]\r
-#              added USTC-Semantic-Group ai.ustc.edu.cn/mas/en/research/index.php ?\r
-# 2005-12-22\r
-#              added EARTHCOM.info www.earthcom.info\r
-#              added HTTrack off-line browser 'httrack','HTTrack', http://www.httrack.com/ [Moizes Gabor]\r
-#              added KummHttp http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_g_l_301105_2\b [Moizes Gabor]\r
-# 2006-01-01\r
-#              added Dulance http://www.dulance.com/bot.jsp\r
-#              added MojeekBot http://www.mojeek.com/bot.html\r
-#              added nicebot http://www.egghelp.org/setup.htm ?\r
-#              added Snappy http://www.urltrends.com/faq.php\r
-#              added sohu agent\r
-#              added VORTEX http://marty.anstey.ca/robots/vortex/ [matthys70 users.sourceforge.net]\r
-#              added zspider http://feedback.redkolibri.com/\r
-# 2006-01-13\r
-#              added boitho.com-dc http://www.boitho.com/dcbot.html\r
-#              added IRLbot http://irl.cs.tamu.edu/crawler\r
-#              added virus_detector virus_harvester@securecomputing.com\r
-#              added Wavefire http://www.wavefire.com; info@wavefire.com\r
-#              added WebFilter Robot\r
-# 2006-01-24\r
-#              added Shim-Crawler http://www.logos.ic.i.u-tokyo.ac.jp/crawler/; crawl@logos.ic.i.u-tokyo.ac.jp\r
-#              added Exabot exabot.com\r
-#              added LetsCrawl.com http://letscrawl.com\r
-#              added ichiro http://help.goo.ne.jp/door/crawlerE.html\r
-# 2006-01-27    additional 22 robots from a list provided by Moizes Gabor\r
-#              added ALeadSoftbot      http://www.aleadsoft.com/bot.htm\r
-#              added CipinetBot        http://www.cipinet.com/bot.html\r
-#              added Cuasarbot http://www.cuasar.com/\r
-#              added Dumbot    http://www.dumbfind.com/\r
-#              added Extreme_Picture_Finder    http://www.exisoftware.com/\r
-#              added Fooky.com/ScorpionBot/ScoutOut    http://www.fooky.com/scorpionbots\r
-#              added IlTrovatore-Setaccio      http://www.iltrovatore.it/aiuto/motore_di_ricerca.html  bot@iltrovatore.it\r
-#              added InsurancoBot      http://www.fastspywareremoval.com/\r
-#              added InternetArchive   http://lucene.apache.org/nutch/bot.html         nutch-agent@lucene.apache.org\r
-#              added KazoomBot http://www.kazoom.ca/bot.html   kazoombot@kazoom.ca\r
-#              added Kurzor    http://www.easymail.hu/ cursor@easymail.hu\r
-#              added NutchCVS  http://lucene.apache.org/nutch/bot.html nutch-agent@lucene.apache.org\r
-#              added NutchOSU-VLIB     http://lucene.apache.org/nutch/bot.html nutch-agent@lucene.apache.org\r
-#              added Orbiter   http://www.dailyorbit.com/bot.htm\r
-#              added PHP_version_tracker       http://www.nexen.net/phpversion/bot.php\r
-#              added SuperBot  http://www.sparkleware.com/superbot/\r
-#              added SynooBot  http://www.synoo.de/bot.html    webmaster@synoo.com\r
-#              added TestBot   http://www.agbrain.com/\r
-#              added TutorGigBot       http://www.tutorgig.info/\r
-#              added WebIndexer        mailto://webindexerv1@yahoo.com\r
-#              added WebMiner  http://64.124.122.252/feedback.html\r
-# 2006-02-01\r
-#              added heritrix https://sourceforge.net/forum/message.php?msg_id=3550202\r
-#              added Zeus Webster Pro https://sourceforge.net/forum/message.php?msg_id=3141164\r
-#               additional robots from a list provided by Moizes Gabor [ mojzi -a-t- free mail hu ]\r
-#              added Candlelight_Favorites_Inspector\r
-#              added DomainChecker\r
-#              added EasyDL\r
-#              added FavOrg\r
-#              added Favorites_Sweeper\r
-#              added Html_Link_Validator\r
-#              added Internet_Ninja\r
-#              added JRTwine_Software_Check_Favorites_Utility\r
-#              fixed Microsoft_URL_Control\r
-#              added miniRank\r
-#              added Missigua_Locator\r
-#              added NPBot\r
-#              added Ocelli\r
-#              added Onet.pl_SA\r
-#              added proodleBot\r
-#              added SearchGuild_DMOZ_Experiment\r
-#              added Susie\r
-#              added Website_Monitoring_Bot\r
-#              added Xenu_Link_Sleuth\r
-# 2006-05-15\r
-#              added ASPseek http://www.aspseek.org/\r
-#              added AdamM Bot http://home.blic.net/adamm/\r
-#              added archive.org_bot http://crawls.archive.org/collections/bncf/crawl.html\r
-#              added arianna.libero.it (Italian Portal/search engine)\r
-#              added Biz360 spider http://www.biz360.com\r
-#              added BlogBridge Service http://www.blogbridge.com/\r
-#              added BlogSearch http://www.icerocket.com/\r
-#              added libcrawl\r
-#              added edgeio-relanshanbottriever http://www.edgeio.com\r
-#              added FeedFlow http://feedflow.com/about\r
-#              added Biblioteca Nazionale Centrale di Firenze (Italian National Archive) http://www.bncf.firenze.sbn.it/raccolta.txt\r
-#              added Java catchall - used by many spam bots\r
-#              added lanshanbot http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&amp;template=detail.html&amp;match=%5Cbid_g_l_140406_1%5Cb\r
-#              added msnbot-media http://search.msn.com/msnbot.htm\r
-#              added MT::Telegraph::Agent\r
-#              added Netluchs http://www.netluchs.de/ (German SE bot)\r
-#              added oBot http://www.webmasterworld.com/forum11/1616.htm\r
-#              added Onfolio http://www.onfolio.com/  (IE Toolbar plugin) - hit rss feeds.\r
-#              added ping.blo.gs http://blo.gs/ping.php blog bot\r
-#              added Sphere Scout http://www.sphere.com/\r
-#              added sproose crawler http://www.sproose.com/bot.html\r
-#              added SyndicAPI http://syndicapi.com/bot.html\r
-#              added Yahoo! Mindset http://mindset.research.yahoo.com/\r
-#              added msrabot\r
-#              added Vagabondo & Vagabondo-WAP http://www.wise-guys.nl/Contact/index.php?botselected=webagents&amp;lang=uk\r
-#              fixed Missigua Locator detection (Missigua_Locator -> Missigua Locator)\r
-#              changed echo to echo! to avoid conflict with the bonecho (Firefox 2.0) browser.\r
-#                      This requires you to reprocess historic logs if you want EchO! to be recognized for older reports.\r
-# 2006-05-17\r
-#              added Alpha Search Agent # 62.152.125.60 Eurologon Srl\r
-#              added Krugle http://www.krugle.com/crawler/info.html the search engine for developers\r
-#              added Octora Beta Bot http://www.octora.com/ # Blog and Rss Search Engine\r
-#              added UbiCrawler http://law.dsi.unimi.it/ubicrawler/\r
-#              added Yahoo! Slurp China http://misc.yahoo.com.cn/help.html\r
-#                      You must reprocess old logs for the Yahoo! Slurp China bot to be detected in old reports\r
-# 2006-05-20\r
-#              added 1-More Scanner http://www.myzips.com/software/1-More-Scanner.phtml\r
-#              added Accoona-AI-Agent http://www.accoona.com/\r
-#              added ActiveBookmark http://www.libmaster.com/active_bookmark.php\r
-#              added BIGLOTRON http://www.biglotron.com/robot.html\r
-#              added Bookmark-Manager http://bkm.sourceforge.net/\r
-#              added cbn00glebot\r
-#              added Cerberian Drtrs http://www.pgts.com.au/cgi-bin/psql?robot_info=25240\r
-#              added CFNetwork http://www.cocoadev.com/index.pl?CFNetwork\r
-#              added CheckWeb link validator http://p.duby.free.fr/chkweb.htm\r
-#              added Computer and Automation Research Institute Crawler http://www.ilab.sztaki.hu/~stamas/publications/p184-benczur.html\r
-#              added ConveraCrawler http://www.authoritativeweb.com/crawl/\r
-#              added ConveraMultiMediaCrawler http://www.authoritativeweb.com/crawl/\r
-#              added CSE HTML Validator Lite Online http://online.htmlvalidator.com/php/onlinevallite.php\r
-#              added Cursor http://adcenter.hu/docs/en/bot.html\r
-#              added Custo http://www.netwu.com/custo/\r
-#              added DataFountains/DMOZ Downloader http://infomine.ucr.edu/\r
-#              added Deepindex http://www.deepindex.net/faq.php\r
-#              added DNSGroup http://www.dnsgroup.com/\r
-#              added DoCoMo http://www.nttdocomo.co.jp/\r
-#              added dumm.de-Bot http://www.dumm.de/\r
-#              added ETS v http://www.freetranslation.com/help/\r
-#              added eventax http://www.eventax.de/\r
-#              added FAST Enterprise Crawler * crawleradmin.t-info@telekom.de http://www.telekom.de/\r
-#              added FAST Enterprise Crawler http://www.fast.no/\r
-#              added FAST Enterprise Crawler * T-Info_BI_cluster crawleradmin.t-info@telekom.de http://www.telekom.de/\r
-#              added FeedValidator http://feedvalidator.org/\r
-#              added FilmkameraBot http://www.filmkamera.at/bot.html\r
-#              added Findexa Crawler http://www.findexa.no/gulesider/article26548.ece\r
-#              added Global Fetch http://www.wesonet.com/\r
-#              added GOFORITBOT http://www.goforit.com/about/\r
-#              added GoForIt.com http://www.goforit.com/about/\r
-#              added GPU p2p crawler http://gpu.sourceforge.net/search_engine.php\r
-#              added HooWWWer http://cosco.hiit.fi/search/hoowwwer/\r
-#              added HPPrint\r
-#              added HTMLParser http://htmlparser.sourceforge.net/\r
-#              added Hundesuche.com-Bot http://www.hundesuche.com/\r
-#              added InfoBot http://www.infobot.org/\r
-#              added InfociousBot http://corp.infocious.com/tech_crawler.php\r
-#              added InternetSupervision http://internetsupervision.com/\r
-#              added isearch2006 http://www.yahoo.com.cn/\r
-#              added IUPUI_Research_Bot http://spamhuntress.com/2005/04/25/a-mail-harvester-visits/\r
-#              added KalamBot http://64.124.122.251/feedback.html\r
-#              added kamano.de NewsFeedVerzeichnis http://www.kamano.de/\r
-#              added Kevin http://dznet.com/kevin/\r
-#              added KnowItAll http://www.cs.washington.edu/research/knowitall/\r
-#              added Knowledge.com http://www.knowledge.com/\r
-#              added Kouaa Krawler http://www.kouaa.com/\r
-#              added ksibot http://ego.ms.mff.cuni.cz/\r
-#              added Link Valet Online http://www.htmlhelp.com/tools/valet/\r
-#              added lwp-request http://search.cpan.org/~gaas/libwww-perl-5.69/bin/lwp-request\r
-#              added lwp-trivial http://search.cpan.org/src/GAAS/libwww-perl-5.805/lib/LWP/Simple.pm\r
-#              added MapoftheInternet.com http://MapoftheInternet.com/\r
-#              added Matrix S.p.A. - FAST Enterprise Crawler http://tin.virgilio.it/\r
-#              added Megite http://www.megite.com/\r
-#              added Metaspinner http://index.meta-spinner.de/\r
-#              added Mini-reptile\r
-#              added Misterbot http://www.misterbot.fr/\r
-#              added Miva http://www.miva.com/\r
-#              added Mizzu Labs http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&amp;template=detail.html&amp;match=\bid_m_141105_2\b\r
-#              added MSRBOT http://research.microsoft.com/research/sv/msrbot/\r
-#              added MS SharePoint Portal Server - MS Search 4.0 Robot http://support.microsoft.com/default.aspx?scid=kb;en-us;284022\r
-#              added Mydoyouhike http://www.doyouhike.net/my\r
-#              added NASA Search http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_n_s_140506_2\b\r
-#              added NetSprint http://www.netsprint.pl/serwis/\r
-#              added NimbleCrawler http://www.healthline.com/\r
-#              added OpenWebSpider http://www.openwebspider.org/\r
-#              added Oracle Ultra Search http://www.oracle.com/technology/products/ultrasearch/index.html\r
-#              added OSSProxy http://www.marketscore.com/FAQ.Aspx\r
-#              added passwordmaker.org http://passwordmaker.org/\r
-#              added PEAR HTTP Request class http://pear.php.net/\r
-#              added PEERbot http://www.peerbot.com/\r
-#              added PHP version tracker http://www.nexen.net/phpversion/bot.php\r
-#              added PictureOfInternet http://malfunction.org/poi/\r
-#              added plinki http://www.plinki.com/\r
-#              added Port Huron Labs http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&amp;template=detail.html&amp;match=\bid_n_s_1133\b\r
-#              added PostFavorites http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&amp;template=detail.html&amp;match=\bid_n_s_1135\b\r
-#              added ProjectWF-java-test-crawler\r
-#              added PyQuery http://sourceforge.net/projects/pyquery/\r
-#              added Schizozilla http://spamhuntress.com/2005/03/18/gizmo/\r
-#              added Scumbot\r
-#              added Sensis Web Crawler http://www.sensis.com.au/\r
-#              added snap.com beta crawler http://www.snap.com/\r
-#              added Steeler http://www.tkl.iis.u-tokyo.ac.jp/~crawler/\r
-#              added STEROID  Download http://faqs.org.ru/progr/pascal/delphi_internet2.htm\r
-#              added Suchfin-Bot http://www.suchfin.de/\r
-#              added Sunrise http://www.sunrisexp.com/\r
-#              added Tagyu Agent http://www.tagyu.com/\r
-#              added Tcl http client package http://www.tcl.tk/man/tcl8.4/TclCmd/http.htm\r
-#              added TeragramCrawlerSURF http://www.teragram.com/\r
-#              added Test Crawler http://netp.ath.cx/\r
-#              added UnChaos Bot Hybrid Web Search Engine http://www.unchaos.com/\r
-#              added unido-bot http://www.unchina.org/unido/unido/our_projects/3_3.html\r
-#              added UniversalFeedParser http://feedparser.org/ (seen from md301000.inktomisearch.com)\r
-#              added updated http://www.updated.com/\r
-#              added Vermut http://vermut.aol.com\r
-#              added versus crawler from eda.baykan@epfl.ch http://www.epfl.ch/Eindex.html\r
-#              added Vespa Crawler (Yahoo Norway?) http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=%5Cbid_t_z_030406_1%5Cb\r
-#              added VSE http://www.vivisimo.com/\r
-#              added webcrawl.net http://www.webcrawl.net/\r
-#              added Web Downloader http://www.krasu.ru/soft/chuchelo/\r
-#              added Webdup http://www.webdup.com/en/index.html\r
-#              added Wells Search http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&amp;template=detail.html&amp;match=\bid_t_z_1484\b\r
-#              added WordPress http://wordpress.org/\r
-#              added wume crawler http://wume.cse.lehigh.edu/~xiq204/crawler/\r
-#              added Xenu's Link Sleuth (with ')\r
-#              added xirq http://www.xirq.com/\r
-#              added yoogliFetchAgent http://www.yoogli.com/\r
-#              added Z-Add Link Checker http://w3.z-add.co.uk/linkcheck/\r
-#              -- fix - some robots were reported with _ where _ should have been a space.\r
-#              changed Xenu Link Sleuth\r
-#              changed microsoft[_+\s]url[_+\s]control -> microsoft_url_control\r
-#              changed favorites_sweeper -> favorites_sweeper\r
-#              -- updates\r
-#              updated AskJeeves to Ask\r
-# 2012-06-05 Albrecht Mueller\r
-#              added Grabber from SDSC (San Diego Supercomputer Center).\r
-# 2013-09-30 Albrecht Mueller\r
-# AWStats probably cannot detect this bot as it identifies itself in\r
-# the referrer field and not in the user agent string.\r
-#92.113.100.35 - - [29/Sep/2013:17:22:46 +0200] "GET /robots.txt HTTP/1.1" 200 516 "-" "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0" "-"\r
-#92.113.100.35 - - [29/Sep/2013:17:22:49 +0200] "GET /tghome.htm HTTP/1.1" 200 4445 "http://extrabot.com/help/frytygativyheku.htm" "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0" "-"\r
-#92.113.100.35 - - [29/Sep/2013:17:22:51 +0200] "GET / HTTP/1.1" 200 5467 "http://extrabot.com/help/frytygativyheku.htm" "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0" "-"\r
-\r
-# to do  MS Search 4.0 Robot\r
-\r
-#package AWSROB;\r
-\r
-\r
-# Robots list was found at http://www.robotstxt.org/wc/active/all.txt\r
-# Other robots can be found at http://www.jafsoft.com/searchengines/webbots.html\r
-# Rem: To avoid bad detection, some robot's ids were removed from this list:\r
-#      - Robots with ID of 3 letters only\r
-#      - Robots called 'webs' and 'tcl'\r
-# Rem: directhit changed into direct_hit (its real id)\r
-# Rem: calif changed into calif[^r] to avoid confusion between Tiscalifreenet browser\r
-# Rem: fish changed into [^a]fish to avoid confusion between Madsafish browser\r
-# Rem: roadrunner changed into road_runner\r
-# Rem: lycos changed to lycos_ to avoid confusion with lycos-online browser\r
-# Rem: voyager changed into ^voyager\/ to avoid to exclude voyager and amigavoyager browser\r
-\r
-# RobotsSearchIDOrder\r
-# It contains all matching criteria to search for in log fields. This list is\r
-# used to know in which order to search Robot IDs.\r
-# Most frequent ones are in list1, used when LevelForRobotsDetection is 1 or more\r
-# Minor robots are in list2, used when LevelForRobotsDetection is 2 or more\r
-# Note: Robots IDs are in lower case, '_', ' ' and '+' are changed into '[_+\s]' and are quoted.\r
-#-------------------------------------------------------\r
-@RobotsSearchIDOrder_list1 = (\r
-# Common robots (In robot file)\r
-'appie',\r
-'architext',\r
-'bingpreview',\r
-'bjaaland',\r
-'contentmatch',\r
-'ferret',\r
-'googlebot\-image',\r
-'googlebot',\r
-'google\-sitemaps',\r
-'google[_+\s]web[_+\s]preview',\r
-'grabber',\r
-'gulliver',\r
-'virus[_+\s]detector',         # Must be before harvest\r
-'harvest',\r
-'htdig',\r
-'jeeves',\r
-'linkwalker',\r
-'lilina',\r
-'lycos[_+\s]',\r
-'moget',\r
-'muscatferret',\r
-'myweb',\r
-'nomad',\r
-'scooter',\r
-'slurp',\r
-'^voyager\/',\r
-'weblayers',\r
-# Common robots (Not in robot file)\r
-'antibot',\r
-'bruinbot',\r
-'digout4u',\r
-'echo!',\r
-'fast\-webcrawler',\r
-'ia_archiver\-web\.archive\.org', # Must be before ia_archiver to avoid confusion with alexa\r
-'ia_archiver',\r
-'jennybot',\r
-'mercator',\r
-'netcraft',\r
-'msnbot\-media',\r
-'msnbot-udiscovery',\r
-'msnbot',\r
-'petersnews',\r
-'relevantnoise\.com',\r
-'unlost_web_crawler',\r
-'voila',\r
-'webbase',\r
-'webcollage',\r
-'cfetch',\r
-'zyborg',      # Must be before wisenut\r
-'wisenutbot'\r
-);\r
-@RobotsSearchIDOrder_list2 = (\r
-# Less common robots (In robot file)\r
-'007ac9',\r
-'[^a]fish',\r
-'abcdatos',\r
-'abonti\.com',\r
-'acme\.spider',\r
-'ahoythehomepagefinder',\r
-'ahrefsbot',\r
-'alkaline',\r
-'anthill',\r
-'arachnophilia',\r
-'arale',\r
-'araneo',\r
-'aretha',\r
-'ariadne',\r
-'powermarks',\r
-'arks',\r
-'aspider',\r
-'atn\.txt',\r
-'atomz',\r
-'auresys',\r
-'backrub',\r
-'bbot',\r
-'bigbrother',\r
-'blackwidow',\r
-'blindekuh',\r
-'bloodhound',\r
-'borg\-bot',\r
-'brightnet',\r
-'bspider',\r
-'cactvschemistryspider',\r
-'calif[^r]',\r
-'cassandra',\r
-'cgireader',\r
-'checkbot',\r
-'christcrawler',\r
-'churl',\r
-'cienciaficcion',\r
-'cms\scrawler',\r
-'collective',\r
-'combine',\r
-'conceptbot',\r
-'coolbot',\r
-'core',\r
-'cosmos',\r
-'crazywebcrawler',\r
-'cruiser',\r
-'cusco',\r
-'cyberspyder',\r
-'desertrealm',\r
-'deweb',\r
-'dienstspider',\r
-'digger',\r
-'diibot',\r
-'direct_hit',\r
-'dnabot',\r
-'domainappender',\r
-'download_express',\r
-'dragonbot',\r
-'dwcp',\r
-'e\-collector',\r
-'ebiness',\r
-'elfinbot',\r
-'emacs',\r
-'emcspider',\r
-'esther',\r
-'evliyacelebi',\r
-'fastcrawler',\r
-'feedcrawl',\r
-'fdse',\r
-'felix',\r
-'fetchrover',\r
-'fido',\r
-'finnish',\r
-'fireball',\r
-'fouineur',\r
-'francoroute',\r
-'freecrawl',\r
-'funnelweb',\r
-'gama',\r
-'gazz',\r
-'gcreep',\r
-'getbot',\r
-'geturl',\r
-'golem',\r
-'gougou',\r
-'grapnel',\r
-'griffon',\r
-'gromit',\r
-'gulperbot',\r
-'hambot',\r
-'havindex',\r
-'hometown',\r
-'htmlgobble',\r
-'hyperdecontextualizer',\r
-'iajabot',\r
-'iaskspider',\r
-'hl_ftien_spider',\r
-'sogou',\r
-'icjobs\.de',\r
-'iconoclast',\r
-'ilse',\r
-'imagelock',\r
-'incywincy',\r
-'informant',\r
-'infoseek',\r
-'infoseeksidewinder',\r
-'infospider',\r
-'inspectorwww',\r
-'intelliagent',\r
-'irobot',\r
-'iron33',\r
-'israelisearch',\r
-'javabee',\r
-'jbot',\r
-'jcrawler',\r
-'jobo',\r
-'jobot',\r
-'joebot',\r
-'jubii',\r
-'jumpstation',\r
-'kapsi',\r
-'katipo',\r
-'kilroy',\r
-'ko[_+\s]yappo[_+\s]robot',\r
-'kummhttp',\r
-'labelgrabber\.txt',\r
-'larbin',\r
-'legs',\r
-'linkidator',\r
-'linkscan',\r
-'lockon',\r
-'logo_gif',\r
-'macworm',\r
-'magpie',\r
-'marvin',\r
-'mattie',\r
-'mediafox',\r
-'merzscope',\r
-'meshexplorer',\r
-'mindcrawler',\r
-'mnogosearch',\r
-'momspider',\r
-'monster',\r
-'motor',\r
-'muncher',\r
-'mwdsearch',\r
-'ndspider',\r
-'nederland\.zoek',\r
-'netcarta',\r
-'netmechanic',\r
-'netscoop',\r
-'newscan\-online',\r
-'nhse',\r
-'northstar',\r
-'nzexplorer',\r
-'objectssearch',\r
-'occam',\r
-'octopus',\r
-'openfind',\r
-'orb_search',\r
-'packrat',\r
-'pageboy',\r
-'parasite',\r
-'patric',\r
-'pegasus',\r
-'perignator',\r
-'perlcrawler',\r
-'phantom',\r
-'phpdig',\r
-'piltdownman',\r
-'pimptrain',\r
-'pioneer',\r
-'pitkow',\r
-'pjspider',\r
-'plumtreewebaccessor',\r
-'poppi',\r
-'portalb',\r
-'psbot',\r
-'python',\r
-'raven',\r
-'rbse',\r
-'resumerobot',\r
-'rhcs',\r
-'road_runner',\r
-'robbie',\r
-'robi',\r
-'robocrawl',\r
-'robofox',\r
-'robozilla',\r
-'roverbot',\r
-'rules',\r
-'safetynetrobot',\r
-'semalt', #Note: This entry will not work as this crawler identifies itself\r
-# in the referrer string and not in the user agent string\r
-'search\-info',\r
-'search_au',\r
-'searchprocess',\r
-'senrigan',\r
-'sgscout',\r
-'shaggy',\r
-'shaihulud',\r
-'sift',\r
-'simbot',\r
-'sistrix', #Virus/trojan-infection? fr-crawler, ca-crawler? See https://www.projecthoneypot.org/ip_37.59.55.128, https://www.projecthoneypot.org/ip_198.27.80.144\r
-'site\-valet',\r
-'sitetech',\r
-'skymob',\r
-'slcrawler',\r
-'smartspider',\r
-'snooper',\r
-'solbot',\r
-'speedy',\r
-'spider[_+\s]monkey',\r
-'spiderbot',\r
-'spiderline',\r
-'spiderman',\r
-'spiderview',\r
-'spry',\r
-'sqworm',\r
-'ssearcher',\r
-'suke',\r
-'sunrise',\r
-'suntek',\r
-'sven',\r
-'tach_bw',\r
-'tagyu_agent',\r
-'tailrank',\r
-'tarantula',\r
-'tarspider',\r
-'techbot',\r
-'templeton',\r
-'titan',\r
-'titin',\r
-'tkwww',\r
-'tlspider',\r
-'ucsd',\r
-'udmsearch',\r
-'universalfeedparser',\r
-'urlck',\r
-'valkyrie',\r
-'verticrawl',\r
-'victoria',\r
-'visionsearch',\r
-'voidbot',\r
-'vwbot',\r
-'w3index',\r
-'w3m2',\r
-'wallpaper',\r
-'wanderer',\r
-'wapspIRLider',\r
-'webbandit',\r
-'webcatcher',\r
-'webcopy',\r
-'webfetcher',\r
-'webfoot',\r
-'webinator',\r
-'weblinker',\r
-'webmirror',\r
-'webmoose',\r
-'webquest',\r
-'webreader',\r
-'webreaper',\r
-'websnarf',\r
-'webspider',\r
-'webvac',\r
-'webwalk',\r
-'webwalker',\r
-'webwatch',\r
-'whatuseek',\r
-'whowhere',\r
-'wired\-digital',\r
-'wmir',\r
-'wolp',\r
-'wombat',\r
-'wordpress',\r
-'worm',\r
-'woozweb',\r
-'wwwc',\r
-'wz101',\r
-'xenu\slink\ssleuth',\r
-'xget',\r
-# Other robots reported by users\r
-'^finbot', #UA string starts with "finbot", should not match "elfinbot"\r
-'^webindex$', #UA should not match "webindexer"\r
-'1\-more_scanner',\r
-'360spider',\r
-'a6-indexer',\r
-'accoona\-ai\-agent',\r
-'activebookmark',\r
-'adamm_bot',\r
-'adsbot-google',\r
-'advbot',\r
-'affectv\.co\.uk',\r
-'almaden',\r
-'aipbot',\r
-'aleadsoftbot',\r
-'alpha_search_agent',\r
-'allrati',\r
-'aport',\r
-'applebot',\r
-'archive\-de\.com',\r
-'archive\.org_bot',\r
-'argus',               # Must be before nutch\r
-'arianna\.libero\.it',\r
-'aspseek',\r
-'asterias',\r
-'awbot',\r
-'backlinktest\.com',\r
-'baiduspider',\r
-'becomebot',\r
-'bender',\r
-'betabot',\r
-'biglotron',\r
-'bittorrent_bot',\r
-'biz360[_+\s]spider',\r
-'blexbot',\r
-'blogbridge[_+\s]service',\r
-'bloglines',\r
-'blogpulse',\r
-'blogsearch',\r
-'blogshares',\r
-'blogslive',\r
-'blogssay',\r
-'bncf\.firenze\.sbn\.it\/raccolta\.txt',\r
-'bobby',\r
-'boitho\.com\-dc',\r
-'bookmark\-manager',\r
-'boris',\r
-'bubing',\r
-'bumblebee',\r
-'candlelight[_+\s]favorites[_+\s]inspector',\r
-'careerbot',\r
-'cbn00glebot',\r
-'ccbot',\r
-'cerberian_drtrs',\r
-'cfnetwork',\r
-'cipinetbot',\r
-'checkweb_link_validator',\r
-'cliqzbot',\r
-'commons\-httpclient',\r
-'computer_and_automation_research_institute_crawler',\r
-'converamultimediacrawler',\r
-'converacrawler',\r
-'copubbot',\r
-'cscrawler',\r
-'cse_html_validator_lite_online',\r
-'cuasarbot',\r
-'cursor',\r
-'custo',\r
-'datafountains\/dmoz_downloader',\r
-'dataprovider\.com',\r
-'daumoa',\r
-'daviesbot',\r
-'daypopbot',\r
-'deepindex',\r
-'deusu',\r
-'dipsie\.bot',\r
-'dnsgroup',\r
-'doccheckbot',\r
-'domainchecker',\r
-'domainsdb\.net',\r
-'dotbot',\r
-'duckduckgo-favicons-bot',\r
-'dulance',\r
-'dumbot',\r
-'dumm\.de\-bot',\r
-'earthcom\.info',\r
-'easydl',\r
-'eccp',\r
-'edgeio\-retriever',\r
-'ernst[:blank:]2\.0',\r
-'ets_v',\r
-'exactseek',\r
-'extreme[_+\s]picture[_+\s]finder',\r
-'eventax',\r
-'everbeecrawler',\r
-'everest\-vulcan',\r
-'ezresult',\r
-'enteprise',\r
-'facebook',\r
-'facebot',\r
-'fast_enterprise_crawler.*crawleradmin\.t\-info@telekom\.de',\r
-'fast_enterprise_crawler.*t\-info_bi_cluster_crawleradmin\.t\-info@telekom\.de',\r
-'finderlein[_+\s]research[_+\s]crawler',\r
-'matrix_s\.p\.a\._\-_fast_enterprise_crawler', # must come before fast enterprise crawler\r
-'fast_enterprise_crawler',\r
-'fast\-search\-engine',\r
-'fastbot',\r
-'favicon',\r
-'favorg',\r
-'favorites_sweeper',\r
-'feedburner',\r
-'feedfetcher\-google',\r
-'feedflow',\r
-'feedster',\r
-'feedsky',\r
-'feedvalidator',\r
-'fetchbot',\r
-'filmkamerabot',\r
-'filterdb\.iss\.net',\r
-'findlinks',\r
-'findexa_crawler',\r
-'firmilybot',\r
-'foaf-search\.net',\r
-'fooky\.com\/ScorpionBot',\r
-'g2crawler',\r
-'gaisbot',\r
-'geniebot',\r
-'genieo',\r
-'gigablastopensource',\r
-'gigabot',\r
-'girafabot',\r
-'global_fetch',\r
-'gnodspider',\r
-'goforit\.com',\r
-'goforitbot',\r
-'gonzo',\r
-'grapeshot',\r
-'grub',\r
-'gpu_p2p_crawler',\r
-'henrythemiragorobot',\r
-'heritrix',\r
-'holmes',\r
-'hoowwwer',\r
-'hpprint',\r
-'htmlparser',\r
-'html[_+\s]link[_+\s]validator',\r
-'httrack',\r
-'hundesuche\.com\-bot',\r
-'i-bot',\r
-'icarus6j',\r
-'ichiro',\r
-'idmarch',\r
-'iltrovatore\-setaccio',\r
-'implisensebot',\r
-'infobot',\r
-'infociousbot',\r
-'infohelfer',\r
-'infomine',\r
-'insurancobot',\r
-'integromedb\.org',\r
-'internet[_+\s]ninja',\r
-'internetarchive',\r
-'internetseer',\r
-'internetsupervision',\r
-'ips\-agent',\r
-'irlbot',\r
-'isearch2006',\r
-'istellabot',\r
-'iupui_research_bot',\r
-'izsearch',\r
-'james\sbot',\r
-'jobboerse', #AWStats seems not to find this one despite the fact that "JobboerseBot" and "jobboerse.com" appear in the UA-string, maybe some previous entry matches\r
-'jrtwine[_+\s]software[_+\s]check[_+\s]favorites[_+\s]utility',\r
-'justview',\r
-'kalambot',\r
-'kamano\.de_newsfeedverzeichnis',\r
-'kazoombot',\r
-'kevin',\r
-'keyoshid', # Must come before Y!J\r
-'kinjabot',\r
-'kinja\-imagebot',\r
-'knowitall',\r
-'knowledge\.com',\r
-'kouaa_krawler',\r
-'krugle',\r
-'ksibot',\r
-'kurzor',\r
-'lanshanbot',\r
-'letscrawl\.com',\r
-'libcrawl',\r
-'linkbot',\r
-'linkdex\.com',\r
-'link_valet_online',\r
-'metager\-linkchecker',        # Must be before linkchecker\r
-'linkchecker',\r
-'linkstats\sbot',\r
-'lipperhey',\r
-'livejournal\.com',\r
-'lmspider',\r
-'loadtimebot',\r
-'lssrocketcrawler',\r
-'ltbot',\r
-'ltx71',\r
-'lwp\-request',\r
-'lwp\-trivial',\r
-'madaali\.de',\r
-'magpierss',\r
-'mail\.ru',\r
-'mapoftheinternet\.com',\r
-'meanpathbot',\r
-'mediabot',\r
-'mediapartners\-google',\r
-'megaindex',\r
-'megite',\r
-'memorybot',\r
-'metager2-verification-bot',\r
-'metajobbot', #Does not show up in the results of Sep. 2015 despite the fact that the corresponing log file has about 40 entries containing "MetaJobBot" in the UA string - strange.\r
-'metaspinner',\r
-'miadev',\r
-'microsoft\sbits',\r
-'microsoft.*discovery', # = 'microsoft (?:office (?:protocol|existence)|data access internet publishing provider protocol) discovery',\r
-'microsoft[_+\s]url[_+\s]control',\r
-'mindupbot',\r
-'mini\-reptile',\r
-'minirank',\r
-'missigua_locator',\r
-'misterbot',\r
-'miva',\r
-'mizzu_labs',\r
-'mj12bot',\r
-'mojeekbot',\r
-'msiecrawler',\r
-'ms[_+\s]search[_+\s]6\.0[_+\s]robot',\r
-'ms_search_4\.0_robot',\r
-'msrabot',\r
-'msrbot',\r
-'mt::telegraph::agent',\r
-'mydoyouhike',\r
-'nagios',\r
-'nasa_search',\r
-'netestate\sne\scrawler',\r
-'netluchs',\r
-'netsprint',\r
-'newsgatoronline',\r
-'nicebot',\r
-'nimblecrawler',\r
-'noxtrumbot',\r
-'npbot',\r
-'loocalcrawler/nutch',\r
-'nutchcvs',\r
-'nutchosu\-vlib',\r
-'nutch',  # Must come after other nutch versions\r
-'ocelli',\r
-'octora_beta_bot',\r
-'omniexplorer[_+\s]bot',\r
-'onet\.pl[_+\s]sa',\r
-'onfolio',\r
-'opentaggerbot',\r
-'openwebspider',\r
-'optimizer',\r
-'oracle_ultra_search',\r
-'orangebot',\r
-'orbiter',\r
-'yodaobot',\r
-'qihoobot',\r
-'qwantify',\r
-'passwordmaker\.org',\r
-'pear_http_request_class',\r
-'peerbot',\r
-'perman',\r
-'php[_+\s]version[_+\s]tracker',\r
-'phpcrawl',\r
-'picmole',\r
-'pictureofinternet',\r
-'ping\.blo\.gs',\r
-'plinki',\r
-'pluckfeedcrawler',\r
-'plukkie',\r
-'pogodak',\r
-'pompos',\r
-'popdexter',\r
-'port_huron_labs',\r
-'postfavorites',\r
-'projectwf\-java\-test\-crawler',\r
-'proodlebot',\r
-'publiclibraryarchive',\r
-'pyquery',\r
-'rambler',\r
-'redalert',\r
-'riddler',\r
-'rogerbot',\r
-'rojo',\r
-'rssimagesbot',\r
-'ruffle',\r
-'rufusbot',\r
-'safeads\.xyz',\r
-'safesearch',\r
-'sandcrawler',\r
-'savetheworldheritage',\r
-'sbider',\r
-'schizozilla',\r
-'scumbot',\r
-'searchguild[_+\s]dmoz[_+\s]experiment',\r
-'searchmetricsbot',\r
-'seekbot',\r
-'semrushbot',\r
-'sensis_web_crawler',\r
-'seodiver',\r
-'seokicks\.de',\r
-'seoscanners',\r
-'seznambot',\r
-'shim\-crawler',\r
-'shoutcast',\r
-'sitedomain-bot',\r
-'siteexplorer\.info',\r
-'skimbot',\r
-'slysearch',\r
-'smtbot',\r
-'snap\.com_beta_crawler',\r
-'sohu\-search',\r
-'sohu', # "sohu agent"\r
-'snappy',\r
-'spbot',\r
-'sphere_scout',\r
-'spiderlytics',\r
-'spip',\r
-'sproose_crawler',\r
-'ssearch_bot',\r
-'steeler',\r
-'steroid__download',\r
-'stq_bot',\r
-'suchfin\-bot',\r
-'superbot',\r
-'surveybot',\r
-'susie',\r
-'syndic8',\r
-'syndicapi',\r
-'synoobot',\r
-'tcl_http_client_package',\r
-'technoratibot',\r
-'teragramcrawlersurf',\r
-'test_crawler',\r
-'testbot',\r
-'thumbsniper',\r
-'t\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e',\r
-'topicblogs',\r
-'turnitinbot',\r
-'turtlescanner',               # Must be before turtle\r
-'turtle',\r
-'tutorgigbot',\r
-'twiceler',\r
-'ubicrawler',\r
-'ultraseek',\r
-'unchaos_bot_hybrid_web_search_engine',\r
-'unido\-bot',\r
-'unisterbot',\r
-'updated',\r
-'ustc\-semantic\-group',\r
-'vagabondo\-wap',\r
-'vagabondo',\r
-'vebidoobot',\r
-'vermut',\r
-'versus_crawler_from_eda\.baykan@epfl\.ch',\r
-'vespa_crawler',\r
-'voltron',\r
-'vortex',\r
-'vse\/',\r
-'w3c\-checklink',\r
-'w3c[_+\s]css[_+\s]validator[_+\s]jfouffa',\r
-'w3c_validator',\r
-'watchmouse',\r
-'wavefire',\r
-'waybackarchive\.org',\r
-'wbsearchbot',\r
-'webclipping\.com',\r
-'webcompass',\r
-'webcrawl\.net',\r
-'web_downloader',\r
-'webdup',\r
-'webfilter',\r
-'webindexer',\r
-'webminer',\r
-'website[_+\s]monitoring[_+\s]bot',\r
-'webvulncrawl',\r
-'wells_search',\r
-'wer-liefert-was',\r
-'wesee:search',\r
-'wevikabot',\r
-'wonderer',\r
-'wotbox',\r
-'wume_crawler',\r
-'wwweasel',\r
-'xenu\'s_link_sleuth',\r
-'xenu_link_sleuth',\r
-'xirq',\r
-'xovibot',\r
-'y!j', # Must come after keyoshid Y!J\r
-'yacy',\r
-'yahoo\-blogs',\r
-'yahoo\-verticalcrawler',\r
-'yahoofeedseeker',\r
-'yahooseeker\-testing',\r
-'yahooseeker',\r
-'yahoo\-mmcrawler',\r
-'yahoo!_mindset',\r
-'yandex',\r
-'flexum',\r
-'yanga',\r
-'yet-another-spider',\r
-'yisouspider',\r
-'yooglifetchagent',\r
-'z\-add_link_checker',\r
-'zealbot',\r
-'zhuaxia',\r
-'zspider',\r
-'zeus',\r
-'ng\/1\.', # put at end to avoid false positive\r
-'ng\/2\.', # put at end to avoid false positive\r
-'exabot',  # put at end to avoid false positive\r
-# Additional bots found by Sussex.\r
-'^[1-3]$', # Hiding bots. Doesn't appear to be a valid user agent.\r
-'alltop',\r
-'applesyndication',\r
-'asynchttpclient',\r
-'bingbot',\r
-'blogged_crawl',\r
-'bloglovin',\r
-'butterfly',\r
-'buzztracker',\r
-'carpathia',\r
-'catbot',\r
-'chattertrap',\r
-'check_http', #(nagios) a monitoring tool\r
-'coldfusion',\r
-'covario',\r
-'daylifefeedfetcher',\r
-'discobot',\r
-'dlvr\.it',\r
-'dreamwidth',\r
-'drupal',\r
-'ezoom',\r
-'feedmyinbox',\r
-'feedroll\.com',\r
-'feedzira',\r
-'fever\/',\r
-'freenews',\r
-'geohasher',\r
-'hanrss',\r
-'inagist',\r
-'jacobin\sclub',\r
-'jakarta',\r
-'js\-kit',\r
-'largesmall\scrawler',\r
-'linkedinbot',\r
-'longurl',\r
-'metauri',\r
-'microsoft\-webdav\-miniredir',\r
-'^motorola$',\r
-'movabletype',\r
-# These appear to be bots trying to hide. All of the usual architecture data is missing.\r
-'^mozilla\/3\.0\s\(compatible$',\r
-'^mozilla\/4\.0$',\r
-'^mozilla\/4\.0\s\(compatible;\)$',\r
-'^mozilla\/5\.0$',\r
-'^mozilla\/5\.0\s\(compatible;$',\r
-'^mozilla\/5\.0\s\(en\-us\)$',\r
-'^mozilla\/5\.0\sfirefox\/3\.0\.5$',\r
-'^msie',\r
-# End of hiding bots.\r
-'netnewswire',\r
-'\snetseer\s',\r
-'netvibes',\r
-'newrelicpinger',\r
-'newsfox',\r
-'nextgensearchbot',\r
-'ning',\r
-'pingdom',\r
-'pita',\r
-'postpost',\r
-'postrank',\r
-'printfulbot',\r
-'protopage',\r
-'proximic',\r
-'quipply',\r
-'r6\_',\r
-'ratingburner',\r
-'regator',\r
-'rome\sclient',\r
-'rpt\-httpclient',\r
-'rssgraffiti',\r
-'sage\+\+',\r
-'scoutjet',\r
-'simplepie',\r
-'sitebot',\r
-'summify\.com',\r
-'superfeedr',\r
-'synthesio',\r
-'teoma',\r
-'topblogsinfo',\r
-'topix\.net',\r
-'trapit',\r
-'trileet',\r
-'tweetedtimes',\r
-'twisted\spagegetter',\r
-'twitterbot',\r
-'twitterfeed',\r
-'unwindfetchor',\r
-'wazzup',\r
-'windows\-rss\-platform',\r
-'wiumi',\r
-'xydo',\r
-'yahoo!\sslurp',\r
-'yahoo\spipes',\r
-'yahoo\-newscrawler',\r
-'yahoocachesystem',\r
-'yahooexternalcache',\r
-'yahoo!\ssearchmonkey',\r
-'yahooysmcm',\r
-'yammer',\r
-# 'yandexbot', #already covered by 'yandex'\r
-'yeti',\r
-'yie8',\r
-'youdao',\r
-'yourls',\r
-'zemanta',\r
-'zend_http_client',\r
-'zumbot',\r
-# Other id that are 99% of robots\r
-'wget',\r
-'libwww',\r
-'^java\/[0-9]'   # put at end to avoid false positive\r
-);\r
-@RobotsSearchIDOrder_listgen = (\r
-# Generic robot\r
-'robot',\r
-'checker',\r
-'crawl',\r
-'discovery',\r
-'hunter',\r
-'scanner',\r
-'spider',\r
-'sucker',\r
-'bot[\s_+:,\.\;\/\\\-]',\r
-# Identifies\r
-#"Mozilla/5.0 (Linux; U; Android 4.2.2; de-de; CUBOT P9 Build/JDQ39) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30"\r
-#as a but. There is a Android mobile phone called "CUBOT P9", so this is probably not a bot.\r
-'[\s_+:,\.\;\/\\\-]bot',\r
-'curl',\r
-'php',\r
-'ruby\/',\r
-'no_user_agent'\r
-);\r
-\r
-\r
-\r
-# RobotsHashIDLib\r
-# List of robots names ('robot id','robot clear text')\r
-#-------------------------------------------------------\r
-%RobotsHashIDLib   = (\r
-# Common robots (In robot file)\r
-'appie','<a href="http://www.walhello.com/" title="Bot home page [new window]" target="_blank">Walhello appie</a>',\r
-'architext','ArchitextSpider',\r
-'bingpreview','Bing Preview bot',\r
-'bjaaland','Bjaaland',\r
-'ferret','Wild Ferret Web Hopper #1, #2, #3',\r
-'contentmatch','<a href="http://p4p.cn.yahoo.com">Yahoo!China ContentMatch Crawler</a>',\r
-'googlebot\-image','<a href="http://www.google.com/bot.html" title="Bot home page [new window]" target="_blank">Googlebot-Image</a>',\r
-'googlebot','<a href="http://www.google.com/bot.html" title="Bot home page [new window]" target="_blank">Googlebot</a>',\r
-'google\-sitemaps', 'Google Sitemaps',\r
-'grabber', '<a href="http://www.sdsc.edu/" title="Seltsame Aktivitaeten vom San Diego Supercomputer Center [new window]" target="_blank">Grabber (SDSC)</a>',\r
-'google[_+\s]web[_+\s]preview', 'Google Web Preview',\r
-'gulliver','Northern Light Gulliver',\r
-'virus[_+\s]detector','<a href="http://www.securecomputing.com/" title="virus_harvester@securecomputing.com; Bot home page [new window]" target="_blank">virus_detector</a>',\r
-'harvest','Harvest',\r
-'htdig','ht://Dig',\r
-'jeeves','<a href="http://sp.ask.com/docs/about/tech_crawling.html" title="Bot home page [new window]" target="_blank">Ask</a>',\r
-'linkwalker','LinkWalker',\r
-'lilina','Lilina',\r
-'lycos[_+\s]','Lycos',\r
-'moget','moget',\r
-'muscatferret','Muscat Ferret',\r
-'myweb','Internet Shinchakubin',\r
-'nomad','Nomad',\r
-'scooter','Scooter',\r
-'slurp','<a href="http://help.yahoo.com/help/us/ysearch/slurp/" title="Bot home page [new window]" target="_blank">Yahoo Slurp</a>',\r
-'^voyager\/','Voyager',\r
-'weblayers','Weblayers',\r
-# Common robots (Not in robot file)\r
-'antibot','Antibot',\r
-'bruinbot','<a href="http://web.archive.org/" title="BruinBot home page [new window]" target="_blank">The web archive</a>',\r
-'digout4u','Digout4u',\r
-'echo!','EchO!',\r
-'fast\-webcrawler','Fast-Webcrawler',\r
-'ia_archiver\-web\.archive\.org','<a href="http://web.archive.org/" title="Bot home page [new window]" target="_blank">The web archive (IA Archiver)</a>',\r
-'ia_archiver','<a href="http://www.alexa.com/" title="Bot home page [new window]" target="_blank">Alexa (IA Archiver)</a>',\r
-'jennybot','JennyBot',\r
-'mercator','Mercator',\r
-'msnbot\-media','<a href="http://search.msn.com/msnbot.htm" title="Bot home page [new window]" target="_blank">MSNBot-media</a>',\r
-'msnbot-udiscovery', '<a href="http://search.msn.com/msnbot.htm" title="Feb 18, 2015: UA contains indentification during robots.txt access only." target="_blank">msnbot-UDiscovery</a> Note: AWStats counts most of its traffic as user traffic',\r
-'msnbot','<a href="http://search.msn.com/msnbot.htm" title="Bot home page [new window]" target="_blank">MSNBot</a>',\r
-'netcraft','<a href="http://www.netcraft.com/survey/" title="Bot home page [new window]" target="_blank">Netcraft</a>',\r
-'petersnews','Petersnews',\r
-'unlost_web_crawler','Unlost Web Crawler',\r
-'voila','Voila',\r
-'webbase', 'WebBase',\r
-'zyborg','<a href="http://www.WISEnutbot.com/" title="wn-14.zyborg@looksmart.net Bot home page [new window]" target="_blank">ZyBorg</a>',\r
-'wisenutbot','<a href="http://www.WISEnutbot.com/" title="Bot home page [new window]" target="_blank">WISENutbot</a>',\r
-'webcollage','<a href="http://www.jwz.org/webcollage/" title="WebCollage home page [new window]" target="_blank">WebCollage</a>',\r
-'cfetch','<a href="http://www.kosmix.com/crawler.html" title="kosmix home page [new window]" target="_blank">Cfetch</a>',\r
-# Less common robots (In robot file)\r
-'007ac9',  '<a href="http://crawler.007ac9.net/" rel="nofollow" title="007ac9 Crawler Page [new window]" target="_blank">007ac9 Crawler</a>, seems to belong to <a href="http://www.sistrix.com/" rel="nofollow" title="SISTRIX Home Page [new window]" target="_blank">SISTRIX</a>',\r
-'[^a]fish','Fish search',\r
-'abcdatos','ABCdatos BotLink',\r
-'abonti\.com','<a href="http://www.abonti.com/" title="Abonti WebSearch [new window]" target="_blank">Abonti WebSearch</a>',\r
-'acme\.spider','Acme.Spider',\r
-'ahoythehomepagefinder','Ahoy! The Homepage Finder',\r
-'ahrefsbot', '<a href="http://ahrefs.com/robot/" title="Bot home page [new window]" target="_blank">AhrefsBot</a>',\r
-'alkaline','Alkaline',\r
-'anthill','Anthill',\r
-'arachnophilia','Arachnophilia',\r
-'arale','Arale',\r
-'araneo','Araneo',\r
-'aretha','Aretha',\r
-'ariadne','ARIADNE',\r
-'powermarks','<a href="http://www.kaylon.com/power.html" title="Bot home page [new window]" target="_blank">Powermarks</a>', # must come before Arks; seen used by referrer spam\r
-'arks','arks',\r
-'aspider','ASpider (Associative Spider)',\r
-'atn\.txt','ATN Worldwide',\r
-'atomz','Atomz.com Search Robot',\r
-'auresys','AURESYS',\r
-'backrub','BackRub',\r
-'bbot','BBot',\r
-'bigbrother','Big Brother',\r
-'blackwidow','BlackWidow',\r
-'blindekuh','Die Blinde Kuh',\r
-'bloodhound','Bloodhound',\r
-'borg\-bot','Borg-Bot',\r
-'brightnet','bright.net caching robot',\r
-'bspider','BSpider',\r
-'cactvschemistryspider','CACTVS Chemistry Spider',\r
-'calif[^r]','Calif',\r
-'cassandra','Cassandra',\r
-'cgireader','Digimarc Marcspider/CGI',\r
-'checkbot','Checkbot',\r
-'christcrawler','ChristCrawler.com',\r
-'churl','churl',\r
-'cienciaficcion','cIeNcIaFiCcIoN.nEt',\r
-'cms\scrawler', '<a href="http://www.cmscrawler.com" rel="nofollow" title="CMS Crawler Home Page [new window]" target="_blank">CMS Crawler</a>',\r
-'collective','Collective',\r
-'combine','Combine System',\r
-'conceptbot','Conceptbot',\r
-'coolbot','CoolBot',\r
-'core','Web Core / Roots',\r
-'cosmos','XYLEME Robot',\r
-'crazywebcrawler', '<a href="http://www.crazywebcrawler.com/" rel="nofollow" title="CrazyWeb Crawler Home Page [new window]" target="_blank">CrazyWeb Crawler</a>',\r
-'cruiser','Internet Cruiser Robot',\r
-'cusco','Cusco',\r
-'cyberspyder','CyberSpyder Link Test',\r
-'desertrealm','Desert Realm Spider',\r
-'deweb','DeWeb(c) Katalog/Index',\r
-'dienstspider','DienstSpider',\r
-'digger','Digger',\r
-'diibot','Digital Integrity Robot',\r
-'direct_hit','Direct Hit Grabber',\r
-'dnabot','DNAbot',\r
-'domainappender',  '<a href="http://www.profound.net/domainappender" rel="nofollow" title="DomainAppender Home Page [new window]" target="_blank">DomainAppender</a>',\r
-'download_express','DownLoad Express',\r
-'dragonbot','DragonBot',\r
-'dwcp','DWCP (Dridus\' Web Cataloging Project)',\r
-'e\-collector','e-collector',\r
-'ebiness','EbiNess',\r
-'elfinbot','ELFINBOT',\r
-'emacs','Emacs-w3 Search Engine',\r
-'emcspider','ananzi',\r
-'esther','Esther',\r
-'evliyacelebi','Evliya Celebi',\r
-'fastcrawler','FastCrawler',\r
-'feedcrawl','FeedCrawl by feed@aobo.com',\r
-'fdse','Fluid Dynamics Search Engine robot',\r
-'felix','Felix IDE',\r
-'fetchrover','FetchRover',\r
-'fido','fido',\r
-'finnish','Finnish',\r
-'fireball','KIT-Fireball',\r
-'fouineur','Fouineur',\r
-'francoroute','Robot Francoroute',\r
-'freecrawl','Freecrawl',\r
-'funnelweb','FunnelWeb',\r
-'gama','gammaSpider, FocusedCrawler',\r
-'gazz','gazz',\r
-'gcreep','GCreep',\r
-'getbot','GetBot',\r
-'geturl','GetURL',\r
-'golem','Golem',\r
-'gougou','GouGou',\r
-'grapnel','Grapnel/0.01 Experiment',\r
-'griffon','Griffon',\r
-'gromit','Gromit',\r
-'gulperbot','Gulper Bot',\r
-'hambot','HamBot',\r
-'havindex','havIndex',\r
-'hometown','Hometown Spider Pro',\r
-'htmlgobble','HTMLgobble',\r
-'hyperdecontextualizer','Hyper-Decontextualizer',\r
-'iajabot','iajaBot',\r
-'iaskspider','<a href="http://www.iask.com/" target="_blank">Sina Iask Spider</a>',\r
-'hl_ftien_spider','<a href="http://www.hylanda.com/" target="_blank">Hylanda</a>',\r
-'sogou','<a href="http://www.sogou.com/" target="_blank">Sogou Spider</a>',\r
-'icjobs\.de', '<a href="http://www.icjobs.de/" title="April 10, 2014: UA contains indentification during the first and second page access only." target="_blank">iCjobs Spider</a> Note: Most traffic counts as user traffic',\r
-#20130805 The user agent string of the icjobs-spider contained the\r
-#identifying string only when it accessed the robots.txt file.\r
-#When it accessed the actual content it did not identify itself as\r
-#a spider. Thus traffic of this spider was counted as user traffic.\r
-#The behavious seems to have changed now - the spider identifies itself\r
-#when it accesses content pages.\r
-#20141401 Behavior as before: Does identify itself when it accesses\r
-# robots.txt and the root page. The following traffic does not contain\r
-# the identification string and is therefore counted as user traffic.\r
-'iconoclast','Popular Iconoclast',\r
-'ilse','Ingrid',\r
-'imagelock','Imagelock',\r
-'incywincy','IncyWincy',\r
-'informant','Informant',\r
-'infoseek','InfoSeek Robot 1.0',\r
-'infoseeksidewinder','Infoseek Sidewinder',\r
-'infospider','InfoSpiders',\r
-'inspectorwww','Inspector Web',\r
-'intelliagent','IntelliAgent',\r
-'ips\-agent', 'ips-agent Verisign(?) - no reliable information found.',\r
-'irobot','I, Robot',\r
-'iron33','Iron33',\r
-'israelisearch','Israeli-search',\r
-'javabee','JavaBee',\r
-'jbot','JBot Java Web Robot',\r
-'jcrawler','JCrawler',\r
-'jobo','JoBo Java Web Robot',\r
-'jobot','Jobot',\r
-'joebot','JoeBot',\r
-'jubii','The Jubii Indexing Robot',\r
-'jumpstation','JumpStation',\r
-'kapsi','image.kapsi.net',\r
-'katipo','Katipo',\r
-'kilroy','Kilroy',\r
-'ko[_+\s]yappo[_+\s]robot','KO_Yappo_Robot',\r
-'kummhttp','<a href="http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_g_l_301105_2\b" title="Bot documentation page [new window]" target="_blank">KummHttp</a>',\r
-'labelgrabber\.txt','LabelGrabber',\r
-'larbin','<a href="http://para.inria.fr/~ailleret/larbin/index-eng.html" title="Bot home page [new window]" target="_blank">larbin</a>',\r
-'legs','legs',\r
-'linkidator','Link Validator',\r
-'linkscan','LinkScan',\r
-'lockon','Lockon',\r
-'logo_gif','logo.gif Crawler',\r
-'macworm','Mac WWWWorm',\r
-'lmspider','<a href="http://www.nuance.com/" title="Bot home page lmspider@scansoft.com [new window]" target="_blank">lmspider</a>',\r
-'lwp\-request','<a href="http://search.cpan.org/~gaas/libwww-perl-5.69/bin/lwp-request" title="lwp-request home page [new window]" target="_blank">lwp-request</a>',\r
-'lwp\-trivial','<a href="http://search.cpan.org/src/GAAS/libwww-perl-5.805/lib/LWP/Simple.pm" title="lwp-trivial home page [new window]" target="_blank">lwp-trivial</a>',\r
-'magpie','<a href="http://magpierss.sf.net/" title="Bot home page [new window]" target="_blank">MagpieRSS</a>',\r
-'marvin','marvin/infoseek',\r
-'mattie','Mattie',\r
-'mediafox','MediaFox',\r
-'merzscope','MerzScope',\r
-'meshexplorer','NEC-MeshExplorer',\r
-'mindcrawler','MindCrawler',\r
-'mnogosearch','mnoGoSearch search engine software',\r
-'momspider','MOMspider',\r
-'monster','Monster',\r
-'motor','Motor',\r
-'muncher','Muncher',\r
-'mwdsearch','Mwd.Search',\r
-'ndspider','NDSpider',\r
-'nederland\.zoek','Nederland.zoek',\r
-'netcarta','NetCarta WebMap Engine',\r
-'netmechanic','<a href="http://www.netmechanic.com/" title="Bot home page [new window]" target="_blank">NetMechanic</a>',\r
-'netscoop','NetScoop',\r
-'newscan\-online','newscan-online',\r
-'nhse','NHSE Web Forager',\r
-'northstar','The NorthStar Robot',\r
-'nzexplorer','nzexplorer',\r
-'objectssearch','ObjectsSearch',\r
-'occam','Occam',\r
-'octopus','HKU WWW Octopus',\r
-'openfind','Openfind data gatherer',\r
-'orb_search','Orb Search',\r
-'packrat','Pack Rat',\r
-'pageboy','PageBoy',\r
-'parasite','ParaSite',\r
-'patric','Patric',\r
-'pegasus','pegasus',\r
-'perignator','The Peregrinator',\r
-'perlcrawler','PerlCrawler 1.0',\r
-'phantom','Phantom',\r
-'phpdig','PhpDig',\r
-'piltdownman','PiltdownMan',\r
-'pimptrain','Pimptrain.com\'s robot',\r
-'pioneer','Pioneer',\r
-'pitkow','html_analyzer',\r
-'pjspider','Portal Juice Spider',\r
-'plumtreewebaccessor','PlumtreeWebAccessor',\r
-'poppi','Poppi',\r
-'portalb','PortalB Spider',\r
-'psbot','<a href="http://www.picsearch.com/bot.html" title="Bot home page" target="_blank">psbot</a>',\r
-'python','<a href="http://docs.python.org/library/urllib.html" title="Tools developed using a Python library" target="_blank">Python-urllib</a>',\r
-'raven','Raven Search',\r
-'rbse','RBSE Spider',\r
-'resumerobot','Resume Robot',\r
-'rhcs','RoadHouse Crawling System',\r
-'road_runner','Road Runner: The ImageScape Robot',\r
-'robbie','Robbie the Robot',\r
-'robi','ComputingSite Robi/1.0',\r
-'robocrawl','RoboCrawl Spider',\r
-'robofox','RoboFox',\r
-'robozilla','Robozilla',\r
-'roverbot','Roverbot',\r
-'rules','RuLeS',\r
-'safetynetrobot','SafetyNet Robot',\r
-'semalt', '<a href="http://semalt.semalt.com/" rel="nofollow" title="seamalt.com Home Page [new window]" target="_blank">seamalt.com</a>',\r
-'search\-info','Sleek',\r
-'search_au','Search.Aus-AU.COM',\r
-'searchprocess','SearchProcess',\r
-'senrigan','Senrigan',\r
-'sgscout','SG-Scout',\r
-'shaggy','ShagSeeker',\r
-'shaihulud','Shai\'Hulud',\r
-'sift','Sift',\r
-'simbot','Simmany Robot Ver1.0',\r
-'sistrix', '<a href="http://crawler.sistrix.net/" rel="nofollow" title="SISTRIX Crawler Page [new window]" target="_blank">SISTRIX Crawler</a>',\r
-'site\-valet','Site Valet',\r
-'sitetech','SiteTech-Rover',\r
-'skymob','Skymob.com',\r
-'slcrawler','SLCrawler',\r
-'smartspider','Smart Spider',\r
-'snooper','Snooper',\r
-'solbot','Solbot',\r
-'speedy','<a href="http://www.entireweb.com/about/search_tech/speedyspider/" title="Speedy Spider home page [new window]" target="_blank">Speedy Spider</a>',\r
-'spider[_+\s]monkey','Spider monkey',\r
-'spiderbot','SpiderBot',\r
-'spiderline','Spiderline Crawler',\r
-'spiderlytics', 'Spiderlytics: No homepage, e-mail only: spider (at) spiderlytics.com',\r
-'spiderman','<a href="http://www.iscrawling.com" title="Spiderman home page [new window]" target="_blank">Spiderman</a>',\r
-'spiderview','SpiderView(tm)',\r
-'spry','Spry Wizard Robot',\r
-'ssearcher','Site Searcher',\r
-'sqworm','<a href="http://www.websense.com/" title="Bot home page (source: http://www.pgts.com.au/) [new window]" target="_blank">Sqworm</a>',\r
-'suke','Suke',\r
-'sunrise','<a href="http://www.sunrisexp.com/" title="Sunrise home page [new window]" target="_blank">Sunrise</a>',\r
-'suntek','suntek search engine',\r
-'sven','Sven',\r
-'tach_bw','TACH Black Widow',\r
-'tagyu_agent','<a href="http://www.tagyu.com/" title="Bot home page [new window]" target="_blank">Tagyu Agent</a>',\r
-'tarantula','Tarantula',\r
-'tarspider','tarspider',\r
-'tailrank','<a href="http://tailrank.com/robot">TailRank</a>',\r
-'techbot','TechBOT',\r
-'templeton','Templeton',\r
-'titan','TITAN',\r
-'titin','TitIn',\r
-'tkwww','The TkWWW Robot',\r
-'tlspider','TLSpider',\r
-'ucsd','UCSD Crawl',\r
-'udmsearch','UdmSearch',\r
-'universalfeedparser','<a href="http://feedparser.org/" title="Bot home page [new window]" target="_blank">UniversalFeedParser</a>',\r
-'urlck','URL Check',\r
-'valkyrie','Valkyrie',\r
-'verticrawl','Verticrawl',\r
-'victoria','Victoria',\r
-'visionsearch','vision-search',\r
-'voidbot','void-bot',\r
-'vwbot','VWbot',\r
-'w3index','The NWI Robot',\r
-'w3m2','W3M2',\r
-'wallpaper','WallPaper (alias crawlpaper)',\r
-'wanderer','the World Wide Web Wanderer',\r
-'wapspider','w@pSpider by wap4.com',\r
-'webbandit','WebBandit Web Spider',\r
-'webcatcher','WebCatcher',\r
-'webcopy','WebCopy',\r
-'webfetcher','webfetcher',\r
-'webfoot','The Webfoot Robot',\r
-'webinator','Webinator',\r
-'weblinker','WebLinker',\r
-'webmirror','WebMirror',\r
-'webmoose','The Web Moose',\r
-'webquest','WebQuest',\r
-'webreader','Digimarc MarcSpider',\r
-'webreaper','WebReaper',\r
-'websnarf','Websnarf',\r
-'webspider','WebSpider',\r
-'webvac','WebVac',\r
-'webwalk','webwalk',\r
-'webwalker','WebWalker',\r
-'webwatch','WebWatch',\r
-'whatuseek','whatUseek Winona',\r
-'whowhere','WhoWhere Robot',\r
-'wired\-digital','Wired Digital',\r
-'wmir','w3mir',\r
-'wolp','WebStolperer',\r
-'wombat','The Web Wombat',\r
-'wordpress','<a href="http://wordpress.org/" title="WordPress home page [new window]" target="_blank">WordPress</a>',\r
-'worm','The World Wide Web Worm',\r
-'woozweb','Woozweb Monitoring',\r
-'wwwc','WWWC Ver 0.2.5',\r
-'wz101','WebZinger',\r
-'xenu\slink\ssleuth', '<a href="http://home.snafu.de/tilman/xenulink.html" rel="nofollow" title="Description, Download, FAQ Page [new window]" target="_blank">Xenu'. "'" . 's Link Sleuth <sup>(TM)</sup></a>, see <a href="http://en.wikipedia.org/wiki/Xenu%27s_Link_Sleuth" rel="nofollow" title="Wikipedia on Xenu'. "'" . 's Link Sleuth [new window]" target="_blank">Wikipedia</a>',\r
-'xget','XGET',\r
-# Other robots reported by users\r
-'^finbot', '<span title="As on Sep. 10, 2015, the user agent string did not contain a web address.">finbot</span>',\r
-'^webindex$', '<span title="As on Oct. 28, 2015, the user agent string did not contain a web address.">WebIndex</span>',\r
-'1\-more_scanner','<a href="http://www.myzips.com/software/1-More-Scanner.phtml" title="1-More Scanner home page [new window]" target="_blank">1-More Scanner</a>',\r
-'360spider','<a href="https://www.google.com/search?q=360spider+-Ferrari" title="No home page, using Google search instead [new window]" target="_blank">360spider</a>',\r
-'a6-indexer',  '<a href="http://www.a6corp.com/a6-web-scraping-policy/" rel="nofollow" title="A6-Indexer [new window]" target="_blank">A6-Indexer</a>',\r
-'accoona\-ai\-agent','<a href="http://www.accoona.com/" title="Accoona-AI-Agent home page [new window]" target="_blank">Accoona-AI-Agent</a>',\r
-'activebookmark','<a href="http://www.libmaster.com/active_bookmark.php" title="ActiveBookmark home page [new window]" target="_blank">ActiveBookmark</a>',\r
-'adamm_bot','<a href="http://home.blic.net/adamm/" title="Bot home page [new window]" target="_blank">AdamM Bot</a>',\r
-'adsbot-google', '<a href="http://www.google.com/adsbot.html" rel="nofollow" title="AdsBot-Google home page [new window]" target="_blank">AdsBot-Google</a>',\r
-'advbot', '<a href="http://advbot.net/bot.html" rel="nofollow" title="AdvBot Home Page [new window]" target="_blank">AdvBot</a>',\r
-'affectv\.co\.uk', '<a href="http://www.affectv.co.uk" rel="nofollow" title="affectv.co.uk Home Page [new window]" target="_blank">affectv.co.uk</a>',\r
-'almaden','<a href="http://www.almaden.ibm.com/cs/crawler" title="IBM Almaden Research Center WebFountain&trade; Bot home page [new window]" target="_blank">IBM Almaden</a> Research Center WebFountain&trade;',\r
-'aipbot','<a href="http://www.aipbot.com/" title="aipbot@aipbot.com Bot home page [new window]" target="_blank">aipbot</a>',\r
-'aleadsoftbot','<a href="http://www.aleadsoft.com/bot.htm" title="ALeadSoftbot home page [new window]" target="_blank">ALeadSoftbot</a>',\r
-'alpha_search_agent','Alpha Search Agent',\r
-'allrati','Allrati',\r
-'aport', 'Aport',\r
-'applebot', '<a href="http://www.apple.com/go/applebot" rel="nofollow" title="Applebot Home Page [new window]" target="_blank">Applebot</a>',\r
-'archive\-de\.com',  '<a href="http://archive-de.com/bot" rel="nofollow" title="Archive-de.com Home Page [new window]" target="_blank">Archive-de.com</a>',\r
-'archive\.org_bot','<a href="http://crawls.archive.org/collections/bncf/crawl.html" title="Bot home page [new window]" target="_blank">archive.org bot</a>',\r
-'argus','<a href="http://www.simpy.com/bot.html" title="feedback@simpy.com Bot home page [new window]" target="_blank">Argus</a>',\r
-'arianna\.libero\.it','<a href="http://arianna.libero.it/" title="Bot home page [new window]" target="_blank">arianna.libero.it</a>',\r
-'aspseek','<a href="http://www.aspseek.org/" title="Bot home page [new window]" target="_blank">ASPseek</a>',\r
-'asterias', 'Asterias',\r
-'awbot', 'AWBot',\r
-'backlinktest\.com', '<a href="http://www.backlinktest.com/crawler.html" title="BacklinkCrawler [new window]" target="_blank">BacklinkCrawler</a>',\r
-'baiduspider','<a href="http://www.baidu.com/search/spider.html" title="Bot home page [new window]" target="_blank">BaiDuSpider</a>',\r
-'becomebot', '<a href="http://www.become.com/site_owners.html" title="Bot home page [new window]" target="_blank">BecomeBot</a>',\r
-'bender','<a href="http://bender.ucr.edu/" title="Bot home page [new window]" target="_blank">bender</a> <a href="http://ivia.ucr.edu/manuals/NiFC/current/index.shtml" title="Bot home page [new window]" target="_blank">focused_crawler</a>',\r
-'betabot','BetaBot',\r
-'biglotron','<a href="http://www.biglotron.com/robot.html" title="Bot home page [new window]" target="_blank">Biglotron</a>',\r
-'bittorrent_bot','<a href="http://www.bittorrent.com/" title="Bot home page [new window]" target="_blank">BitTorrent Bot</a>',\r
-'biz360[_+\s]spider','<a href="http://www.biz360.com/" title="blogsmanager@biz360.com Bot home page [new window]" target="_blank">Biz360 spider</a>',\r
-'blexbot', '<a href="http://webmeup-crawler.com" rel="nofollow" title="BLEXBot Home Page [new window]" target="_blank">BLEXBot</a>, seems to belong to the <a href="http://webmeup.com/" rel="nofollow" title="WebMeUp Home Page [new window]" target="_blank">WebMeUp backlink tool</a>',\r
-'blogbridge[_+\s]service','<a href="http://www.blogbridge.com/" title="Bot home page [new window]" target="_blank">BlogBridge Service</a>',\r
-'bloglines','<a href="http://www.bloglines.com/" title="Bot home page [new window]" target="_blank">Bloglines</a>',\r
-'blogpulse','<a href="http://www.intelliseek.com/" title="Bot home page [new window]" target="_blank">BlogPulse ISSpider intelliseek.com</a>',\r
-'blogsearch','<a href="http://www.icerocket.com/" title="Bot home page [new window]" target="_blank">BlogSearch</a>',\r
-'blogshares','<a href="http://blogshares.com/help.php?node=7" title="Bot home page [new window]" target="_blank">Blogshares Spiders</a>',\r
-'blogslive','<a href="http://www.blogslive.com/" title="info@blogslive.com Bot home page [new window]" target="_blank">Blogslive</a>',\r
-'blogssay','<a href="http://www.blogssay.com/" title="Bot home page [new window]" target="_blank">BlogsSay :: RSS Search Crawler</a>',\r
-'bncf\.firenze\.sbn\.it\/raccolta\.txt','<a href="http://www.bncf.firenze.sbn.it/raccolta.txt" title="Bot home page [new window]" target="_blank">Biblioteca Nazionale Centrale di Firenze</a>',\r
-'bobby', 'Bobby',\r
-'boitho\.com\-dc','<a href="http://www.boitho.com/dcbot.html" title="Bot home page [new window]" target="_blank">boitho.com-dc</a>',\r
-'bookmark\-manager','<a href="http://bkm.sourceforge.net/" title="Bookmark-Manager home page [new window]" target="_blank">Bookmark-Manager</a>',\r
-'boris', 'Boris',\r
-'bubing', '<a href="http://law.di.unimi.it/BUbiNG.html" title="BUbiNG [new window]" target="_blank">BUbiNG</a>',\r
-'bumblebee', 'Bumblebee (relevare.com)',\r
-'candlelight[_+\s]favorites[_+\s]inspector','<a href="http://www.candlelight.com/home.html" title="Candlelight_Favorites_Inspector  home page [new window]" target="_blank">Candlelight_Favorites_Inspector</a>',\r
-'careerbot',  '<a href="http://www.career-x.de/bot.html" rel="nofollow" title="CareerBot home page [new window]" target="_blank">CareerBot</a>',\r
-'cbn00glebot','cbn00glebot',\r
-'ccbot', '<a href="http://commoncrawl.org/faq/" rel="nofollow" title="Common Crawl FAQ Page [new window]" target="_blank">Common Crawl</a>',\r
-'cerberian_drtrs','<a href="http://www.pgts.com.au/cgi-bin/psql?robot_info=25240" title="Bot home page [new window]" target="_blank">Cerberian Drtrs</a>',\r
-'cfnetwork','<a href="http://www.cocoadev.com/index.pl?CFNetwork" title="CFNetwork home page [new window]" target="_blank">CFNetwork</a>',\r
-'cipinetbot','<a href="http://www.cipinet.com/bot.html" title="CipinetBot home page [new window]" target="_blank">CipinetBot</a>',\r
-'checkweb_link_validator','<a href="http://p.duby.free.fr/chkweb.htm" title="CheckWeb link validator home page [new window]" target="_blank">CheckWeb link validator</a>',\r
-'cliqzbot', '<a href="http://cliqz.com/company/cliqzbot" rel="nofollow" title="Cliqzbot Home Page [new window]" target="_blank">Cliqzbot</a>',\r
-'commons\-httpclient','<a href="http://jakarta.apache.org/commons/httpclient/" title="Bot home page [new window]" target="_blank">Jakarta commons-httpclient</a>',\r
-'computer_and_automation_research_institute_crawler','<a href="http://www.ilab.sztaki.hu/~stamas/publications/p184-benczur.html" title="Computer and Automation Research Institute Crawler home page [new window]" target="_blank">Computer and Automation Research Institute Crawler</a>',\r
-'converamultimediacrawler','<a href="http://www.authoritativeweb.com/crawl/" title="ConveraMultiMediaCrawler home page [new window]" target="_blank">ConveraMultiMediaCrawler</a>',\r
-'converacrawler','<a href="http://www.authoritativeweb.com/crawl/" title="ConveraCrawler home page [new window]" target="_blank">ConveraCrawler</a>',\r
-'copubbot', '<a href="http://www.copub.com/bot.php" rel="nofollow" title="CoPubbot Home Page [new window] Note: Access to bot home page gave a 404 error on Dec 21, 2013" target="_blank">CoPubbot</a>',\r
-'cscrawler','CsCrawler',\r
-'cse_html_validator_lite_online','<a href="http://online.htmlvalidator.com/php/onlinevallite.php" title="CSE HTML Validator Lite Online home page [new window]" target="_blank">CSE HTML Validator Lite Online</a>','cuasarbot','<a href="http://www.cuasar.com/" title="Cuasarbot home page [new window]" target="_blank">Cuasarbot</a>',\r
-'cursor','<a href="http://adcenter.hu/docs/en/bot.html " title="Cursor home page [new window]" target="_blank">Cursor</a>',\r
-'custo','<a href="http://www.netwu.com/custo/" title="Custo home page [new window]" target="_blank">Custo</a>',\r
-'datafountains\/dmoz_downloader','<a href="http://infomine.ucr.edu/ " title="DataFountains/DMOZ Downloader home page [new window]" target="_blank">DataFountains/DMOZ Downloader</a>',\r
-'dataprovider\.com', '<a href="http://www.dataprovider.com/" title="Dataprovider Site Explorer [new window]" target="_blank">Dataprovider Site Explorer</a>',\r
-'daumoa', '<a href="http://tab.search.daum.net/aboutWebSearch.html" title="Daum [new window]" target="_blank">Daum</a>',\r
-'daviesbot', 'DaviesBot',\r
-'daypopbot', 'DayPop',\r
-'deepindex','<a href="http://www.deepindex.net/faq.php" title="Deepindex home page [new window]" target="_blank">Deepindex</a>',\r
-'deusu', '<a href="https://deusu.de/robot.html" rel="nofollow" title="DeuSu [new window]" target="_blank">DeuSu</a>',\r
-'dipsie\.bot','<a href="http://www.dipsie.com/bot/" title="Bot home page [new window]" target="_blank">Dipsie</a>',\r
-'dnsgroup','<a href="http://www.dnsgroup.com/" title="DNSGroup home page [new window]" target="_blank">DNSGroup</a>',\r
-'doccheckbot', 'doccheckbot/1.0, known to <a href="http://www.projecthoneypot.org/ip_46.229.160.208" rel="nofollow" title="Info to IP 46.229.160.208 [new window]" target="_blank">Project Honey Pot</a>',\r
-'domainchecker','<a href="http://net-promoter.com/" title="DomainChecker home page (not confirmed) [new window]" target="_blank">DomainChecker</a>',\r
-'domainsdb\.net','<a href="http://domainsdb.net/" title="Bot home page [new window]" target="_blank">DomainsDB.net</a>',\r
-'dotbot',  '<a href="http://www.opensiteexplorer.org/dotbot" rel="nofollow" title="Home Page [new window]" target="_blank">DotBot, Open Site Explorer</a>',\r
-'duckduckgo-favicons-bot', '<a href="http://duckduckgo.com" rel="nofollow" title="DuckDuckGo Home Page [new window]" target="_blank">DuckDuckGo-Favicons-Bot</a>',\r
-'dulance','<a href="http://www.dulance.com/bot.jsp" title="Bot home page [new window]" target="_blank">Dulance</a>',\r
-'dumbot','<a href="http://www.dumbfind.com/" title="Dumbot home page [new window]" target="_blank">Dumbot</a>',\r
-'dumm\.de\-bot','<a href="http://www.dumm.de/" title="dumm.de-Bot home page [new window]" target="_blank">dumm.de-Bot</a>',\r
-'earthcom\.info','<a href="http://www.earthcom.info/" title="Bot home page [new window]" target="_blank">EARTHCOM.info</a>',\r
-'easydl','<a href="http://keywen.com/Encyclopedia/Bot/" title="EasyDL  home page [new window]" target="_blank">EasyDL</a>',\r
-'eccp', '<a href="http://www.eniro.com/" rel="nofollow" title="Eniro Sverige home page [new window]" target="_blank">Eniro Sverige, email: search (at) eniro.com</a>',\r
-'edgeio\-retriever','<a href="http://www.edgeio.com/" title="Bot home page [new window]" target="_blank">edgeio-retriever</a>',\r
-'ernst[:blank:]2\.0', 'Ernst 2.0 (does not provide any further information)',\r
-'ets_v','<a href="http://www.freetranslation.com/help/" title="ETS home page [new window]" target="_blank">ETS</a> Enterprise Translation Server',\r
-'exactseek','ExactSeek Crawler',\r
-'extreme[_+\s]picture[_+\s]finder','<a href="http://www.exisoftware.com/" title="Extreme_Picture_Finder home page [new window]" target="_blank">Extreme_Picture_Finder</a>',\r
-'eventax','<a href="http://www.eventax.de/" title="eventax home page [new window]" target="_blank">eventax</a>',\r
-'everbeecrawler','EverbeeCrawler',\r
-'everest\-vulcan','<a href="http://everest.vulcan.com/crawlerhelp" title="Bot home page [new window]" target="_blank">Everest-Vulcan</a>',\r
-'ezresult', 'Ezresult',\r
-'enteprise','<a href="http://www.fastsearch.com/" title="Bot home page [new window]" target="_blank">Fast Enteprise Crawler</a>',\r
-'facebook','FaceBook bot',\r
-'facebot', '<a href="https://developers.facebook.com/docs/opengraph/howtos/maximizing-distribution-media-content" rel="nofollow" title=" Home Page [new window]" target="_blank">Facebot (Facebook bot?)</a>',\r
-'fast\-search\-engine','<a href="http://www.fast-search-engine.com/" title="Bot home page [new window]" target="_blank">Fast-Search-Engine</a> (not fastsearch.com)',\r
-'fast_enterprise_crawler','<a href="http://www.fast.no/" title="FAST Enterprise Crawler home page [new window]" target="_blank">FAST Enterprise Crawler</a>',\r
-'fast_enterprise_crawler.*scrawleradmin\.t\-info@telekom\.de','<a href="http://www.telekom.de/" title="FAST Enterprise Crawler * crawleradmin.t-info@telekom.de home page [new window]" target="_blank">FAST Enterprise Crawler * crawleradmin.t-info@telekom.de</a>',\r
-'finderlein[_+\s]research[_+\s]crawler', 'Finderlein Research Crawler 1.0 (no contact information given)',\r
-'matrix_s\.p\.a\._\-_fast_enterprise_crawler','<a href="http://tin.virgilio.it/" title="Matrix S.p.A. - FAST Enterprise Crawler home page [new window]" target="_blank">Matrix S.p.A. - FAST Enterprise Crawler</a>',\r
-'fast_enterprise_crawler.*t\-info_bi_cluster_crawleradmin\.t\-info@telekom\.de','<a href="http://www.telekom.de/" title="FAST Enterprise Crawler * T-Info_BI_cluster crawleradmin.t-info@telekom.de home page [new window]" target="_blank">FAST Enterprise Crawler * T-Info_BI_cluster crawleradmin.t-info@telekom.de</a>',\r
-'fastbot', '<a href="http://www.fastbot.de" rel="nofollow" title="fastbot Home Page [new window]" target="_blank">fastbot</a>',\r
-'favicon','FavIconizer',\r
-'favorg','<a href="http://www.pcmag.com/article2/0,4149,108438,00.asp" title="FavOrg home page [new window]" target="_blank">FavOrg</a>',\r
-'favorites_sweeper','<a href="http://www.manitools.com/favsweep/" title="Favorites_Sweeper home page [new window]" target="_blank">Favorites Sweeper</a>',\r
-'feedburner', 'Feedburner',\r
-'feedfetcher\-google','<a href="http://www.google.com/feedfetcher.html" title="Bot home page [new window]" target="_blank">Feedfetcher-Google</a>',\r
-'feedflow','<a href="http://feedflow.com/about" title="Bot home page [new window]" target="_blank">FeedFlow</a>',\r
-'feedster','<a href="http://www.feedster.com/" title="Bot home page [new window]" target="_blank">Feedster</a>',\r
-'feedsky','<a href="http://www.feedsky.com/" title="Bot home page [new window]" target="_blank">FeedSky</a>',\r
-'feedvalidator','<a href="http://feedvalidator.org/" title="FeedValidator home page [new window]" target="_blank">FeedValidator</a>',\r
-'fetchbot', '<a href="https://github.com/PuerkitoBio/fetchbot" rel="nofollow" title="Fetchbot Home Page [new window]" target="_blank">Fetchbot</a>',\r
-'filmkamerabot','<a href="http://www.filmkamera.at/bot.html" title="FilmkameraBot home page [new window]" target="_blank">FilmkameraBot</a>',\r
-'filterdb\.iss\.net',  '<a href="http://filterdb.iss.net/crawler/" title="oBot Home Page [new window]" target="_blank">oBot</a>',\r
-'findexa_crawler','<a href="http://www.findexa.no/gulesider/article26548.ece " title="Findexa Crawler home page [new window]" target="_blank">Findexa Crawler</a>',\r
-'firmilybot', '<a href="http://www.firmily.com/bot.php" title="Firmily Bot [new window]" target="_blank">Firmily Bot Home page (Website was hacked on Oct. 19, 2013)</a>',\r
-'findlinks','<a href="http://wortschatz.uni-leipzig.de/findlinks/" title="Bot home page [new window]" target="_blank">Findlinks</a>',\r
-'foaf-search\.net', '<a href="http://www.foaf-search.net/" title="Friend of a friend (FOAF) search engine [new window]" target="_blank">Friend of a friend (FOAF) search engine</a>',\r
-'fooky\.com\/ScorpionBot','<a href="http://www.fooky.com/scorpionbots" title="Fooky.com/ScorpionBot/ScoutOut home page [new window]" target="_blank">Fooky.com/ScorpionBot/ScoutOut</a>',\r
-'g2crawler','<a href="http://crawler.instantnetworks.net/" title="Bot home page (nobody@airmail.net) [new window]" target="_blank">G2Crawler</a>',\r
-'gaisbot','<a href="http://gais.cs.ccu.edu.tw/robot.php" title="Bot home page [new window]" target="_blank">Gaisbot</a>',\r
-'geniebot','<a href="http://www.genieknows.com/" title="Bot home page [new window]" target="_blank">Geniebot</a>',\r
-'genieo', '<a href="http://www.genieo.com/webfilter.html" rel="nofollow" title="Genieo [new window]" target="_blank">Genieo</a>',\r
-'gigablastopensource', '<a href="http://www.gigablast.com/" rel="nofollow" title="Gigablast Home page [new window]"  target="_blank">GigablastOpenSource</a>, an Open Source Search Engine(<a href="https://github.com/gigablast/open-source-search-engine/wiki" rel="nofollow" title="at GitHub [new window]" target="_blank">Wiki</a>)',\r
-'gigabot','<a href="http://www.gigablast.com/spider.html" title="Bot home page [new window]" target="_blank">GigaBot</a>',\r
-'girafabot','<a href="http://www.girafa.com/" title="Bot home page [new window]" target="_blank">Girafabot</a>',\r
-'global_fetch','<a href="http://www.wesonet.com/" title="Global Fetch home page [new window]" target="_blank">Global Fetch</a>',\r
-'gnodspider','GNOD Spider',\r
-'goforit\.com','<a href="http://www.goforit.com/about/" title="GoForIt.com home page [new window]" target="_blank">GoForIt.com</a>',\r
-'goforitbot','<a href="http://www.goforit.com/about/" title="GOFORITBOT home page [new window]" target="_blank">GOFORITBOT</a>',\r
-'gonzo','<a href="http://www.suchen.de/faq.html" title="Bot home page [new windows]" target="_blank">suchen.de</a>',\r
-'gpu_p2p_crawler','<a href="http://gpu.sourceforge.net/search_engine.php" title="Bot home page [new window]" target="_blank">GPU p2p crawler</a>',\r
-'grapeshot', '<a href="http://www.grapeshot.co.uk/crawler.php" title="Grapeshot Crawler [new window]" target="_blank">Grapeshot Crawler</a>',\r
-'grub','Grub.org',\r
-'henrythemiragorobot', '<a href="http://www.miragorobot.com/scripts/mrinfo.asp" title="Bot home page [new window]" target="_blank">Mirago</a>',\r
-'heritrix','<a href="http://crawler.archive.org/" title="(used by a few different companies) Bot home page [new window]" target="_blank">Heritrix</a>',\r
-'holmes', 'Holmes',\r
-'hoowwwer','<a href="http://cosco.hiit.fi/search/hoowwwer/" title="HooWWWer home page [new window]" target="_blank">HooWWWer</a>',\r
-'hpprint','HPPrint',\r
-'htmlparser','<a href="http://htmlparser.sourceforge.net/" title="HTMLParser home page [new window]" target="_blank">HTMLParser</a>',\r
-'html[_+\s]link[_+\s]validator','<a href="http://www.lithopssoft.com/ " title="Html_Link_Validator home page [new window]" target="_blank">Html_Link_Validator</a>',\r
-'httrack','<a href="http://www.httrack.com/" title="Bot home page [new window]" target="_blank">HTTrack off-line browser</a>',\r
-'hundesuche\.com\-bot','<a href="http://www.hundesuche.com/" title="Hundesuche.com-Bot home page [new window]" target="_blank">Hundesuche.com-Bot</a>',\r
-'i-bot','i-bot',\r
-'icarus6j', 'Icarus6j, email address in UA string, no website',\r
-'ichiro','<a href="http://help.goo.ne.jp/door/crawlerE.html" title="Bot home page [new window]" target="_blank">ichiro</a>',\r
-'idmarch', '<a href="http://www.idmarch.org/bot.html" rel="nofollow" title=" Home Page [new window]" target="_blank">IDMARCH</a>',\r
-'iltrovatore\-setaccio','<a href="http://www.iltrovatore.it/aiuto/motore_di_ricerca.html" title="bot@iltrovatore.it IlTrovatore-Setaccio home page [new window]" target="_blank">IlTrovatore-Setaccio</a>',\r
-'implisensebot', '<span title="As on Sep. 18, 2015, the user agent string did not contain a web address.">ImplisenseBot</span>',\r
-'infobot','<a href="http://www.infobot.org/" title="InfoBot home page [new window]" target="_blank">InfoBot</a>',\r
-'infociousbot','<a href="http://corp.infocious.com/tech_crawler.php" title="InfociousBot home page [new window]" target="_blank">InfociousBot</a>',\r
-'infohelfer','<a href="http://www.infohelfer.de/crawler.php" title="Infohelfer home page [new window]" target="_blank">Infohelfer</a>',\r
-'infomine','<a href="http://infomine.ucr.edu/useragents" title="Bot home page [new window]" target="_blank">INFOMINE VLCrawler</a>',\r
-'insurancobot','<a href="http://www.fastspywareremoval.com/" title="InsurancoBot home page [new window]" target="_blank">InsurancoBot</a>',\r
-'integromedb\.org','<a href="http://www.integromedb.org/Crawler" title="IntegromeDB home page [new window]" target="_blank">IntegromeDB</a>',\r
-'internet[_+\s]ninja','<a href="http://www.dti.ne.jp/  " title="Internet_Ninja home page [new window]" target="_blank">Internet_Ninja </a>',\r
-'internetarchive','<a href="http://lucene.apache.org/nutch/bot.html " title="InternetArchive home page [new window]" target="_blank">InternetArchive</a>',\r
-'internetseer', 'InternetSeer',\r
-'internetsupervision','<a href="http://internetsupervision.com/" title="InternetSupervision home page [new window]" target="_blank">InternetSupervision</a>',\r
-'irlbot','<a href="http://irl.cs.tamu.edu/crawler" title="Bot home page [new window]" target="_blank">IRLbot</a>',\r
-'isearch2006','<a href="http://www.yahoo.com.cn/" title="isearch2006 home page [new window]" target="_blank">isearch2006</a>',\r
-'istellabot', '<a href="http://www.tiscali.it/" title="IstellaBot [new window]" target="_blank">IstellaBot</a>',\r
-'iupui_research_bot','<a href="http://spamhuntress.com/2005/04/25/a-mail-harvester-visits/" title="IUPUI_Research_Bot home page [new window]" target="_blank">IUPUI_Research_Bot</a>',\r
-'izsearch', '<a href="http://izsearch.com/" rel="nofollow" title="iZSearch Home Page [new window]" target="_blank">iZSearch</a>',\r
-'james\sbot', '<a href="http://cognitiveseo.com/bot.html" rel="nofollow" title="James BOT Home Page [new window]" target="_blank">James BOT</a>',\r
-'jobboerse', '<a href="http://www.xn--jobbrse-d1a.com" rel="nofollow" title="Jobb&ouml;rse Home Page [new window]" target="_blank">Jobb&ouml;rse</a>',\r
-'jrtwine[_+\s]software[_+\s]check[_+\s]favorites[_+\s]utility','<a href="http://www.jrtwine.com/Products/CheckFavs/" title="JRTwine_Software_Check_Favorites_Utility  home page [new window]" target="_blank">JRTwine_Software_Check_Favorites_Utility</a>',\r
-'justview', 'JustView',\r
-'kalambot','<a href="http://64.124.122.251/feedback.html" title="KalamBot home page [new window]" target="_blank">KalamBot</a>',\r
-'kamano\.de_newsfeedverzeichnis','<a href="http://www.kamano.de/" title="kamano.de NewsFeedVerzeichnis home page [new window]" target="_blank">kamano.de NewsFeedVerzeichnis</a>',\r
-'kazoombot','<a href="http://www.kazoom.ca/bot.html" title="kazoombot@kazoom.ca KazoomBot home page [new window]" target="_blank">KazoomBot</a>',\r
-'kevin','<a href="http://dznet.com/kevin/" title="Kevin home page [new window]" target="_blank">Kevin</a>',\r
-'keyoshid','<a href="http://www.yahoo.co.jp/" title="Bot home page [new window]" target="_blank">Yahoo! Japan keyoshid robot study</a>',\r
-'kinjabot', 'Kinjabot',\r
-'kinja\-imagebot', 'Kinja Imagebot',\r
-'knowitall','<a href="http://www.cs.washington.edu/research/knowitall/" title="KnowItAll home page [new window]" target="_blank">KnowItAll</a>',\r
-'knowledge\.com','<a href="http://www.knowledge.com/" title="Knowledge.com home page [new window]" target="_blank">Knowledge.com</a>',\r
-'kouaa_krawler','<a href="http://www.kouaa.com/" title="Kouaa Krawler home page [new window]" target="_blank">Kouaa Krawler</a>',\r
-'krugle','<a href="http://www.krugle.com/crawler/info.html" title="Bot home page [new window]" target="_blank">Krugle</a>',\r
-'ksibot','<a href="http://ego.ms.mff.cuni.cz/" title="Bot home page [new window]" target="_blank">ksibot</a>',\r
-'kurzor','<a href="http://www.easymail.hu/" title="cursor@easymail.hu Kurzor home page [new window]" target="_blank">Kurzor</a>',\r
-'lanshanbot','<a href="http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&amp;template=detail.html&amp;match=%5Cbid_g_l_140406_1%5Cb" title="Bot Information [new window]" target="_blank">lanshanbot</a>',\r
-'letscrawl\.com','<a href="http://letscrawl.com/" title="Bot home page [new window]" target="_blank">LetsCrawl.com</a>',\r
-'libcrawl','Crawl libcrawl',\r
-'link_valet_online','<a href="http://www.htmlhelp.com/tools/valet/" title="Link Valet Online home page [new window]" target="_blank">Link Valet Online</a>',\r
-'linkbot','LinkBot',\r
-'linkdex\.com', '<a href="http://www.linkdex.com/about/bots/" title="Bot home page [new window]" target="_blank">Linkdex</a>',\r
-'linkchecker','<a href="http://linkchecker.sourceforge.net" title="Bot home page [new window]" target="_blank">LinkChecker</a>',\r
-'linkstats\sbot', '<span title="As on Nov. 7, 2015, the user agent string did not contain a web address.">LinkStats Bot</span>',\r
-'lipperhey', '<a href="http://www.lipperhey.com/" rel="nofollow" title="Lipperhey SEO Service Home Page [new window]" target="_blank">Lipperhey SEO Service</a>',\r
-'livejournal\.com', 'LiveJournal.com',\r
-'loadtimebot', '<a href="http://www.loadtime.net/bot.html" rel="nofollow" title="LoadTimeBot Home Page [new window]" target="_blank">LoadTimeBot</a>',\r
-'lssrocketcrawler', '<span title="Example UA-String &quot;LSSRocketCrawler/1.0 LightspeedSystems&quot;">LSSRocketCrawler (no contact information)</span>',\r
-'ltbot', '<a href="http://www.language-tools.com/" title="Language Tools Home Page [new window]" target="_blank">Language Tools Bot (ltbot)</a>',\r
-'ltx71', '<a href="http://ltx71.com/" rel="nofollow" title="ltx71 Home Page [new window]" target="_blank">ltx71</a>',\r
-'madaali\.de', '<a href="http://www.madaali.de/pfadzurbotseite/bot.html" rel="nofollow" title="Link resulted in a 404 Error on Nov 6, 2014 [new window]" target="_blank">www.madaali.de</a>',\r
-'magpierss', 'MagpieRSS',\r
-'mail\.ru', '<a href="http://go.mail.ru/help/robots" title="Mail.ru bot home page [new window]" target="_blank">Mail.ru bot</a>',\r
-'mapoftheinternet\.com','<a href="http://MapoftheInternet.com/" title="MapoftheInternet.com home page [new window]" target="_blank">MapoftheInternet.com</a>',\r
-'meanpathbot', '<a href="http://www.meanpath.com/meanpathbot.html" rel="nofollow" title="Meanpathbot Home Page [new window]" target="_blank">Meanpathbot</a>',\r
-'mediabot', '<a href="http://isdownload.biz" rel="nofollow" title="MediaBot refers to isdownload.biz [new window]" target="_blank">MediaBot</a>',\r
-'mediapartners\-google','<a href="https://adwords.google.com/" title="Bot home page [new window]" target="_blank">Google AdSense</a>',\r
-# 'Mediapartners-Google (Feb 12, 2015: no additial information in UA String, seems to use <a href="http://www.gigablast.com/" title="Gigablast Home page [new window]">GigablastOpenSource</a>',\r
-# Uses UA string "Mediapartners-Google" only, and there were accesses using an UA string "GigablastOpenSource/1.0" from the same IP-Address.\r
-# Therefore this is probably not related to Google 4.3.2015 Albrecht Müller\r
-'megaindex', '<a href="http://megaindex.com/crawler" rel="nofollow" title="MegaIndex Crawler Page [new window]" target="_blank">MegaIndex Crawler</a>, seems to belong to <a href="https://www.megaindex.ru/" rel="nofollow" title="MegaIndex.ru Home Page [new window]" target="_blank">MegaIndex.ru</a>',\r
-'megite','<a href="http://www.megite.com/" title="Megite home page [new window]" target="_blank">Megite</a>',\r
-'memorybot', '<a href="http://archivethe.net/en/index.php/about/internet_memory1" rel="nofollow" title="Archivethe.net Home Page [new window]" target="_blank">Archivethe.net</a>',\r
-'metager2-verification-bot', '<a href="http://metager2.de/technology.php" rel="nofollow" title="metager2-verification-bot Home Page [new window]" target="_blank">metager2-verification-bot</a>',\r
-'metager\-linkchecker','MetaGer LinkChecker',\r
-'metajobbot', '<a href="http://www.metajob.de/crawler" rel="nofollow" title="MetaJobBot [new window]" target="_blank">MetaJobBot</a>',\r
-'metaspinner','<a href="http://index.meta-spinner.de/" title="Metaspinner home page [new window]" target="_blank">Metaspinner</a>',\r
-'miadev',  '<a href="http://www.mia-marktplatz.de/spider" rel="nofollow" title="MiaDev spider [new window]" target="_blank">MiaDev spider</a>',\r
-'microsoft\sbits', '<a href="http://msdn.microsoft.com/en-us/library/bb968799%28v=vs.85%29.aspx" rel="nofollow" title="Microsoft Background Intelligent Transfer Service (BITS)? [new window]" target="_blank">Microsoft Background Intelligent Transfer Service (BITS)?</a>',\r
-'microsoft.*discovery', '<a href="http://support.microsoft.com/kb/838028/en-us" title="Microsoft KB838028 [new window]" target="_blank">Microsoft Office Protocol Discovery</a>/<a href="http://blogs.msdn.com/b/vsofficedeveloper/archive/2008/03/11/office-existence-discovery-protocol.aspx" title="Description of the Microsoft Office Existence Discovery [new window]" target="_blank">Microsoft Office Existence Discovery</a>',\r
-'microsoft[_+\s]url[_+\s]control','<a href="http://www.webmasterworld.com/forum11/1005.htm" title="Microsoft URL Control  home page [new window]" target="_blank">Microsoft URL Control</a>',\r
-'mindupbot', '<a href="http://datenbutler.de" rel="nofollow" title="DATENBUTLER home page [new window]" target="_blank">mindUpBot (datenbutler.de)</a>',\r
-'minirank','<a href="http://minirank.com/" title="miniRank home page [new window]" target="_blank">miniRank</a>',\r
-'mini\-reptile','Mini-reptile',\r
-'missigua_locator','<a href="http://www.webmasterworld.com/forum11/2690.htm" title="Missigua_Locator  home page [new window]" target="_blank">Missigua_Locator</a>',\r
-'misterbot','<a href="http://www.misterbot.fr/" title="Misterbot home page [new window]" target="_blank">Misterbot</a>',\r
-'miva','<a href="http://www.miva.com/" title="Miva home page [new window]" target="_blank">Miva</a>',\r
-'mizzu_labs','<a href="http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&amp;template=detail.html&amp;match=\bid_m_141105_2\b " title="Mizzu Labs home page [new window]" target="_blank">Mizzu Labs</a>',\r
-'mj12bot','<a href="http://majestic12.co.uk/bot.php" title="Bot home page. [new window]" target="_blank">MJ12bot</a>',\r
-'mojeekbot','<a href="http://www.mojeek.com/bot.html" title="Bot home page. [new window]" target="_blank">MojeekBot</a>',\r
-'msiecrawler','<a href="http://msdn.microsoft.com/workshop/delivery/offline/linkrel.asp" title="Bot home page. [new window]" target="_blank">MSIECrawler</a>',\r
-'ms[_+\s]search[_+\s]6\.0[_+\s]robot','<a href="http://support.microsoft.com/default.aspx?scid=kb;en-us;284022" title="Microsoft Support Page. [new window]" target="_blank">MS Search 6.0 Robot</a> (MS SharePoint Portal Server?)',\r
-'ms_search_4\.0_robot','<a href="http://support.microsoft.com/default.aspx?scid=kb;en-us;284022" title="Bot home page. [new window]" target="_blank">MS SharePoint Portal Server - MS Search 4.0 Robot</a>',\r
-'msrabot','msrabot',\r
-'msrbot','<a href="http://research.microsoft.com/research/sv/msrbot/" title="MSRBOT home page [new window]" target="_blank">MSRBOT</a>',\r
-'mt::telegraph::agent','MT::Telegraph::Agent',\r
-'mydoyouhike','<a href="http://www.doyouhike.net/my" title="Mydoyouhike home page [new window]" target="_blank">Mydoyouhike</a>',\r
-'nagios','Nagios',\r
-'nasa_search','<a href="http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&amp;template=detail.html&amp;match=\bid_n_s_140506_2\b" title="NASA Search home page [new window]" target="_blank">NASA Search</a>',\r
-'netestate\sne\scrawler','<a href="http://www.website-datenbank.de/" title="Website-Datenbank home page [new window]" target="_blank">Website-Datenbank</a>',\r
-'netluchs','<a href="http://www.netluchs.de/" title="Bot home page. [new window]" target="_blank">Netluchs</a>',\r
-'netsprint','<a href="http://www.netsprint.pl/serwis/" title="NetSprint home page [new window]" target="_blank">NetSprint</a>',\r
-'newsgatoronline', 'NewsGator Online',\r
-'nicebot','<a href="http://www.egghelp.org/setup.htm" title="Bot home page (there may be others) [new window]" target="_blank">nicebot</a>',\r
-'nimblecrawler','<a href="http://www.healthline.com/" title="NimbleCrawler home page [new window]" target="_blank">NimbleCrawler</a>',\r
-'noxtrumbot','<a href="http://www.noxtrum.com/" title="Bot home page [new window]" target="_blank">noxtrumbot</a>',\r
-'npbot','<a href="http://www.nameprotect.com/botinfo.html" title="NPBot home page [new window]" target="_blank">NPBot</a>',\r
-'loocalcrawler/nutch', '<a href="https://weluse.de/" rel="nofollow" title="https://weluse.de/ [new window]" target="_blank">LoocalCrawler/Nutch</a>',\r
-'nutchcvs','<a href="http://lucene.apache.org/nutch/bot.html" title="NutchCVS home page [new window]" target="_blank">NutchCVS</a>',\r
-'nutchosu\-vlib','<a href="http://lucene.apache.org/nutch/bot.html" title="NutchOSU-VLIB home page [new window]" target="_blank">NutchOSU-VLIB</a>',\r
-'nutch','<a href="http://lucene.apache.org/nutch/" title="Bot home page. Used by many, including Looksmart. [new window]" target="_blank">Nutch</a>',\r
-'ocelli','<a href="http://www.globalspec.com/Ocelli/" title="Ocelli home page [new window]" target="_blank">Ocelli</a>',\r
-'octora_beta_bot','<a href="http://www.octora.com/" title="Bot home page [new window]" target="_blank">Octora Beta Bot</a>',\r
-'omniexplorer[_+\s]bot','<a href="http://www.omni-explorer.com/" title="Bot home page. [new window]" target="_blank">OmniExplorer Bot</a>',\r
-'onet\.pl[_+\s]sa','<a href="http://szukaj.onet.pl/" title="Onet.pl_SA home page [new window]" target="_blank">Onet.pl_SA</a>',\r
-'onfolio','<a href="http://www.onfolio.com/" title="Bot home page [new window]">Onfolio</a>',\r
-'opentaggerbot','<a href="http://www.opentagger.com/opentaggerbot.htm" title="Bot home page [new window]">OpenTaggerBot</a>',\r
-'openwebspider','<a href="http://www.openwebspider.org/" title="OpenWebSpider home page [new window]" target="_blank">OpenWebSpider</a>',\r
-'optimizer', '<span title="As on Oct. 2, 2015, the user agent string did not contain a web address.">Optimizer</span>',\r
-'oracle_ultra_search','<a href="http://www.oracle.com/technology/products/ultrasearch/index.html" title="Oracle Ultra Search home page [new window]" target="_blank">Oracle Ultra Search</a>',\r
-'orangebot', 'OrangeBot, no website, log entry specifies mail address', # support.orangebot@orange.com\r
-'orbiter','<a href="http://www.dailyorbit.com/bot.htm" title="Orbiter home page [new window]" target="_blank">Orbiter</a>',\r
-'yodaobot','<a href="http://www.yodao.com/help/webmaster/spider/" title="YodaoBot">OutfoxBot/YodaoBot</a>',\r
-'qihoobot','<a href="http://www.qihoo.com/" title="QihooBot">QihooBot</a>',\r
-'qwantify', '<a href="https://www.qwant.com/" rel="nofollow" title="Qwant Home Page [new window]" target="_blank">Qwant</a>',\r
-'passwordmaker\.org','<a href="http://passwordmaker.org/" title="passwordmaker.org home page [new window]" target="_blank">passwordmaker.org</a>',\r
-'pear_http_request_class','<a href="http://pear.php.net/" title="PEAR HTTP Request class home page [new window]" target="_blank">PEAR HTTP Request class</a>',\r
-'peerbot','<a href="http://www.peerbot.com/" title="PEERbot home page [new window]" target="_blank">PEERbot</a>',\r
-'perman', 'Perman surfer',\r
-'php[_+\s]version[_+\s]tracker','<a href="http://www.nexen.net/phpversion/bot.php" title="PHP Version Tracker home page [new window]" target="_blank">PHP version tracker</a>',\r
-'phpcrawl', '<a href="http://phpcrawl.cuab.de/" rel="nofollow" title="PHPCrawl Home page [new window]" target="_blank">PHPCrawl</a>',\r
-'picmole', '<a href="http://www.picmole.com/" rel="nofollow" title="Bot home page. [new window]" target="_blank">Specified address www.picmole.com was not reachable on April 21, 2014</a>',\r
-'pictureofinternet','<a href="http://malfunction.org/poi/" title="PictureOfInternet home page [new window]" target="_blank">PictureOfInternet</a>',\r
-'ping\.blo\.gs','<a href="http://blo.gs/ping.php" title="Bot home page. [new window]" target="_blank">ping.blo.gs</a>',\r
-'plinki','<a href="http://www.plinki.com/" title="plinki home page [new window]" target="_blank">plinki</a>',\r
-'pluckfeedcrawler','<a href="http://www.pluck.com/" title="Bot home page. [new window]" target="_blank">PluckFeedCrawler</a>',\r
-'plukkie',  '<a href="http://www.botje.com/plukkie.htm" rel="nofollow" title="Plukkie [new window]" target="_blank">Plukkie</a>',\r
-'pogodak','<a href="http://www.pogodak.com" title="Pogodak home page [new window]" target="_blank">Pogodak.com</a>',\r
-'pompos','<a href="http://dir.com/pompos.html" title="Bot home page. [new window]" target="_blank">Pompos</a>',\r
-'popdexter','Popdexter',\r
-'port_huron_labs','<a href="http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&amp;template=detail.html&amp;match=\bid_n_s_1133\b" title="Port Huron Labs home page [new window]" target="_blank">Port Huron Labs</a>',\r
-'postfavorites','<a href="http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&amp;template=detail.html&amp;match=\bid_n_s_1135\b " title="PostFavorites home page [new window]" target="_blank">PostFavorites</a>',\r
-'projectwf\-java\-test\-crawler','ProjectWF-java-test-crawler',\r
-'proodlebot','<a href="http://www.proodle.com/" title="proodleBot home page [new window]" target="_blank">proodleBot</a>',\r
-'publiclibraryarchive', '<a href="http://publiclibraryarchive.org" rel="nofollow" title="On 23 June 2014 a page parked at GoDaddy [new window]" target="_blank">publiclibraryarchive.org (related to spiderlytics.com and/or waybackarchive.org?)</a>',\r
-#Observations 2014-06-23\r
-#Domain publiclibraryarchive.org is parked at GoDaddy.com\r
-#from https://www.projecthoneypot.org/\r
-#81.30.151.220's User Agent Strings (honeypot classified this ip as an mail server, active about 6 years ago)\r
-#Mozilla/5.0 (compatible; publiclibraryarchive.org/1.0; +crawl@publiclibraryarchive.org)\r
-#176.9.138.27's User Agent Strings\r
-#Mozilla/5.0 (compatible; publiclibraryarchive.org/1.0; +crawl@publiclibraryarchive.org)\r
-#Mozilla/5.0 (compatible; Spiderlytics/1.0; +spider@spiderlytics.com)\r
-#Mozilla/5.0 (compatible; waybackarchive.org/1.0; +spider@waybackarchive.org)\r
-#146.0.32.165's User Agent Strings\r
-#Mozilla/5.0 (compatible; publiclibraryarchive.org/1.0; +crawl@publiclibraryarchive.org)\r
-#Mozilla/5.0 (compatible; savetheworldheritage.org/1.0; +crawl@savetheworldheritage.org)\r
-#Mozilla/5.0 (compatible; seoscanners.net/1; +spider@seoscanners.net)\r
-'pyquery','<a href="http://sourceforge.net/projects/pyquery/" title="PyQuery home page [new window]" target="_blank">PyQuery</a>',\r
-'rambler','<a href="http://www.rambler.ru/doc/faq.shtml" title="Bot home page [new window]">StackRambler</a>',\r
-'redalert','Red Alert',\r
-'relevantnoise\.com', '<a href="http://www.relevantnoise.com/" title="Relevant Noise [new window]" target="_blank">Relevant Noise</a>',\r
-'riddler', '<a href="http://riddler.io/about" rel="nofollow" title="Riddler [new window]" target="_blank">Riddler</a>',\r
-'rogerbot', '<a href="http://moz.com/help/pro/what-is-rogerbot-" rel="nofollow" title="Rogerbot Home Page [new window]" target="_blank">Rogerbot</a>',\r
-'rojo','<a href="http://rojo.com/" title="Bot home page [new window]" target="_blank">RoJo</a> aggregator',\r
-'rssimagesbot','<a href="http://herbert.groot.jebbink.nl/?app=rssImages" title="Bot home page [new window]" target="_blank">rssImagesBot</a>',\r
-'ruffle','<a href="http://www.unreach.net/" title="Bot home page [new window]" target="_blank">ruffle SemanticWeb crawler</a>',\r
-'rufusbot','<a href="http://64.124.122.252.webaroo.com/feedback.html" title="Bot home page [new window]" target="_blank">RufusBot Rufus Web Miner</a>',\r
-'safeads\.xyz', '<a href="http://www.safeads.xyz/" rel="nofollow" title="SafeAds.xyz [new window]" target="_blank">SafeAds.xyz</a>',\r
-'safesearch',  '<a href="https://safesearch.avira.com" rel="nofollow" title="Avira SafeSearch Home Page [new window]" target="_blank">Avira SafeSearch</a>',\r
-'sandcrawler','<a href="http://www.microsoft.com/" title="Bot home page [new window]" target="_blank">SandCrawler (Microsoft)</a>',\r
-'savetheworldheritage', '<a href="http://savetheworldheritage.org" rel="nofollow" title="On March 4, 2015 a page parked at GoDaddy [new window]" target="_blank">savetheworldheritage.org (related to spiderlytics.com, waybackarchive.org and/or publiclibraryarchive.org?)</a>',\r
-'sbider','<a href="http://www.sitesell.com/sbider.html" title="Bot home page [new window]" target="_blank">SBIder</a>',\r
-'schizozilla','<a href="http://spamhuntress.com/2005/03/18/gizmo/ " title="Schizozilla home page [new window]" target="_blank">Schizozilla</a>',\r
-'scumbot','Scumbot',\r
-'searchguild[_+\s]dmoz[_+\s]experiment','<a href="http://www.searchguild.com/" title="SearchGuild_DMOZ_Experiment  home page [new window]" target="_blank">SearchGuild_DMOZ_Experiment</a>',\r
-'searchmetricsbot','<a href="http://www.searchmetrics.com/en/searchmetrics-bot/" rel="nofollow" title="SearchmetricsBot [new window]" target="_blank">SearchmetricsBot</a>',\r
-'seekbot','<a href="http://www.seekbot.net/bot.html" title="Bot home page [new window]">Seekbot</a>',\r
-'semrushbot', '<a href="http://www.semrush.com/bot.html" rel="nofollow" title="SemrushBot [new window]" target="_blank">SemrushBot</a>',\r
-'sensis_web_crawler','<a href="http://www.sensis.com.au/" title="Sensis Web Crawler home page [new window]" target="_blank">Sensis Web Crawler</a>',\r
-'seodiver', '<a href="http://www.seodiver.com/bot" rel="nofollow" title="SEO DIVER Bot Home Page [new window]" target="_blank">SEO DIVER</a>',\r
-'seokicks\.de', '<a href="http://www.seokicks.de/robot.html" rel="nofollow" title="SEOkicks Webcrawler home page [new window]" target="_blank">SEOkicks Webcrawler</a>',\r
-'seoscanners', '<a href="http://seoscanners.net" rel="nofollow" title="On August 4, 2015 a page parked at GoDaddy [new window]" target="_blank">seoscanners.net</a> (related to publiclibraryarchive.org and savetheworldheritage.org?)',\r
-'seznambot','<a href="http://fulltext.seznam.cz/" title="Bot home page [new window]" target="_blank">SeznamBot</a>',\r
-'shim\-crawler','<a href="http://www.logos.ic.i.u-tokyo.ac.jp/crawler/" title="crawl@logos.ic.i.u-tokyo.ac.jp Bot home page [new window]" target="_blank">Shim-Crawler</a>',\r
-'shoutcast','Shoutcast Directory Service',\r
-'sitedomain-bot', '<a href="http://www.sitedomain.de/sitedomain-bot/" rel="nofollow" title="Sitedomain-Bot Home Page [new window]" target="_blank">Sitedomain.de</a>',\r
-'siteexplorer\.info', '<a href="http://siteexplorer.info/" title="Site Explorer home page [new window]" target="_blank">Site Explorer</a>',\r
-'skimbot', '<a href="http://www.skimlinks.com" rel="nofollow" title="SkimBot [new window]" target="_blank">SkimBot</a>',\r
-'slysearch','SlySearch',\r
-'smtbot', '<a href="http://www.similartech.com/smtbot" rel="nofollow" title="SMTBot Home Page [new window]" target="_blank">SMTBot</a>',\r
-'snap\.com_beta_crawler','<a href="http://www.snap.com/" title="snap.com beta crawler home page [new window]" target="_blank">snap.com beta crawler</a>',\r
-'sohu\-search','<a href="http://corp.sohu.com/" title="Bot home page [new window]" target="_blank">sohu-search</a>',\r
-'sohu','<a href="http://corp.sohu.com/" title="Bot home page [new window]" target="_blank">sohu agent</a>',\r
-'snappy','<a href="http://www.urltrends.com/faq.php" title="Bot home page [new window]" target="_blank">Snappy</a>',\r
-'spbot', '<a href="http://www.seoprofiler.com/bot" rel="nofollow" title="SEOprofiler Bot [new window]" target="_blank">SEOprofiler Bot</a>',\r
-'sphere_scout','<a href="http://www.sphere.com/" title="Bot home page [new window]" target="_blank">Sphere Scout</a>',\r
-'spip','<a href="http://www.spip.net" title="SPIP home page [new window]" target="_blank">SPIP</a>',\r
-'sproose_crawler','<a href="http://www.sproose.com/bot.html" title="Bot home page [new window]" target="_blank">sproose crawler</a>',\r
-'ssearch_bot', '<a href="http://www.semantissimo.de/" title="sSearch Crawler [new window]" target="_blank">sSearch Crawler</a>',\r
-'steroid__download','<a href="http://faqs.org.ru/progr/pascal/delphi_internet2.htm" title="STEROID  Download home page [new window]" target="_blank">STEROID  Download</a>',\r
-'steeler','<a href="http://www.tkl.iis.u-tokyo.ac.jp/~crawler/ " title="Steeler home page [new window]" target="_blank">Steeler</a>',\r
-'stq_bot', '<a href="http://www.searchteq.de" rel="nofollow" title="SEARCHTEQ Home Page [new window]" target="_blank">SEARCHTEQ</a>',\r
-'suchfin\-bot','<a href="http://www.suchfin.de/" title="Suchfin-Bot home page [new window]" target="_blank">Suchfin-Bot</a>',\r
-'superbot','<a href="http://www.sparkleware.com/superbot/" title="SuperBot home page [new window]" target="_blank">SuperBot</a>',\r
-'surveybot','SurveyBot',\r
-'susie','<a href="http://www.sync2it.com/bms/susie.php" title="Susie home page [new window]" target="_blank">Susie</a>',\r
-'syndic8','Syndic8',\r
-'syndicapi','<a href="http://syndicapi.com/bot.html" title="Bot home page [new window]" target="_blank">SyndicAPI</a>',\r
-'synoobot','<a href="http://www.synoo.de/bot.html" title="webmaster@synoo.com SynooBot home page [new window]" target="_blank">SynooBot</a>',\r
-'tcl_http_client_package','<a href="http://www.tcl.tk/man/tcl8.4/TclCmd/http.htm" title="Tcl http client package home page [new window]" target="_blank">Tcl http client package</a>',\r
-'technoratibot', 'Technoratibot',\r
-'teragramcrawlersurf','<a href="http://www.teragram.com/" title="TeragramCrawlerSURF home page [new window]" target="_blank">TeragramCrawlerSURF</a>',\r
-'test_crawler','<a href="http://netp.ath.cx/" title="Test Crawler home page [new window]" target="_blank">Test Crawler</a>',\r
-'testbot','<a href="http://www.agbrain.com/" title="TestBot home page [new window]" target="_blank">TestBot</a>',\r
-'thumbsniper', '<a href="http://thumbsniper.com" rel="nofollow" title="ThumbSniper Home Page [new window]" target="_blank">ThumbSniper</a>',\r
-'t\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e','<a href="http://www.thunderstone.com/" title="Bot home page. Used by many. [new window]" target="_blank">T-H-U-N-D-E-R-S-T-O-N-E</a>',\r
-'topicblogs', '<a href="http://www.topicblogs.com/" title="Bot home page [new window]" target="_blank">topicblogs</a>',\r
-'turnitinbot', '<a href="http://www.turnitin.com/robot/crawlerinfo.html" rel="nofollow" title="TurnitinBot Home Page [new window]" target="_blank">Turn It In</a>',\r
-'turtle', 'Turtle',\r
-'turtlescanner', 'Turtle',\r
-'tutorgigbot','<a href="http://www.tutorgig.info/" title="TutorGigBot home page [new window]" target="_blank">TutorGigBot</a>',\r
-'twiceler','<a href="http://www.cuill.com/twiceler/robot.html" title="Twiceler home page [new window]" target="_blank">twiceler</a>',\r
-'ubicrawler','<a href="http://law.dsi.unimi.it/ubicrawler/" title="Bot home page [new window]" target="_blank">UbiCrawler</a>',\r
-'ultraseek', 'Ultraseek',\r
-'unchaos_bot_hybrid_web_search_engine','<a href="http://www.unchaos.com/" title="UnChaos Bot Hybrid Web Search Engine home page [new window]" target="_blank">UnChaos Bot Hybrid Web Search Engine</a>',\r
-'unido\-bot','<a href="http://www.unchina.org/unido/unido/our_projects/3_3.html" title="unido-bot home page [new window]" target="_blank">unido-bot</a>',\r
-'unisterbot', 'UnisterBot; E-Mail only: crawler (at) unister.de',\r
-'updated','<a href="http://www.updated.com/" title="updated home page [new window]" target="_blank">updated</a>',\r
-'ustc\-semantic\-group','<a href="http://ai.ustc.edu.cn/mas/en/research/index.php" title="Bot home page [new window]" target="_blank">USTC-Semantic-Group</a>',\r
-'vagabondo\-wap','<a href="http://www.wise-guys.nl/Contact/index.php?botselected=webagents&amp;lang=uk" title="Bot home page [new window]" target="_blank">Vagabondo-WAP</a>',\r
-'vagabondo','<a href="http://www.wise-guys.nl/Contact/index.php?botselected=webagents&amp;lang=uk" title="Bot home page [new window]" target="_blank">Vagabondo</a>',\r
-'vebidoobot', '<span title="As on Oct. 27, 2015, the user agent string did not contain a web address.">vebidoobot</span>',\r
-'vermut','<a href="http://vermut.aol.com/" title="Bot home page [new window]" target="_blank">Vermut</a>',\r
-'versus_crawler_from_eda\.baykan@epfl\.ch','<a href="http://www.epfl.ch/Eindex.html  " title="versus crawler from eda.baykan@epfl.ch home page [new window]" target="_blank">versus crawler from eda.baykan@epfl.ch</a>',\r
-'vespa_crawler','<a href="http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=%5Cbid_t_z_030406_1%5Cb" title="Bot home page [new window]" target="_blank">Vespa Crawler</a>',\r
-'voltron', '<span title="As on Oct. 21, 2015, the user agent string did not contain a web address.">voltron</span>',\r
-'vortex','<a href="http://marty.anstey.ca/projects/robots/vortex/" title="Bot home page [new window]" target="_blank">VORTEX</a>',\r
-'vse\/','<a href="http://www.vivisimo.com/" title="VSE home page [new window]" target="_blank">VSE</a>',\r
-'w3c\-checklink','<a href="http://validator.w3.org/checklink/" title="Bot home page [new window]" target="_blank">W3C Link Checker</a>',\r
-'w3c[_+\s]css[_+\s]validator[_+\s]jfouffa', '<a href="http://jigsaw.w3.org/css-validator/" title="Bot home page [new window]" target="_blank">W3C jigsaw CSS Validator</a>',\r
-'w3c_validator','<a href="http://validator.w3.org/" title="Bot home page [new window]" target="_blank">W3C Validator</a>',\r
-'watchmouse', '<a href="http://www.watchmouse.com/en/" title="WatcMouse">WatchMouse Website Monitor</a>',\r
-'wavefire','<a href="http://www.wavefire.com" title="info@wavefire.com; Bot home page [new window]" target="_blank">Wavefire</a>',\r
-'waybackarchive\.org', '<span title="Maybe related to spiderlytics.">No website, email: spider(at)waybackarchive.org</span>',\r
-# 2.12.2013 Project Honeypot reports at least one of the IPs used by waybackarchive with a spiderlytics UA string.\r
-# Problably not related to the wayback machine of archive.org.\r
-'wbsearchbot', '<a href="http://www.warebay.com/bot.html" rel="nofollow" title="WBSearchBot [new window]" target="_blank">WBSearchBot</a>',\r
-'webclipping\.com', 'WebClipping.com',\r
-'webcompass', 'webcompass',\r
-'webcrawl\.net','<a href="http://www.webcrawl.net/" title="webcrawl.net home page [new window]" target="_blank">webcrawl.net</a>',\r
-'web_downloader','<a href="http://www.krasu.ru/soft/chuchelo/" title="Web Downloader home page [new window]" target="_blank">Web Downloader</a>',\r
-'webdup','<a href="http://www.webdup.com/en/index.html" title="Webdup home page [new window]" target="_blank">Webdup</a>',\r
-'webfilter','<a href="http://www.verso.com/enterprise/netspective/webfilter.asp" title="Bot home page [new window]" target="_blank">WebFilter</a>',\r
-'webindexer','<a href="mailto://webindexerv1@yahoo.com" title="WebIndexer home page [new window]" target="_blank">WebIndexer</a>',\r
-'webminer','<a href="http://64.124.122.252/feedback.html" title="WebMiner home page [new window]" target="_blank">WebMiner</a>',\r
-'website[_+\s]monitoring[_+\s]bot','<a href="http://InternetSupervision.com/UrlMonitor/3/" title="Website_Monitoring_Bot home page [new window]" target="_blank">Website_Monitoring_Bot</a>',\r
-'webvulncrawl', 'WebVulnCrawl',\r
-'wells_search','<a href="http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&amp;template=detail.html&amp;match=\bid_t_z_1484\b " title="Wells Search home page [new window]" target="_blank">Wells Search</a>',\r
-'wer-liefert-was', '<a href="http://www.wlw.de/extern/crawler/Wer-liefert-was-Crawler.html" rel="nofollow" title="Page given in UA string gave a 404 Error on July 2, 2015 [new window]" target="_blank">Wer-liefert-was Crawler</a> Note: AWStats counts most traffic as user traffic',\r
-'wesee:search', '<a href="http://www.wesee.com/en/support/bot/" title="WeSEE Bot Home Page (gave a 404-Error on Nov. 2, 2013) [new window]" target="_blank">WeSEE Bot</a>',\r
-'wevikabot', '<a href="http://www.wevika.de/" rel="nofollow" title="WeViKa Home Page [new window]" target="_blank">WeViKa</a>',\r
-'wonderer', 'Web Wombat Redback Spider',\r
-'wotbox', '<a href="http://www.wotbox.com/bot/" rel="nofollow" title="Wotbox Bot Home Page [new window]" target="_blank">Wotbox</a>',\r
-'wume_crawler','<a href="http://wume.cse.lehigh.edu/~xiq204/crawler/ " title="wume crawler home page [new window]" target="_blank">wume crawler</a>',\r
-'wwweasel',,'<a href="http://wwweasel.de/" title="Website_Monitoring_Bot home page [new window]" target="_blank">WWWeasel</a>',\r
-'xenu\'s_link_sleuth','<a href="http://home.snafu.de/tilman/xenulink.html" title="Xenu Link Sleuth home page [new window]" target="_blank">Xenu Link Sleuth</a>',\r
-'xenu_link_sleuth','<a href="http://home.snafu.de/tilman/xenulink.html" title="Xenu Link Sleuth home page [new window]" target="_blank">Xenu Link Sleuth</a>',\r
-'xirq','<a href="http://www.xirq.com/" title="xirq home page [new window]" target="_blank">xirq</a>',\r
-'xovibot', '<a href="http://www.xovibot.net/" rel="nofollow" title="XoviBot Home Page [new window]" target="_blank">XoviBot</a>',\r
-'y!j', '<a href="http://help.yahoo.co.jp/help/jp/search/indexing/indexing-15.html" title="Bot home page [new window]" target="_blank">Y!J Yahoo Japan</a>',\r
-'yacy', '<a href="http://yacy.net/bot.html" rel="nofollow" title="YaCy Home Page [new window]" target="_blank">YaCy</a>',\r
-'yahoo\-blogs','<a href="http://help.yahoo.com/help/us/ysearch/crawling/crawling-02.html" title="Bot home page [new window]" target="_blank">Yahoo-Blogs</a>',\r
-'yahoo\-verticalcrawler', 'Yahoo Vertical Crawler',\r
-'yahoofeedseeker', '<a href="http://publisher.yahoo.com/rssguide" title="Bot home page [new window]" target="_blank">Yahoo Feed Seeker</a>',\r
-'yahooseeker\-testing', '<a href="http://search.yahoo.com/" title="Bot home page [new window]" target="_blank">YahooSeeker-Testing</a>',\r
-'yahooseeker', '<a href="http://help.yahoo.com/help/us/ysearch/crawling/crawling-02.html" title="Bot home page [new window]" target="_blank">YahooSeeker Yahoo! Blog crawler</a>',\r
-'yahoo\-mmcrawler', '<a href="mailto:mms-mmcrawler-support@yahoo-inc.com?subject=Yahoo-MMCrawler Information" title="E-mail Bot">Yahoo-MMCrawler</a>',\r
-'yahoo!_mindset','<a href="http://mindset.research.yahoo.com/" title="Bot home page [new window]">Yahoo! Mindset</a>',\r
-'yandex', '<a href="http://yandex.com/bots" title="Bot home page [new window]">Yandex Bot</a>',\r
-'flexum', 'Flexum Search Engine',\r
-'yanga', 'Yanga WorldSearch Bot',\r
-'yet-another-spider','<a href="http://188.40.112.195/" title="Yet-Another-Spider home page [new window]" target="_blank">Yet-Another-Spider</a>',\r
-'yisouspider', 'YisouSpider (no additional information in UA string)',\r
-'yooglifetchagent','<a href="http://www.yoogli.com/" title="yoogliFetchAgent home page [new window]" target="_blank">yoogliFetchAgent</a>',\r
-'z\-add_link_checker','<a href="http://w3.z-add.co.uk/linkcheck/" title="Z-Add Link Checker home page [new window]" target="_blank">Z-Add Link Checker</a>',\r
-'zealbot','ZealBot',\r
-'zhuaxia','<a href="http://www.zhuaxia.com/"  target="_blank">ZhuaXia</a>',\r
-'zspider','<a href="http://feedback.redkolibri.com/" title="Bot home page [new window]" target="_blank">zspider</a>',\r
-'zeus','<a href="http://www.webmasterworld.com/forum11/1840.htm" title="Bot documentation [new window]" target="_blank">Zeus Webster Pro</a>',\r
-'zumbot','<a href="http://help.zum.com/inquiry" title="ZumBot home page [new window]" target="_blank">ZumBot</a>',\r
-'ng\/1\.','<a href="http://www.exabot.com/" title="Bot home page [new window]" target="_blank">NG 1.x (Exalead)</a>', # put at end to avoid false positive\r
-'ng\/2\.','<a href="http://www.exabot.com/" title="Bot home page [new window]" target="_blank">NG 2.x (Exalead)</a>', # put at end to avoid false positive\r
-'exabot','<a href="http://www.exabot.com/" title="Bot home page [new window]" target="_blank">Exabot</a>', # put at end to avoid false positive\r
-# Other id that are 99% of robots\r
-'wget','WGet tools',\r
-'libwww','Perl tool',\r
-'^java\/[0-9]','<a href="http://www.projecthoneypot.org/harvester_useragents.php" title="Bot home page [new window]" target="_blank">Java (Often spam bot)</a>', # put at end to avoid false positive\r
-# Generic robot\r
-'robot', 'Unknown robot (identified by \'robot\')',\r
-'checker', 'Unknown robot (identified by \'checker\')',\r
-'crawl', 'Unknown robot (identified by \'crawl\')',\r
-'discovery', 'Unknown robot (identified by \'discovery\')',\r
-'hunter', 'Unknown robot (identified by \'hunter\')',\r
-'scanner', 'Unknown robot (identified by \'scanner\')',\r
-'spider', 'Unknown robot (identified by \'spider\')',\r
-'sucker', 'Unknown robot (identified by \'sucker\')',\r
-'bot[\s_+:,\.\;\/\\\-]', 'Unknown robot (identified by \'bot\' followed by a space or one of the following characters _+:,.;/\-)',\r
-'[\s_+:,\.\;\/\\\-]bot', 'Unknown robot (identified by a space or one of the characters _+:,.;/\- followed by \'bot\')',\r
-'curl', 'Common *nix tool for automating web document retrieval. Most likely a bot.',\r
-'php', 'A PHP script',\r
-'ruby\/', 'Ruby script',\r
-# Additional bots found by Sussex.\r
-'^[1-3]$', 'Generic bot identified as "1", "2" or "3"',\r
-'alltop', 'alltop',\r
-'applesyndication', 'applesyndication',\r
-'asynchttpclient', 'asynchttpclient',\r
-'bingbot', '<a href="http://www.bing.com/bingbot.htm" title="Bing home page [new window]" target="_blank">Bingbot</a>',\r
-'blogged_crawl', 'blogged_crawl',\r
-'bloglovin', 'bloglovin',\r
-'butterfly', 'butterfly',\r
-'buzztracker', 'buzztracker',\r
-'carpathia', 'carpathia',\r
-'catbot', 'catbot',\r
-'chattertrap', 'chattertrap',\r
-'check_http', 'check_http (nagios)',\r
-'coldfusion', 'coldfusion',\r
-'covario', 'covario',\r
-'daylifefeedfetcher', 'daylifefeedfetcher',\r
-'discobot', 'discobot',\r
-'dlvr\.it', 'dlvr.it',\r
-'dreamwidth', 'dreamwidth',\r
-'drupal', 'Drupal Site',\r
-'ezoom', 'ezoom',\r
-'feedmyinbox', 'feedmyinbox',\r
-'feedroll\.com', 'feedroll.com',\r
-'feedzira', 'feedzira',\r
-'fever\/', '<a href="http://feedafever.com">Feed a Fever</a>',\r
-'freenews', 'freenews',\r
-'geohasher', 'geohasher',\r
-'hanrss', 'hanrss',\r
-'inagist', 'inagist',\r
-'jacobin\sclub', 'jacobin club',\r
-'jakarta', 'jakarta',\r
-'js\-kit', 'js-kit',\r
-'largesmall\scrawler', 'largesmall crawler',\r
-'linkedinbot', 'linkedinbot',\r
-'longurl', 'longurl',\r
-'metauri', 'metauri',\r
-'microsoft\-webdav\-miniredir', 'microsoft-webdav-miniredir',\r
-'^motorola$', 'Suspected Bot masquerading as "Motorola"',\r
-'movabletype', 'movabletype',\r
-'^mozilla\/3\.0\s\(compatible$', 'Suspected bot masqurading as Mozilla',\r
-'^mozilla\/4\.0$', 'Suspected bot masqurading as Mozilla',\r
-'^mozilla\/4\.0\s\(compatible;\)$', 'Suspected bot masqurading as Mozilla',\r
-'^mozilla\/5\.0$', 'Suspected bot masqurading as Mozilla',\r
-'^mozilla\/5\.0\s\(compatible;$', 'Suspected bot masqurading as Mozilla',\r
-'^mozilla\/5\.0\s\(en\-us\)$', 'Suspected bot masqurading as Mozilla',\r
-'^mozilla\/5\.0\sfirefox\/3\.0\.5$', 'Suspected bot masqurading as Mozilla',\r
-'^msie', 'Suspected bot masquerading as M$ IE',\r
-'netnewswire', 'netnewswire',\r
-'\snetseer\s', '<a href="http://www.netseer.com/crawler.html">Net Seer</a>',\r
-'netvibes', 'netvibes',\r
-'newrelicpinger', 'newrelicpinger',\r
-'newsfox', 'Fox News',\r
-'nextgensearchbot', 'nextgensearchbot',\r
-'ning', 'ning',\r
-'pingdom', 'pingdom',\r
-'pita', 'pita (pain in the ass?)',\r
-'postpost', 'postpost',\r
-'postrank', 'postrank',\r
-'printfulbot', 'printfulbot',\r
-'protopage', 'protopage',\r
-'proximic', '<a href="http://www.proximic.com/info/spider.php" title="Proximic Spider home page [new window]" target="_blank">Proximic Spider</a>',\r
-'quipply', 'quipply',\r
-'r6\_', '<a href="http://www.radian6.com/crawler">Radian 6 Crawler</a>',\r
-'ratingburner', 'ratingburner',\r
-'regator', 'regator',\r
-'rome\sclient', 'rome client',\r
-'rpt\-httpclient', 'rpt-httpclient',\r
-'rssgraffiti', 'rssgraffiti',\r
-'sage\+\+', 'sage++',\r
-'scoutjet', '<a href="http://wwww.scoutjet.com/" target="_blank">ScoutJet</a> crawler for <a href="http://blekko.com/" target="_blank">Blekko</a>.',\r
-'simplepie', 'simplepie',\r
-'sitebot', 'sitebot',\r
-'summify\.com', '<a href="http://summify.com/">summify.com</a>',\r
-'superfeedr', 'superfeedr',\r
-'synthesio', 'synthesio',\r
-'teoma', 'teoma',\r
-'topblogsinfo', 'topblogsinfo',\r
-'topix\.net', 'topix.net',\r
-'trapit', 'trapit',\r
-'trileet', 'trileet',\r
-'tweetedtimes', '<a href="http://tweetedtimes.com">The Tweeted Times</a>',\r
-'twisted\spagegetter', 'twisted pagegetter',\r
-'twitterbot', '<span title="As on Nov. 27, 2015, the user agent string did not contain a web address.">Twitterbot</span>',\r
-'twitterfeed', 'twitterfeed',\r
-'unwindfetchor', 'unwindfetchor',\r
-'wazzup', 'wazzup',\r
-'windows\-rss\-platform', 'windows-rss-platform',\r
-'wiumi', 'wiumi',\r
-'xydo', 'xydo',\r
-'yahoo!\sslurp', 'Additional Yahoo bots.',\r
-'yahoo\spipes', 'Additional Yahoo bots.',\r
-'yahoo\-newscrawler', 'Additional Yahoo bots.',\r
-'yahoocachesystem', 'Additional Yahoo bots.',\r
-'yahooexternalcache', 'Additional Yahoo bots.',\r
-'yahoo!\ssearchmonkey', 'Additional Yahoo bots.',\r
-'yahooysmcm', 'Additional Yahoo bots.',\r
-'yammer', 'yammer',\r
-#'yandexbot', 'yandexbot', #already covered by 'yandex'\r
-'yeti', 'yeti',\r
-'yie8', 'yie8',\r
-'youdao', 'youdao',\r
-'yourls', 'yourls',\r
-'zemanta', 'zemanta',\r
-'zend_http_client', 'Zend Http Client',\r
-'no_user_agent','Unknown robot (identified by empty user agent string)',\r
-# Unknown robots identified by hit on robots.txt\r
-'unknown', 'Unknown robot (identified by hit on \'robots.txt\')'\r
-);\r
-\r
-\r
-# RobotsAffiliateLib\r
-# This list try to tell by which Search Engine a robot is used\r
-#-------------------------------------------------------------\r
-%RobotsAffiliateLib = (\r
-'bingpreview'=>'Bing',\r
-'fast\-webcrawler'=>'AllTheWeb',\r
-'googlebot'=>'Google',\r
-'google\-sitemap'=>'Google',\r
-'google[_+\s]web[_+\s]preview'=>'Google',\r
-'msnbot'=>'MSN',\r
-'nutch'=>'Looksmart',\r
-'scooter'=>'AltaVista',\r
-'wisenutbot'=>'Looksmart',\r
-'yahoo\-blogs'=>'Yahoo',\r
-'yahoo\-verticalcrawler'=>'Yahoo',\r
-'yahoofeedseeker'=>'Yahoo',\r
-'yahooseeker\-testing'=>'Yahoo',\r
-'yahooseeker'=>'Yahoo',\r
-'yahoo\-mmcrawler'=>'Yahoo',\r
-'yahoo!_mindset'=>'Yahoo',\r
-'zyborg'=>'Looksmart',\r
-'cfetch'=>'Kosmix',\r
-'^voyager\/'=>'Kosmix',\r
-# Additional bots found by Sussex.\r
-'feedfetcher\-google'=>'Google',\r
-'bingbot'=>'MSN',\r
-'twitterbot'=>'Twitter',\r
-'twitterfeed'=>'Twitter',\r
-'yahoo!\sslurp'=>'Yahoo',\r
-'yahoo\spipes'=>'Yahoo',\r
-'yahoo-newscrawler'=>'Yahoo',\r
-'yahoocachesystem'=>'Yahoo',\r
-'yahooexternalcache'=>'Yahoo',\r
-'yahoo!\ssearchmonkey'=>'Yahoo',\r
-'yahooysmcm'=>'Yahoo'\r
-);\r
-\r
-1;\r
+# AWSTATS ROBOTS DATABASE
+#-------------------------------------------------------
+# If you want to add robots to extend AWStats database detection capabilities,
+# you must add an entry in RobotsSearchIDOrder_listx and RobotsHashIDLib.
+
+# The entry in RobotsSearchIDOrder_listx is a Perl regular expression
+# (see http://perldoc.perl.org/perlreref.html). AWStats applies these
+# expressions to the user agent string in the order given by the lists. The
+# first match specifies the robot.
+#
+# Note: This regular expression must not contain any whitespace.
+# Otherwise AWStats will produce lines in the database that
+# will be misinterpreted and as a consequence the corresponding data in the
+# generated HTML reports will be wrong. If you want to match whitespace in
+# the user agent string, use other constructs like '\s', '[[:blank:]]',
+# '\p{IsSpace}', '\x20' etc.
+#
+# The corresponding entry in RobotsHashIDLib contains the regular expression
+# as key, followed by a string containing HTML-text. AWStats inserts this
+# text into reports to describe the bot. If possible the text should contain
+# a link to the bot home page. This makes it easier for sysadmins to find
+# the information necessary e.g. to adapt the robots.txt file.
+#
+# An entry in the RobotsAffiliateLib is not necessary. An entry in this list
+# contains as first part the regular expression specifying the bot. The
+# second part is a string that gives the Company or product managing the bot.
+# This information is not used yet.
+#
+# There are several sorts of bots that AWStats is not able to detect and
+# therefore a considerable amount of bot generated traffic counts
+# as user traffic:
+#
+# a) A crawler that identifies itself in the referrer string, but not in
+#    the user agent string. An example is the crawler from semalt.semalt.com.
+#
+# b) Crawlers that correctly access robots.txt but identify themselves
+#    in the user agent string only once or just a few times. Most of the
+#    time a user agent string is used that does not contain hints that
+#    a bot is involved. An example is the iCjobs spider.
+#    msnbot-UDiscovery/2.0b seems to show this behaviour too.
+#
+#
+#
+#-------------------------------------------------------
+
+# 2005-08-19 Sean Carlos http://www.antezeta.com/awstats.html
+#              added dipsie (not tested with real data).
+#              added DomainsDB.net http://domainsdb.net/
+#              added ia_archiver-web.archive.org (was inadvertently grouped with Alexa traffic)
+#              added Nutch (used by looksmart (furl?))
+#              added rssImagesBot
+#              added Sqworm
+#              added t\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e
+#              added w3c css-validator
+#              added documentation link to bot home pages for above and selected major bots.
+#                    In the case of international bots, choose .com page.
+#                    Included tool tip (html "title").
+#                    To do: parameterize to match both AWStats language and tooltips settings.
+#                    To do: add html links for all bots based on current documentation in source
+#                           files referenced below.
+#              changed '\wbot[\/\-]', to '\wbot[\/\-]' (removed comma)
+#              made minor grammar corrections to notes below
+# 2005-08-24   added YahooSeeker-Testing
+#                      added w3c-checklink
+#                      updated url for ask.com
+# 2005-08-24           added Girafabot http://www.girafa.com/
+# 2005-08-30           added PluckFeedCrawler http://www.pluck.com/
+#              added Gaisbot/3.0 (robot05@gais.cs.ccu.edu.tw; )
+#              added geniebot (wgao@genieknows.com)
+#              added BecomeBot link http://www.become.com/site_owners.html
+#              added topicblogs http://www.topicblogs.com/
+#              added Powermarks; seen used by referrer spam
+#              added YahooSeeker
+#              added NG/2. http://www.exabot.com/
+# 2005-09-15   added link for Walhello appie
+#              added bender focused_crawler
+#              updated YahooSeeker description (blog crawler)
+# 2005-09-16   added link for http://linkchecker.sourceforge.net
+#              added ConveraCrawler/0.9d ( http://www.authoritativeweb.com/crawl)
+#              added Blogslive  info@blogslive.com intelliseek.com
+#              added BlogPulse (ISSpider-3.0) intelliseek.com
+# 2005-09-26   added Feedfetcher-Google (http://www.google.com/feedfetcher.html)
+#              added EverbeeCrawler
+#              added Yahoo-Blogs http://help.yahoo.com/help/us/ysearch/crawling/crawling-02.html
+#              added link for Bloglines http://www.bloglines.com
+# 2005-10-19   fixed Feedfetcher-Google (http://www.google.com/feedfetcher.html)
+#              added Blogshares Spiders (Synchronized V1.5.1)
+#              added yacy
+# 2005-11-21   added Argus www.simpy.com
+#              added BlogsSay :: RSS Search Crawler (http://www.blogssay.com/)
+#              added MJ12bot http://majestic12.co.uk/bot.php
+#              added OpenTaggerBot (http://www.opentagger.com/opentaggerbot.htm)
+#              added OutfoxBot/0.3 (For internet experiments; outfox.agent@gmail.com)
+#              added RufusBot Rufus Web Miner http://64.124.122.252.webaroo.com/feedback.html
+#              added Seekbot (http://www.seekbot.net/bot.html)
+#              added Yahoo-MMCrawler/3.x (mms-mmcrawler-support@yahoo-inc.com)
+#               added link for BaiDuSpider
+#              added link for Blogshares Spider
+#              added link for StackRambler http://www.rambler.ru/doc/faq.shtml
+#              added link for WISENutbot
+#              added link for ZyBorg/1.0 (wn-14.zyborg@looksmart.net; http://www.WISEnutbot.com).  Moved location to above wisenut to avoid classification as wisenut
+# 2005-12-15
+#              added FAST Enteprise Crawler/6 (www dot fastsearch dot com). Note spelling Enteprise not Enterprise.
+#              added findlinks http://wortschatz.uni-leipzig.de/findlinks/
+#              added IBM Almaden Research Center WebFountain™ http://www.almaden.ibm.com/cs/crawler [hc3]
+#              added INFOMINE/8.0 VLCrawler (http://infomine.ucr.edu/useragents)
+#              added lmspider (lmspider@scansoft.com) http://www.nuance.com/
+#              added noxtrumbot http://www.noxtrum.com/
+#              added SandCrawler (Microsoft)
+#              added SBIder http://www.sitesell.com/sbider.html
+#              added SeznamBot http://fulltext.seznam.cz/
+#              added sohu-search http://corp.sohu.com/ (looked for //robots.txt not /robots.txt)
+#              added the ruffle SemanticWeb crawler v0.5 - http://www.unreach.net
+#              added WebVulnCrawl/1.0 libwww-perl/5.803 (looked for //robots.txt not /robots.txt)
+#              added Yahoo! Japan keyoshid http://www.yahoo.co.jp/
+#              added Y!J http://help.yahoo.co.jp/help/jp/search/indexing/indexing-15.html
+#              added link for GigaBot
+#              added link for MagpieRSS
+#              added link for MSIECrawler
+# 2005-12-21
+#              added aipbot http://www.aipbot.com aipbot@aipbot.com [matthys70 users.sourceforge.net]
+#              added Everest-Vulcan Inc./0.1 (R&D project; http://everest.vulcan.com/crawlerhelp)
+#              added Fast-Search-Engine http://www.fast-search-engine.com/ [matthys70  users.sourceforge.net]
+#              added g2Crawler (nobody@airmail.net) http://crawler.instantnetworks.net/
+#              added Jakarta commons-httpclient http://jakarta.apache.org/commons/httpclient/ (hit robots.txt).  May be used as robot or browser - a site may want to remove this entry.
+#              added OmniExplorer_Bot http://www.omni-explorer.com/ [matthys70 users.sourceforge.net]
+#              added USTC-Semantic-Group ai.ustc.edu.cn/mas/en/research/index.php ?
+# 2005-12-22
+#              added EARTHCOM.info www.earthcom.info
+#              added HTTrack off-line browser 'httrack','HTTrack', http://www.httrack.com/ [Moizes Gabor]
+#              added KummHttp http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_g_l_301105_2\b [Moizes Gabor]
+# 2006-01-01
+#              added Dulance http://www.dulance.com/bot.jsp
+#              added MojeekBot http://www.mojeek.com/bot.html
+#              added nicebot http://www.egghelp.org/setup.htm ?
+#              added Snappy http://www.urltrends.com/faq.php
+#              added sohu agent
+#              added VORTEX http://marty.anstey.ca/robots/vortex/ [matthys70 users.sourceforge.net]
+#              added zspider http://feedback.redkolibri.com/
+# 2006-01-13
+#              added boitho.com-dc http://www.boitho.com/dcbot.html
+#              added IRLbot http://irl.cs.tamu.edu/crawler
+#              added virus_detector virus_harvester@securecomputing.com
+#              added Wavefire http://www.wavefire.com; info@wavefire.com
+#              added WebFilter Robot
+# 2006-01-24
+#              added Shim-Crawler http://www.logos.ic.i.u-tokyo.ac.jp/crawler/; crawl@logos.ic.i.u-tokyo.ac.jp
+#              added Exabot exabot.com
+#              added LetsCrawl.com http://letscrawl.com
+#              added ichiro http://help.goo.ne.jp/door/crawlerE.html
+# 2006-01-27    additional 22 robots from a list provided by Moizes Gabor
+#              added ALeadSoftbot      http://www.aleadsoft.com/bot.htm
+#              added CipinetBot        http://www.cipinet.com/bot.html
+#              added Cuasarbot http://www.cuasar.com/
+#              added Dumbot    http://www.dumbfind.com/
+#              added Extreme_Picture_Finder    http://www.exisoftware.com/
+#              added Fooky.com/ScorpionBot/ScoutOut    http://www.fooky.com/scorpionbots
+#              added IlTrovatore-Setaccio      http://www.iltrovatore.it/aiuto/motore_di_ricerca.html  bot@iltrovatore.it
+#              added InsurancoBot      http://www.fastspywareremoval.com/
+#              added InternetArchive   http://lucene.apache.org/nutch/bot.html         nutch-agent@lucene.apache.org
+#              added KazoomBot http://www.kazoom.ca/bot.html   kazoombot@kazoom.ca
+#              added Kurzor    http://www.easymail.hu/ cursor@easymail.hu
+#              added NutchCVS  http://lucene.apache.org/nutch/bot.html nutch-agent@lucene.apache.org
+#              added NutchOSU-VLIB     http://lucene.apache.org/nutch/bot.html nutch-agent@lucene.apache.org
+#              added Orbiter   http://www.dailyorbit.com/bot.htm
+#              added PHP_version_tracker       http://www.nexen.net/phpversion/bot.php
+#              added SuperBot  http://www.sparkleware.com/superbot/
+#              added SynooBot  http://www.synoo.de/bot.html    webmaster@synoo.com
+#              added TestBot   http://www.agbrain.com/
+#              added TutorGigBot       http://www.tutorgig.info/
+#              added WebIndexer        mailto://webindexerv1@yahoo.com
+#              added WebMiner  http://64.124.122.252/feedback.html
+# 2006-02-01
+#              added heritrix https://sourceforge.net/forum/message.php?msg_id=3550202
+#              added Zeus Webster Pro https://sourceforge.net/forum/message.php?msg_id=3141164
+#               additional robots from a list provided by Moizes Gabor [ mojzi -a-t- free mail hu ]
+#              added Candlelight_Favorites_Inspector
+#              added DomainChecker
+#              added EasyDL
+#              added FavOrg
+#              added Favorites_Sweeper
+#              added Html_Link_Validator
+#              added Internet_Ninja
+#              added JRTwine_Software_Check_Favorites_Utility
+#              fixed Microsoft_URL_Control
+#              added miniRank
+#              added Missigua_Locator
+#              added NPBot
+#              added Ocelli
+#              added Onet.pl_SA
+#              added proodleBot
+#              added SearchGuild_DMOZ_Experiment
+#              added Susie
+#              added Website_Monitoring_Bot
+#              added Xenu_Link_Sleuth
+# 2006-05-15
+#              added ASPseek http://www.aspseek.org/
+#              added AdamM Bot http://home.blic.net/adamm/
+#              added archive.org_bot http://crawls.archive.org/collections/bncf/crawl.html
+#              added arianna.libero.it (Italian Portal/search engine)
+#              added Biz360 spider http://www.biz360.com
+#              added BlogBridge Service http://www.blogbridge.com/
+#              added BlogSearch http://www.icerocket.com/
+#              added libcrawl
+#              added edgeio-retriever http://www.edgeio.com
+#              added FeedFlow http://feedflow.com/about
+#              added Biblioteca Nazionale Centrale di Firenze (Italian National Archive) http://www.bncf.firenze.sbn.it/raccolta.txt
+#              added Java catchall - used by many spam bots
+#              added lanshanbot http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&amp;template=detail.html&amp;match=%5Cbid_g_l_140406_1%5Cb
+#              added msnbot-media http://search.msn.com/msnbot.htm
+#              added MT::Telegraph::Agent
+#              added Netluchs http://www.netluchs.de/ (German SE bot)
+#              added oBot http://www.webmasterworld.com/forum11/1616.htm
+#              added Onfolio http://www.onfolio.com/  (IE Toolbar plugin) - hit rss feeds.
+#              added ping.blo.gs http://blo.gs/ping.php blog bot
+#              added Sphere Scout http://www.sphere.com/
+#              added sproose crawler http://www.sproose.com/bot.html
+#              added SyndicAPI http://syndicapi.com/bot.html
+#              added Yahoo! Mindset http://mindset.research.yahoo.com/
+#              added msrabot
+#              added Vagabondo & Vagabondo-WAP http://www.wise-guys.nl/Contact/index.php?botselected=webagents&amp;lang=uk
+#              fixed Missigua Locator detection (Missigua_Locator -> Missigua Locator)
+#              changed echo to echo! to avoid conflict with the bonecho (Firefox 2.0) browser.
+#                      This requires you to reprocess historic logs if you want EchO! to be recognized for older reports.
+# 2006-05-17
+#              added Alpha Search Agent # 62.152.125.60 Eurologon Srl
+#              added Krugle http://www.krugle.com/crawler/info.html the search engine for developers
+#              added Octora Beta Bot http://www.octora.com/ # Blog and Rss Search Engine
+#              added UbiCrawler http://law.dsi.unimi.it/ubicrawler/
+#              added Yahoo! Slurp China http://misc.yahoo.com.cn/help.html
+#                      You must reprocess old logs for the Yahoo! Slurp China bot to be detected in old reports
+# 2006-05-20
+#              added 1-More Scanner http://www.myzips.com/software/1-More-Scanner.phtml
+#              added Accoona-AI-Agent http://www.accoona.com/
+#              added ActiveBookmark http://www.libmaster.com/active_bookmark.php
+#              added BIGLOTRON http://www.biglotron.com/robot.html
+#              added Bookmark-Manager http://bkm.sourceforge.net/
+#              added cbn00glebot
+#              added Cerberian Drtrs http://www.pgts.com.au/cgi-bin/psql?robot_info=25240
+#              added CFNetwork http://www.cocoadev.com/index.pl?CFNetwork
+#              added CheckWeb link validator http://p.duby.free.fr/chkweb.htm
+#              added Computer and Automation Research Institute Crawler http://www.ilab.sztaki.hu/~stamas/publications/p184-benczur.html
+#              added ConveraCrawler http://www.authoritativeweb.com/crawl/
+#              added ConveraMultiMediaCrawler http://www.authoritativeweb.com/crawl/
+#              added CSE HTML Validator Lite Online http://online.htmlvalidator.com/php/onlinevallite.php
+#              added Cursor http://adcenter.hu/docs/en/bot.html
+#              added Custo http://www.netwu.com/custo/
+#              added DataFountains/DMOZ Downloader http://infomine.ucr.edu/
+#              added Deepindex http://www.deepindex.net/faq.php
+#              added DNSGroup http://www.dnsgroup.com/
+#              added DoCoMo http://www.nttdocomo.co.jp/
+#              added dumm.de-Bot http://www.dumm.de/
+#              added ETS v http://www.freetranslation.com/help/
+#              added eventax http://www.eventax.de/
+#              added FAST Enterprise Crawler * crawleradmin.t-info@telekom.de http://www.telekom.de/
+#              added FAST Enterprise Crawler http://www.fast.no/
+#              added FAST Enterprise Crawler * T-Info_BI_cluster crawleradmin.t-info@telekom.de http://www.telekom.de/
+#              added FeedValidator http://feedvalidator.org/
+#              added FilmkameraBot http://www.filmkamera.at/bot.html
+#              added Findexa Crawler http://www.findexa.no/gulesider/article26548.ece
+#              added Global Fetch http://www.wesonet.com/
+#              added GOFORITBOT http://www.goforit.com/about/
+#              added GoForIt.com http://www.goforit.com/about/
+#              added GPU p2p crawler http://gpu.sourceforge.net/search_engine.php
+#              added HooWWWer http://cosco.hiit.fi/search/hoowwwer/
+#              added HPPrint
+#              added HTMLParser http://htmlparser.sourceforge.net/
+#              added Hundesuche.com-Bot http://www.hundesuche.com/
+#              added InfoBot http://www.infobot.org/
+#              added InfociousBot http://corp.infocious.com/tech_crawler.php
+#              added InternetSupervision http://internetsupervision.com/
+#              added isearch2006 http://www.yahoo.com.cn/
+#              added IUPUI_Research_Bot http://spamhuntress.com/2005/04/25/a-mail-harvester-visits/
+#              added KalamBot http://64.124.122.251/feedback.html
+#              added kamano.de NewsFeedVerzeichnis http://www.kamano.de/
+#              added Kevin http://dznet.com/kevin/
+#              added KnowItAll http://www.cs.washington.edu/research/knowitall/
+#              added Knowledge.com http://www.knowledge.com/
+#              added Kouaa Krawler http://www.kouaa.com/
+#              added ksibot http://ego.ms.mff.cuni.cz/
+#              added Link Valet Online http://www.htmlhelp.com/tools/valet/
+#              added lwp-request http://search.cpan.org/~gaas/libwww-perl-5.69/bin/lwp-request
+#              added lwp-trivial http://search.cpan.org/src/GAAS/libwww-perl-5.805/lib/LWP/Simple.pm
+#              added MapoftheInternet.com http://MapoftheInternet.com/
+#              added Matrix S.p.A. - FAST Enterprise Crawler http://tin.virgilio.it/
+#              added Megite http://www.megite.com/
+#              added Metaspinner http://index.meta-spinner.de/
+#              added Mini-reptile
+#              added Misterbot http://www.misterbot.fr/
+#              added Miva http://www.miva.com/
+#              added Mizzu Labs http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&amp;template=detail.html&amp;match=\bid_m_141105_2\b
+#              added MSRBOT http://research.microsoft.com/research/sv/msrbot/
+#              added MS SharePoint Portal Server - MS Search 4.0 Robot http://support.microsoft.com/default.aspx?scid=kb;en-us;284022
+#              added Mydoyouhike http://www.doyouhike.net/my
+#              added NASA Search http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_n_s_140506_2\b
+#              added NetSprint http://www.netsprint.pl/serwis/
+#              added NimbleCrawler http://www.healthline.com/
+#              added OpenWebSpider http://www.openwebspider.org/
+#              added Oracle Ultra Search http://www.oracle.com/technology/products/ultrasearch/index.html
+#              added OSSProxy http://www.marketscore.com/FAQ.Aspx
+#              added passwordmaker.org http://passwordmaker.org/
+#              added PEAR HTTP Request class http://pear.php.net/
+#              added PEERbot http://www.peerbot.com/
+#              added PHP version tracker http://www.nexen.net/phpversion/bot.php
+#              added PictureOfInternet http://malfunction.org/poi/
+#              added plinki http://www.plinki.com/
+#              added Port Huron Labs http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&amp;template=detail.html&amp;match=\bid_n_s_1133\b
+#              added PostFavorites http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&amp;template=detail.html&amp;match=\bid_n_s_1135\b
+#              added ProjectWF-java-test-crawler
+#              added PyQuery http://sourceforge.net/projects/pyquery/
+#              added Schizozilla http://spamhuntress.com/2005/03/18/gizmo/
+#              added Scumbot
+#              added Sensis Web Crawler http://www.sensis.com.au/
+#              added snap.com beta crawler http://www.snap.com/
+#              added Steeler http://www.tkl.iis.u-tokyo.ac.jp/~crawler/
+#              added STEROID  Download http://faqs.org.ru/progr/pascal/delphi_internet2.htm
+#              added Suchfin-Bot http://www.suchfin.de/
+#              added Sunrise http://www.sunrisexp.com/
+#              added Tagyu Agent http://www.tagyu.com/
+#              added Tcl http client package http://www.tcl.tk/man/tcl8.4/TclCmd/http.htm
+#              added TeragramCrawlerSURF http://www.teragram.com/
+#              added Test Crawler http://netp.ath.cx/
+#              added UnChaos Bot Hybrid Web Search Engine http://www.unchaos.com/
+#              added unido-bot http://www.unchina.org/unido/unido/our_projects/3_3.html
+#              added UniversalFeedParser http://feedparser.org/ (seen from md301000.inktomisearch.com)
+#              added updated http://www.updated.com/
+#              added Vermut http://vermut.aol.com
+#              added versus crawler from eda.baykan@epfl.ch http://www.epfl.ch/Eindex.html
+#              added Vespa Crawler (Yahoo Norway?) http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=%5Cbid_t_z_030406_1%5Cb
+#              added VSE http://www.vivisimo.com/
+#              added webcrawl.net http://www.webcrawl.net/
+#              added Web Downloader http://www.krasu.ru/soft/chuchelo/
+#              added Webdup http://www.webdup.com/en/index.html
+#              added Wells Search http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&amp;template=detail.html&amp;match=\bid_t_z_1484\b
+#              added WordPress http://wordpress.org/
+#              added wume crawler http://wume.cse.lehigh.edu/~xiq204/crawler/
+#              added Xenu's Link Sleuth (with ')
+#              added xirq http://www.xirq.com/
+#              added yoogliFetchAgent http://www.yoogli.com/
+#              added Z-Add Link Checker http://w3.z-add.co.uk/linkcheck/
+#              -- fix - some robots were reported with _ where _ should have been a space.
+#              changed Xenu Link Sleuth
+#              changed microsoft[_+\s]url[_+\s]control -> microsoft_url_control
+#              changed favorites_sweeper -> favorites_sweeper
+#              -- updates
+#              updated AskJeeves to Ask
+# 2012-06-05 Albrecht Mueller
+#              added Grabber from SDSC (San Diego Supercomputer Center).
+# 2013-09-30 Albrecht Mueller
+# AWStats probably cannot detect this bot as it identifies itself in
+# the referrer field and not in the user agent string.
+#92.113.100.35 - - [29/Sep/2013:17:22:46 +0200] "GET /robots.txt HTTP/1.1" 200 516 "-" "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0" "-"
+#92.113.100.35 - - [29/Sep/2013:17:22:49 +0200] "GET /tghome.htm HTTP/1.1" 200 4445 "http://extrabot.com/help/frytygativyheku.htm" "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0" "-"
+#92.113.100.35 - - [29/Sep/2013:17:22:51 +0200] "GET / HTTP/1.1" 200 5467 "http://extrabot.com/help/frytygativyheku.htm" "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0" "-"
+
+# to do  MS Search 4.0 Robot
+
+#package AWSROB;
+
+
+# Robots list was found at http://www.robotstxt.org/wc/active/all.txt
+# Other robots can be found at http://www.jafsoft.com/searchengines/webbots.html
+# Rem: To avoid bad detection, some robots' IDs were removed from this list:
+#      - Robots with ID of 3 letters only
+#      - Robots called 'webs' and 'tcl'
+# Rem: directhit changed into direct_hit (its real id)
+# Rem: calif changed into calif[^r] to avoid confusion with the Tiscalifreenet browser
+# Rem: fish changed into [^a]fish to avoid confusion with the Madsafish browser
+# Rem: roadrunner changed into road_runner
+# Rem: lycos changed to lycos_ to avoid confusion with the lycos-online browser
+# Rem: voyager changed into ^voyager\/ to avoid excluding the voyager and amigavoyager browsers
+
+# RobotsSearchIDOrder
+# Ordered lists of the matching criteria (Perl regular expressions) to search
+# for in log fields; the position in a list is the order Robot IDs are tried.
+# Most frequent ones are in list1, used when LevelForRobotsDetection is 1 or more
+# Minor robots are in list2, used when LevelForRobotsDetection is 2 or more
+# Note: Robot IDs are lower case and regex-quoted; '_', ' ' and '+' are usually written as '[_+\s]'.
+#-------------------------------------------------------
+@RobotsSearchIDOrder_list1 = (
+# Common robots (in robot file)
+'appie',
+'architext',
+'bingpreview',
+'bjaaland',
+'contentmatch',
+'ferret',
+'googlebot\-image',
+'googlebot',
+'google\-sitemaps',
+'google[_+\s]web[_+\s]preview',
+'grabber',
+'gulliver',
+'virus[_+\s]detector',         # Must stay before 'harvest' (the first match wins)
+'harvest',
+'htdig',
+'jeeves',
+'linkwalker',
+'lilina',
+'lycos[_+\s]',
+'moget',
+'muscatferret',
+'myweb',
+'nomad',
+'scooter',
+'slurp',
+'^voyager\/',
+'weblayers',
+# Common robots (not in robot file)
+'antibot',
+'bruinbot',
+'digout4u',
+'echo!',
+'fast\-webcrawler',
+'ia_archiver\-web\.archive\.org', # Must stay before 'ia_archiver' so web.archive.org is not grouped with Alexa
+'ia_archiver',
+'jennybot',
+'mercator',
+'netcraft',
+'msnbot\-media',
+'msnbot-udiscovery',
+'msnbot',
+'petersnews',
+'relevantnoise\.com',
+'unlost_web_crawler',
+'voila',
+'webbase',
+'webcollage',
+'cfetch',
+'zyborg',      # Must stay before 'wisenutbot' to avoid classification as wisenut
+'wisenutbot'
+);
+@RobotsSearchIDOrder_list2 = (
+# Less common robots (In robot file)
+'007ac9',
+'[^a]fish',
+'abcdatos',
+'abonti\.com',
+'acme\.spider',
+'ahoythehomepagefinder',
+'ahrefsbot',
+'alkaline',
+'anthill',
+'arachnophilia',
+'arale',
+'araneo',
+'aretha',
+'ariadne',
+'powermarks',
+'arks',
+'aspider',
+'atn\.txt',
+'atomz',
+'auresys',
+'backrub',
+'bbot',
+'bigbrother',
+'blackwidow',
+'blindekuh',
+'bloodhound',
+'borg\-bot',
+'brightnet',
+'bspider',
+'cactvschemistryspider',
+'calif[^r]',
+'cassandra',
+'cgireader',
+'checkbot',
+'christcrawler',
+'churl',
+'cienciaficcion',
+'cms\scrawler',
+'collective',
+'combine',
+'conceptbot',
+'coolbot',
+'core',
+'cosmos',
+'crazywebcrawler',
+'cruiser',
+'cusco',
+'cyberspyder',
+'desertrealm',
+'deweb',
+'dienstspider',
+'digger',
+'diibot',
+'direct_hit',
+'dnabot',
+'domainappender',
+'download_express',
+'dragonbot',
+'dwcp',
+'e\-collector',
+'ebiness',
+'elfinbot',
+'emacs',
+'emcspider',
+'esther',
+'evliyacelebi',
+'fastcrawler',
+'feedcrawl',
+'fdse',
+'felix',
+'fetchrover',
+'fido',
+'finnish',
+'fireball',
+'fouineur',
+'francoroute',
+'freecrawl',
+'funnelweb',
+'gama',
+'gazz',
+'gcreep',
+'getbot',
+'geturl',
+'golem',
+'gougou',
+'grapnel',
+'griffon',
+'gromit',
+'gulperbot',
+'hambot',
+'havindex',
+'hometown',
+'htmlgobble',
+'hyperdecontextualizer',
+'iajabot',
+'iaskspider',
+'hl_ftien_spider',
+'sogou',
+'icjobs\.de',
+'iconoclast',
+'ilse',
+'imagelock',
+'incywincy',
+'informant',
+'infoseek',
+'infoseeksidewinder',
+'infospider',
+'inspectorwww',
+'intelliagent',
+'irobot',
+'iron33',
+'israelisearch',
+'javabee',
+'jbot',
+'jcrawler',
+'jobo',
+'jobot',
+'joebot',
+'jubii',
+'jumpstation',
+'kapsi',
+'katipo',
+'kilroy',
+'ko[_+\s]yappo[_+\s]robot',
+'kummhttp',
+'labelgrabber\.txt',
+'larbin',
+'legs',
+'linkidator',
+'linkscan',
+'lockon',
+'logo_gif',
+'macworm',
+'magpie',
+'marvin',
+'mattie',
+'mediafox',
+'merzscope',
+'meshexplorer',
+'mindcrawler',
+'mnogosearch',
+'momspider',
+'monster',
+'motor',
+'muncher',
+'mwdsearch',
+'ndspider',
+'nederland\.zoek',
+'netcarta',
+'netmechanic',
+'netscoop',
+'newscan\-online',
+'nhse',
+'northstar',
+'nzexplorer',
+'objectssearch',
+'occam',
+'octopus',
+'openfind',
+'orb_search',
+'packrat',
+'pageboy',
+'parasite',
+'patric',
+'pegasus',
+'perignator',
+'perlcrawler',
+'phantom',
+'phpdig',
+'piltdownman',
+'pimptrain',
+'pioneer',
+'pitkow',
+'pjspider',
+'plumtreewebaccessor',
+'poppi',
+'portalb',
+'psbot',
+'python',
+'raven',
+'rbse',
+'resumerobot',
+'rhcs',
+'road_runner',
+'robbie',
+'robi',
+'robocrawl',
+'robofox',
+'robozilla',
+'roverbot',
+'rules',
+'safetynetrobot',
+'semalt', #Note: This entry will not work as this crawler identifies itself
+# in the referrer string and not in the user agent string
+'search\-info',
+'search_au',
+'searchprocess',
+'senrigan',
+'sgscout',
+'shaggy',
+'shaihulud',
+'sift',
+'simbot',
+'sistrix', #Virus/trojan-infection? fr-crawler, ca-crawler? See https://www.projecthoneypot.org/ip_37.59.55.128, https://www.projecthoneypot.org/ip_198.27.80.144
+'site\-valet',
+'sitetech',
+'skymob',
+'slcrawler',
+'smartspider',
+'snooper',
+'solbot',
+'speedy',
+'spider[_+\s]monkey',
+'spiderbot',
+'spiderline',
+'spiderman',
+'spiderview',
+'spry',
+'sqworm',
+'ssearcher',
+'suke',
+'sunrise',
+'suntek',
+'sven',
+'tach_bw',
+'tagyu_agent',
+'tailrank',
+'tarantula',
+'tarspider',
+'techbot',
+'templeton',
+'titan',
+'titin',
+'tkwww',
+'tlspider',
+'ucsd',
+'udmsearch',
+'universalfeedparser',
+'urlck',
+'valkyrie',
+'verticrawl',
+'victoria',
+'visionsearch',
+'voidbot',
+'vwbot',
+'w3index',
+'w3m2',
+'wallpaper',
+'wanderer',
+'wapspIRLider',
+'webbandit',
+'webcatcher',
+'webcopy',
+'webfetcher',
+'webfoot',
+'webinator',
+'weblinker',
+'webmirror',
+'webmoose',
+'webquest',
+'webreader',
+'webreaper',
+'websnarf',
+'webspider',
+'webvac',
+'webwalk',
+'webwalker',
+'webwatch',
+'whatuseek',
+'whowhere',
+'wired\-digital',
+'wmir',
+'wolp',
+'wombat',
+'wordpress',
+'worm',
+'woozweb',
+'wwwc',
+'wz101',
+'xenu\slink\ssleuth',
+'xget',
+# Other robots reported by users
+'^finbot', #UA string starts with "finbot", should not match "elfinbot"
+'^webindex$', #UA should not match "webindexer"
+'1\-more_scanner',
+'360spider',
+'a6-indexer',
+'accoona\-ai\-agent',
+'activebookmark',
+'adamm_bot',
+'adsbot-google',
+'advbot',
+'affectv\.co\.uk',
+'almaden',
+'aipbot',
+'aleadsoftbot',
+'alpha_search_agent',
+'allrati',
+'aport',
+'applebot',
+'archive\-de\.com',
+'archive\.org_bot',
+'argus',               # Must be before nutch
+'arianna\.libero\.it',
+'aspseek',
+'asterias',
+'awbot',
+'backlinktest\.com',
+'baiduspider',
+'becomebot',
+'bender',
+'betabot',
+'biglotron',
+'bittorrent_bot',
+'biz360[_+\s]spider',
+'blexbot',
+'blogbridge[_+\s]service',
+'bloglines',
+'blogpulse',
+'blogsearch',
+'blogshares',
+'blogslive',
+'blogssay',
+'bncf\.firenze\.sbn\.it\/raccolta\.txt',
+'bobby',
+'boitho\.com\-dc',
+'bookmark\-manager',
+'boris',
+'bubing',
+'bumblebee',
+'candlelight[_+\s]favorites[_+\s]inspector',
+'careerbot',
+'cbn00glebot',
+'ccbot',
+'cerberian_drtrs',
+'cfnetwork',
+'cipinetbot',
+'checkweb_link_validator',
+'cliqzbot',
+'commons\-httpclient',
+'computer_and_automation_research_institute_crawler',
+'converamultimediacrawler',
+'converacrawler',
+'copubbot',
+'cscrawler',
+'cse_html_validator_lite_online',
+'cuasarbot',
+'cursor',
+'custo',
+'datafountains\/dmoz_downloader',
+'dataprovider\.com',
+'daumoa',
+'daviesbot',
+'daypopbot',
+'deepindex',
+'deusu',
+'dipsie\.bot',
+'dnsgroup',
+'doccheckbot',
+'domainchecker',
+'domainsdb\.net',
+'dotbot',
+'duckduckgo-favicons-bot',
+'dulance',
+'dumbot',
+'dumm\.de\-bot',
+'earthcom\.info',
+'easydl',
+'eccp',
+'edgeio\-retriever',
+'ernst[:blank:]2\.0',
+'ets_v',
+'exactseek',
+'extreme[_+\s]picture[_+\s]finder',
+'eventax',
+'everbeecrawler',
+'everest\-vulcan',
+'ezresult',
+'enteprise',
+'facebook',
+'facebot',
+'fast_enterprise_crawler.*crawleradmin\.t\-info@telekom\.de',
+'fast_enterprise_crawler.*t\-info_bi_cluster_crawleradmin\.t\-info@telekom\.de',
+'finderlein[_+\s]research[_+\s]crawler',
+'matrix_s\.p\.a\._\-_fast_enterprise_crawler', # must come before fast enterprise crawler
+'fast_enterprise_crawler',
+'fast\-search\-engine',
+'fastbot',
+'favicon',
+'favorg',
+'favorites_sweeper',
+'feedburner',
+'feedfetcher\-google',
+'feedflow',
+'feedster',
+'feedsky',
+'feedvalidator',
+'fetchbot',
+'filmkamerabot',
+'filterdb\.iss\.net',
+'findlinks',
+'findexa_crawler',
+'firmilybot',
+'foaf-search\.net',
+'fooky\.com\/ScorpionBot',
+'g2crawler',
+'gaisbot',
+'geniebot',
+'genieo',
+'gigablastopensource',
+'gigabot',
+'girafabot',
+'global_fetch',
+'gnodspider',
+'goforit\.com',
+'goforitbot',
+'gonzo',
+'grapeshot',
+'grub',
+'gpu_p2p_crawler',
+'henrythemiragorobot',
+'heritrix',
+'holmes',
+'hoowwwer',
+'hpprint',
+'htmlparser',
+'html[_+\s]link[_+\s]validator',
+'httrack',
+'hundesuche\.com\-bot',
+'i-bot',
+'icarus6j',
+'ichiro',
+'idmarch',
+'iltrovatore\-setaccio',
+'implisensebot',
+'infobot',
+'infociousbot',
+'infohelfer',
+'infomine',
+'insurancobot',
+'integromedb\.org',
+'internet[_+\s]ninja',
+'internetarchive',
+'internetseer',
+'internetsupervision',
+'ips\-agent',
+'irlbot',
+'isearch2006',
+'istellabot',
+'iupui_research_bot',
+'izsearch',
+'james\sbot',
+'jobboerse', #AWStats seems not to find this one despite the fact that "JobboerseBot" and "jobboerse.com" appear in the UA-string, maybe some previous entry matches
+'jrtwine[_+\s]software[_+\s]check[_+\s]favorites[_+\s]utility',
+'justview',
+'kalambot',
+'kamano\.de_newsfeedverzeichnis',
+'kazoombot',
+'kevin',
+'keyoshid', # Must come before Y!J
+'kinjabot',
+'kinja\-imagebot',
+'knowitall',
+'knowledge\.com',
+'kouaa_krawler',
+'krugle',
+'ksibot',
+'kurzor',
+'lanshanbot',
+'letscrawl\.com',
+'libcrawl',
+'linkbot',
+'linkdex\.com',
+'link_valet_online',
+'metager\-linkchecker',        # Must be before linkchecker
+'linkchecker',
+'linkstats\sbot',
+'lipperhey',
+'livejournal\.com',
+'lmspider',
+'loadtimebot',
+'lssrocketcrawler',
+'ltbot',
+'ltx71',
+'lwp\-request',
+'lwp\-trivial',
+'madaali\.de',
+'magpierss',
+'mail\.ru',
+'mapoftheinternet\.com',
+'meanpathbot',
+'mediabot',
+'mediapartners\-google',
+'megaindex',
+'megite',
+'memorybot',
+'metager2-verification-bot',
+'metajobbot', #Does not show up in the results of Sep. 2015 despite the fact that the corresponing log file has about 40 entries containing "MetaJobBot" in the UA string - strange.
+'metaspinner',
+'miadev',
+'microsoft\sbits',
+'microsoft.*discovery', # = 'microsoft (?:office (?:protocol|existence)|data access internet publishing provider protocol) discovery',
+'microsoft[_+\s]url[_+\s]control',
+'mindupbot',
+'mini\-reptile',
+'minirank',
+'missigua_locator',
+'misterbot',
+'miva',
+'mizzu_labs',
+'mj12bot',
+'mojeekbot',
+'msiecrawler',
+'ms[_+\s]search[_+\s]6\.0[_+\s]robot',
+'ms_search_4\.0_robot',
+'msrabot',
+'msrbot',
+'mt::telegraph::agent',
+'mydoyouhike',
+'nagios',
+'nasa_search',
+'netestate\sne\scrawler',
+'netluchs',
+'netsprint',
+'newsgatoronline',
+'nicebot',
+'nimblecrawler',
+'noxtrumbot',
+'npbot',
+'loocalcrawler/nutch',
+'nutchcvs',
+'nutchosu\-vlib',
+'nutch',  # Must come after other nutch versions
+'ocelli',
+'octora_beta_bot',
+'omniexplorer[_+\s]bot',
+'onet\.pl[_+\s]sa',
+'onfolio',
+'opentaggerbot',
+'openwebspider',
+'optimizer',
+'oracle_ultra_search',
+'orangebot',
+'orbiter',
+'yodaobot',
+'qihoobot',
+'qwantify',
+'passwordmaker\.org',
+'pear_http_request_class',
+'peerbot',
+'perman',
+'php[_+\s]version[_+\s]tracker',
+'phpcrawl',
+'picmole',
+'pictureofinternet',
+'ping\.blo\.gs',
+'plinki',
+'pluckfeedcrawler',
+'plukkie',
+'pogodak',
+'pompos',
+'popdexter',
+'port_huron_labs',
+'postfavorites',
+'projectwf\-java\-test\-crawler',
+'proodlebot',
+'publiclibraryarchive',
+'pyquery',
+'rambler',
+'redalert',
+'riddler',
+'rogerbot',
+'rojo',
+'rssimagesbot',
+'ruffle',
+'rufusbot',
+'safeads\.xyz',
+'safesearch',
+'sandcrawler',
+'savetheworldheritage',
+'sbider',
+'schizozilla',
+'scumbot',
+'searchguild[_+\s]dmoz[_+\s]experiment',
+'searchmetricsbot',
+'seekbot',
+'semrushbot',
+'sensis_web_crawler',
+'seodiver',
+'seokicks\.de',
+'seoscanners',
+'seznambot',
+'shim\-crawler',
+'shoutcast',
+'sitedomain-bot',
+'siteexplorer\.info',
+'skimbot',
+'slysearch',
+'smtbot',
+'snap\.com_beta_crawler',
+'sohu\-search',
+'sohu', # "sohu agent"
+'snappy',
+'spbot',
+'sphere_scout',
+'spiderlytics',
+'spip',
+'sproose_crawler',
+'ssearch_bot',
+'steeler',
+'steroid__download',
+'stq_bot',
+'suchfin\-bot',
+'superbot',
+'surveybot',
+'susie',
+'syndic8',
+'syndicapi',
+'synoobot',
+'tcl_http_client_package',
+'technoratibot',
+'teragramcrawlersurf',
+'test_crawler',
+'testbot',
+'thumbsniper',
+'t\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e',
+'topicblogs',
+'turnitinbot',
+'turtlescanner',               # Must be before turtle
+'turtle',
+'tutorgigbot',
+'twiceler',
+'ubicrawler',
+'ultraseek',
+'unchaos_bot_hybrid_web_search_engine',
+'unido\-bot',
+'unisterbot',
+'updated',
+'ustc\-semantic\-group',
+'vagabondo\-wap',
+'vagabondo',
+'vebidoobot',
+'vermut',
+'versus_crawler_from_eda\.baykan@epfl\.ch',
+'vespa_crawler',
+'voltron',
+'vortex',
+'vse\/',
+'w3c\-checklink',
+'w3c[_+\s]css[_+\s]validator[_+\s]jfouffa',
+'w3c_validator',
+'watchmouse',
+'wavefire',
+'waybackarchive\.org',
+'wbsearchbot',
+'webclipping\.com',
+'webcompass',
+'webcrawl\.net',
+'web_downloader',
+'webdup',
+'webfilter',
+'webindexer',
+'webminer',
+'website[_+\s]monitoring[_+\s]bot',
+'webvulncrawl',
+'wells_search',
+'wer-liefert-was',
+'wesee:search',
+'wevikabot',
+'wonderer',
+'wotbox',
+'wume_crawler',
+'wwweasel',
+'xenu\'s_link_sleuth',
+'xenu_link_sleuth',
+'xirq',
+'xovibot',
+'y!j', # Must come after keyoshid Y!J
+'yacy',
+'yahoo\-blogs',
+'yahoo\-verticalcrawler',
+'yahoofeedseeker',
+'yahooseeker\-testing',
+'yahooseeker',
+'yahoo\-mmcrawler',
+'yahoo!_mindset',
+'yandex',
+'flexum',
+'yanga',
+'yet-another-spider',
+'yisouspider',
+'yooglifetchagent',
+'z\-add_link_checker',
+'zealbot',
+'zhuaxia',
+'zspider',
+'zeus',
+'ng\/1\.', # put at end to avoid false positive
+'ng\/2\.', # put at end to avoid false positive
+'exabot',  # put at end to avoid false positive
+# Additional bots found by Sussex.
+'^[1-3]$', # Hiding bots. Doesn't appear to be a valid user agent.
+'alltop',
+'applesyndication',
+'asynchttpclient',
+'bingbot',
+'blogged_crawl',
+'bloglovin',
+'butterfly',
+'buzztracker',
+'carpathia',
+'catbot',
+'chattertrap',
+'check_http', #(nagios) a monitoring tool
+'coldfusion',
+'covario',
+'daylifefeedfetcher',
+'discobot',
+'dlvr\.it',
+'dreamwidth',
+'drupal',
+'ezoom',
+'feedmyinbox',
+'feedroll\.com',
+'feedzira',
+'fever\/',
+'freenews',
+'geohasher',
+'hanrss',
+'inagist',
+'jacobin\sclub',
+'jakarta',
+'js\-kit',
+'largesmall\scrawler',
+'linkedinbot',
+'longurl',
+'metauri',
+'microsoft\-webdav\-miniredir',
+'^motorola$',
+'movabletype',
+# These appear to be bots trying to hide. All of the usual architecture data is missing.
+'^mozilla\/3\.0\s\(compatible$',
+'^mozilla\/4\.0$',
+'^mozilla\/4\.0\s\(compatible;\)$',
+'^mozilla\/5\.0$',
+'^mozilla\/5\.0\s\(compatible;$',
+'^mozilla\/5\.0\s\(en\-us\)$',
+'^mozilla\/5\.0\sfirefox\/3\.0\.5$',
+'^msie',
+# End of hiding bots.
+'netnewswire',
+'\snetseer\s',
+'netvibes',
+'newrelicpinger',
+'newsfox',
+'nextgensearchbot',
+'ning',
+'pingdom',
+'pita',
+'postpost',
+'postrank',
+'printfulbot',
+'protopage',
+'proximic',
+'quipply',
+'r6\_',
+'ratingburner',
+'regator',
+'rome\sclient',
+'rpt\-httpclient',
+'rssgraffiti',
+'sage\+\+',
+'scoutjet',
+'simplepie',
+'sitebot',
+'summify\.com',
+'superfeedr',
+'synthesio',
+'teoma',
+'topblogsinfo',
+'topix\.net',
+'trapit',
+'trileet',
+'tweetedtimes',
+'twisted\spagegetter',
+'twitterbot',
+'twitterfeed',
+'unwindfetchor',
+'wazzup',
+'windows\-rss\-platform',
+'wiumi',
+'xydo',
+'yahoo!\sslurp',
+'yahoo\spipes',
+'yahoo\-newscrawler',
+'yahoocachesystem',
+'yahooexternalcache',
+'yahoo!\ssearchmonkey',
+'yahooysmcm',
+'yammer',
+# 'yandexbot', #already covered by 'yandex'
+'yeti',
+'yie8',
+'youdao',
+'yourls',
+'zemanta',
+'zend_http_client',
+'zumbot',
+# Other IDs that are robots 99% of the time
+'wget',
+'libwww',
+'^java\/[0-9]'   # put at end to avoid false positive
+);
+@RobotsSearchIDOrder_listgen = (
+# Generic robot patterns: broad keywords rather than names of specific robots.
+'robot',
+'checker',
+'crawl',
+'discovery',
+'hunter',
+'scanner',
+'spider',
+'sucker',
+'bot[\s_+:,\.\;\/\\\-]',
+# NOTE(review): the "bot + separator" pattern above presumably (if matching is case-insensitive - confirm) identifies
+#"Mozilla/5.0 (Linux; U; Android 4.2.2; de-de; CUBOT P9 Build/JDQ39) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30"
+#as a bot. There is an Android mobile phone called "CUBOT P9", so this is probably a false positive and not a bot.
+'[\s_+:,\.\;\/\\\-]bot',
+'curl',
+'php',
+'ruby\/',
+'no_user_agent'
+);
+
+
+
+# RobotsHashIDLib
+# List of robot names ('robot id','robot clear text')
+#-------------------------------------------------------
+%RobotsHashIDLib   = (
+# Common robots (In robot file)
+'appie','<a href="http://www.walhello.com/" title="Bot home page [new window]" target="_blank">Walhello appie</a>',
+'architext','ArchitextSpider',
+'bingpreview','Bing Preview bot',
+'bjaaland','Bjaaland',
+'ferret','Wild Ferret Web Hopper #1, #2, #3',
+'contentmatch','<a href="http://p4p.cn.yahoo.com">Yahoo!China ContentMatch Crawler</a>',
+'googlebot\-image','<a href="http://www.google.com/bot.html" title="Bot home page [new window]" target="_blank">Googlebot-Image</a>',
+'googlebot','<a href="http://www.google.com/bot.html" title="Bot home page [new window]" target="_blank">Googlebot</a>',
+'google\-sitemaps', 'Google Sitemaps',
+'grabber', '<a href="http://www.sdsc.edu/" title="Seltsame Aktivitaeten vom San Diego Supercomputer Center [new window]" target="_blank">Grabber (SDSC)</a>',
+'google[_+\s]web[_+\s]preview', 'Google Web Preview',
+'gulliver','Northern Light Gulliver',
+'virus[_+\s]detector','<a href="http://www.securecomputing.com/" title="virus_harvester@securecomputing.com; Bot home page [new window]" target="_blank">virus_detector</a>',
+'harvest','Harvest',
+'htdig','ht://Dig',
+'jeeves','<a href="http://sp.ask.com/docs/about/tech_crawling.html" title="Bot home page [new window]" target="_blank">Ask</a>',
+'linkwalker','LinkWalker',
+'lilina','Lilina',
+'lycos[_+\s]','Lycos',
+'moget','moget',
+'muscatferret','Muscat Ferret',
+'myweb','Internet Shinchakubin',
+'nomad','Nomad',
+'scooter','Scooter',
+'slurp','<a href="http://help.yahoo.com/help/us/ysearch/slurp/" title="Bot home page [new window]" target="_blank">Yahoo Slurp</a>',
+'^voyager\/','Voyager',
+'weblayers','Weblayers',
+# Common robots (Not in robot file)
+'antibot','Antibot',
+'bruinbot','<a href="http://web.archive.org/" title="BruinBot home page [new window]" target="_blank">The web archive</a>',
+'digout4u','Digout4u',
+'echo!','EchO!',
+'fast\-webcrawler','Fast-Webcrawler',
+'ia_archiver\-web\.archive\.org','<a href="http://web.archive.org/" title="Bot home page [new window]" target="_blank">The web archive (IA Archiver)</a>',
+'ia_archiver','<a href="http://www.alexa.com/" title="Bot home page [new window]" target="_blank">Alexa (IA Archiver)</a>',
+'jennybot','JennyBot',
+'mercator','Mercator',
+'msnbot\-media','<a href="http://search.msn.com/msnbot.htm" title="Bot home page [new window]" target="_blank">MSNBot-media</a>',
+'msnbot-udiscovery', '<a href="http://search.msn.com/msnbot.htm" title="Feb 18, 2015: UA contains indentification during robots.txt access only." target="_blank">msnbot-UDiscovery</a> Note: AWStats counts most of its traffic as user traffic',
+'msnbot','<a href="http://search.msn.com/msnbot.htm" title="Bot home page [new window]" target="_blank">MSNBot</a>',
+'netcraft','<a href="http://www.netcraft.com/survey/" title="Bot home page [new window]" target="_blank">Netcraft</a>',
+'petersnews','Petersnews',
+'unlost_web_crawler','Unlost Web Crawler',
+'voila','Voila',
+'webbase', 'WebBase',
+'zyborg','<a href="http://www.WISEnutbot.com/" title="wn-14.zyborg@looksmart.net Bot home page [new window]" target="_blank">ZyBorg</a>',
+'wisenutbot','<a href="http://www.WISEnutbot.com/" title="Bot home page [new window]" target="_blank">WISENutbot</a>',
+'webcollage','<a href="http://www.jwz.org/webcollage/" title="WebCollage home page [new window]" target="_blank">WebCollage</a>',
+'cfetch','<a href="http://www.kosmix.com/crawler.html" title="kosmix home page [new window]" target="_blank">Cfetch</a>',
+# Less common robots (In robot file)
+'007ac9',  '<a href="http://crawler.007ac9.net/" rel="nofollow" title="007ac9 Crawler Page [new window]" target="_blank">007ac9 Crawler</a>, seems to belong to <a href="http://www.sistrix.com/" rel="nofollow" title="SISTRIX Home Page [new window]" target="_blank">SISTRIX</a>',
+'[^a]fish','Fish search',
+'abcdatos','ABCdatos BotLink',
+'abonti\.com','<a href="http://www.abonti.com/" title="Abonti WebSearch [new window]" target="_blank">Abonti WebSearch</a>',
+'acme\.spider','Acme.Spider',
+'ahoythehomepagefinder','Ahoy! The Homepage Finder',
+'ahrefsbot', '<a href="http://ahrefs.com/robot/" title="Bot home page [new window]" target="_blank">AhrefsBot</a>',
+'alkaline','Alkaline',
+'anthill','Anthill',
+'arachnophilia','Arachnophilia',
+'arale','Arale',
+'araneo','Araneo',
+'aretha','Aretha',
+'ariadne','ARIADNE',
+'powermarks','<a href="http://www.kaylon.com/power.html" title="Bot home page [new window]" target="_blank">Powermarks</a>', # must come before Arks; seen used by referrer spam
+'arks','arks',
+'aspider','ASpider (Associative Spider)',
+'atn\.txt','ATN Worldwide',
+'atomz','Atomz.com Search Robot',
+'auresys','AURESYS',
+'backrub','BackRub',
+'bbot','BBot',
+'bigbrother','Big Brother',
+'blackwidow','BlackWidow',
+'blindekuh','Die Blinde Kuh',
+'bloodhound','Bloodhound',
+'borg\-bot','Borg-Bot',
+'brightnet','bright.net caching robot',
+'bspider','BSpider',
+'cactvschemistryspider','CACTVS Chemistry Spider',
+'calif[^r]','Calif',
+'cassandra','Cassandra',
+'cgireader','Digimarc Marcspider/CGI',
+'checkbot','Checkbot',
+'christcrawler','ChristCrawler.com',
+'churl','churl',
+'cienciaficcion','cIeNcIaFiCcIoN.nEt',
+'cms\scrawler', '<a href="http://www.cmscrawler.com" rel="nofollow" title="CMS Crawler Home Page [new window]" target="_blank">CMS Crawler</a>',
+'collective','Collective',
+'combine','Combine System',
+'conceptbot','Conceptbot',
+'coolbot','CoolBot',
+'core','Web Core / Roots',
+'cosmos','XYLEME Robot',
+'crazywebcrawler', '<a href="http://www.crazywebcrawler.com/" rel="nofollow" title="CrazyWeb Crawler Home Page [new window]" target="_blank">CrazyWeb Crawler</a>',
+'cruiser','Internet Cruiser Robot',
+'cusco','Cusco',
+'cyberspyder','CyberSpyder Link Test',
+'desertrealm','Desert Realm Spider',
+'deweb','DeWeb(c) Katalog/Index',
+'dienstspider','DienstSpider',
+'digger','Digger',
+'diibot','Digital Integrity Robot',
+'direct_hit','Direct Hit Grabber',
+'dnabot','DNAbot',
+'domainappender',  '<a href="http://www.profound.net/domainappender" rel="nofollow" title="DomainAppender Home Page [new window]" target="_blank">DomainAppender</a>',
+'download_express','DownLoad Express',
+'dragonbot','DragonBot',
+'dwcp','DWCP (Dridus\' Web Cataloging Project)',
+'e\-collector','e-collector',
+'ebiness','EbiNess',
+'elfinbot','ELFINBOT',
+'emacs','Emacs-w3 Search Engine',
+'emcspider','ananzi',
+'esther','Esther',
+'evliyacelebi','Evliya Celebi',
+'fastcrawler','FastCrawler',
+'feedcrawl','FeedCrawl by feed@aobo.com',
+'fdse','Fluid Dynamics Search Engine robot',
+'felix','Felix IDE',
+'fetchrover','FetchRover',
+'fido','fido',
+'finnish','Finnish',
+'fireball','KIT-Fireball',
+'fouineur','Fouineur',
+'francoroute','Robot Francoroute',
+'freecrawl','Freecrawl',
+'funnelweb','FunnelWeb',
+'gama','gammaSpider, FocusedCrawler',
+'gazz','gazz',
+'gcreep','GCreep',
+'getbot','GetBot',
+'geturl','GetURL',
+'golem','Golem',
+'gougou','GouGou',
+'grapnel','Grapnel/0.01 Experiment',
+'griffon','Griffon',
+'gromit','Gromit',
+'gulperbot','Gulper Bot',
+'hambot','HamBot',
+'havindex','havIndex',
+'hometown','Hometown Spider Pro',
+'htmlgobble','HTMLgobble',
+'hyperdecontextualizer','Hyper-Decontextualizer',
+'iajabot','iajaBot',
+'iaskspider','<a href="http://www.iask.com/" target="_blank">Sina Iask Spider</a>',
+'hl_ftien_spider','<a href="http://www.hylanda.com/" target="_blank">Hylanda</a>',
+'sogou','<a href="http://www.sogou.com/" target="_blank">Sogou Spider</a>',
+'icjobs\.de', '<a href="http://www.icjobs.de/" title="April 10, 2014: UA contains indentification during the first and second page access only." target="_blank">iCjobs Spider</a> Note: Most traffic counts as user traffic',
+#20130805 The user agent string of the icjobs-spider contained the
+#identifying string only when it accessed the robots.txt file.
+#When it accessed the actual content it did not identify itself as
+#a spider. Thus traffic of this spider was counted as user traffic.
+#The behaviour seems to have changed now - the spider identifies itself
+#when it accesses content pages.
+#20141401 Behavior as before: Does identify itself when it accesses
+# robots.txt and the root page. The following traffic does not contain
+# the identification string and is therefore counted as user traffic.
+'iconoclast','Popular Iconoclast',
+'ilse','Ingrid',
+'imagelock','Imagelock',
+'incywincy','IncyWincy',
+'informant','Informant',
+'infoseek','InfoSeek Robot 1.0',
+'infoseeksidewinder','Infoseek Sidewinder',
+'infospider','InfoSpiders',
+'inspectorwww','Inspector Web',
+'intelliagent','IntelliAgent',
+'ips\-agent', 'ips-agent Verisign(?) - no reliable information found.',
+'irobot','I, Robot',
+'iron33','Iron33',
+'israelisearch','Israeli-search',
+'javabee','JavaBee',
+'jbot','JBot Java Web Robot',
+'jcrawler','JCrawler',
+'jobo','JoBo Java Web Robot',
+'jobot','Jobot',
+'joebot','JoeBot',
+'jubii','The Jubii Indexing Robot',
+'jumpstation','JumpStation',
+'kapsi','image.kapsi.net',
+'katipo','Katipo',
+'kilroy','Kilroy',
+'ko[_+\s]yappo[_+\s]robot','KO_Yappo_Robot',
+'kummhttp','<a href="http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_g_l_301105_2\b" title="Bot documentation page [new window]" target="_blank">KummHttp</a>',
+'labelgrabber\.txt','LabelGrabber',
+'larbin','<a href="http://para.inria.fr/~ailleret/larbin/index-eng.html" title="Bot home page [new window]" target="_blank">larbin</a>',
+'legs','legs',
+'linkidator','Link Validator',
+'linkscan','LinkScan',
+'lockon','Lockon',
+'logo_gif','logo.gif Crawler',
+'macworm','Mac WWWWorm',
+'lmspider','<a href="http://www.nuance.com/" title="Bot home page lmspider@scansoft.com [new window]" target="_blank">lmspider</a>',
+'lwp\-request','<a href="http://search.cpan.org/~gaas/libwww-perl-5.69/bin/lwp-request" title="lwp-request home page [new window]" target="_blank">lwp-request</a>',
+'lwp\-trivial','<a href="http://search.cpan.org/src/GAAS/libwww-perl-5.805/lib/LWP/Simple.pm" title="lwp-trivial home page [new window]" target="_blank">lwp-trivial</a>',
+'magpie','<a href="http://magpierss.sf.net/" title="Bot home page [new window]" target="_blank">MagpieRSS</a>',
+'marvin','marvin/infoseek',
+'mattie','Mattie',
+'mediafox','MediaFox',
+'merzscope','MerzScope',
+'meshexplorer','NEC-MeshExplorer',
+'mindcrawler','MindCrawler',
+'mnogosearch','mnoGoSearch search engine software',
+'momspider','MOMspider',
+'monster','Monster',
+'motor','Motor',
+'muncher','Muncher',
+'mwdsearch','Mwd.Search',
+'ndspider','NDSpider',
+'nederland\.zoek','Nederland.zoek',
+'netcarta','NetCarta WebMap Engine',
+'netmechanic','<a href="http://www.netmechanic.com/" title="Bot home page [new window]" target="_blank">NetMechanic</a>',
+'netscoop','NetScoop',
+'newscan\-online','newscan-online',
+'nhse','NHSE Web Forager',
+'northstar','The NorthStar Robot',
+'nzexplorer','nzexplorer',
+'objectssearch','ObjectsSearch',
+'occam','Occam',
+'octopus','HKU WWW Octopus',
+'openfind','Openfind data gatherer',
+'orb_search','Orb Search',
+'packrat','Pack Rat',
+'pageboy','PageBoy',
+'parasite','ParaSite',
+'patric','Patric',
+'pegasus','pegasus',
+'perignator','The Peregrinator',
+'perlcrawler','PerlCrawler 1.0',
+'phantom','Phantom',
+'phpdig','PhpDig',
+'piltdownman','PiltdownMan',
+'pimptrain','Pimptrain.com\'s robot',
+'pioneer','Pioneer',
+'pitkow','html_analyzer',
+'pjspider','Portal Juice Spider',
+'plumtreewebaccessor','PlumtreeWebAccessor',
+'poppi','Poppi',
+'portalb','PortalB Spider',
+'psbot','<a href="http://www.picsearch.com/bot.html" title="Bot home page" target="_blank">psbot</a>',
+'python','<a href="http://docs.python.org/library/urllib.html" title="Tools developed using a Python library" target="_blank">Python-urllib</a>',
+'raven','Raven Search',
+'rbse','RBSE Spider',
+'resumerobot','Resume Robot',
+'rhcs','RoadHouse Crawling System',
+'road_runner','Road Runner: The ImageScape Robot',
+'robbie','Robbie the Robot',
+'robi','ComputingSite Robi/1.0',
+'robocrawl','RoboCrawl Spider',
+'robofox','RoboFox',
+'robozilla','Robozilla',
+'roverbot','Roverbot',
+'rules','RuLeS',
+'safetynetrobot','SafetyNet Robot',
+'semalt', '<a href="http://semalt.semalt.com/" rel="nofollow" title="seamalt.com Home Page [new window]" target="_blank">seamalt.com</a>',
+'search\-info','Sleek',
+'search_au','Search.Aus-AU.COM',
+'searchprocess','SearchProcess',
+'senrigan','Senrigan',
+'sgscout','SG-Scout',
+'shaggy','ShagSeeker',
+'shaihulud','Shai\'Hulud',
+'sift','Sift',
+'simbot','Simmany Robot Ver1.0',
+'sistrix', '<a href="http://crawler.sistrix.net/" rel="nofollow" title="SISTRIX Crawler Page [new window]" target="_blank">SISTRIX Crawler</a>',
+'site\-valet','Site Valet',
+'sitetech','SiteTech-Rover',
+'skymob','Skymob.com',
+'slcrawler','SLCrawler',
+'smartspider','Smart Spider',
+'snooper','Snooper',
+'solbot','Solbot',
+'speedy','<a href="http://www.entireweb.com/about/search_tech/speedyspider/" title="Speedy Spider home page [new window]" target="_blank">Speedy Spider</a>',
+'spider[_+\s]monkey','Spider monkey',
+'spiderbot','SpiderBot',
+'spiderline','Spiderline Crawler',
+'spiderlytics', 'Spiderlytics: No homepage, e-mail only: spider (at) spiderlytics.com',
+'spiderman','<a href="http://www.iscrawling.com" title="Spiderman home page [new window]" target="_blank">Spiderman</a>',
+'spiderview','SpiderView(tm)',
+'spry','Spry Wizard Robot',
+'ssearcher','Site Searcher',
+'sqworm','<a href="http://www.websense.com/" title="Bot home page (source: http://www.pgts.com.au/) [new window]" target="_blank">Sqworm</a>',
+'suke','Suke',
+'sunrise','<a href="http://www.sunrisexp.com/" title="Sunrise home page [new window]" target="_blank">Sunrise</a>',
+'suntek','suntek search engine',
+'sven','Sven',
+'tach_bw','TACH Black Widow',
+'tagyu_agent','<a href="http://www.tagyu.com/" title="Bot home page [new window]" target="_blank">Tagyu Agent</a>',
+'tarantula','Tarantula',
+'tarspider','tarspider',
+'tailrank','<a href="http://tailrank.com/robot">TailRank</a>',
+'techbot','TechBOT',
+'templeton','Templeton',
+'titan','TITAN',
+'titin','TitIn',
+'tkwww','The TkWWW Robot',
+'tlspider','TLSpider',
+'ucsd','UCSD Crawl',
+'udmsearch','UdmSearch',
+'universalfeedparser','<a href="http://feedparser.org/" title="Bot home page [new window]" target="_blank">UniversalFeedParser</a>',
+'urlck','URL Check',
+'valkyrie','Valkyrie',
+'verticrawl','Verticrawl',
+'victoria','Victoria',
+'visionsearch','vision-search',
+'voidbot','void-bot',
+'vwbot','VWbot',
+'w3index','The NWI Robot',
+'w3m2','W3M2',
+'wallpaper','WallPaper (alias crawlpaper)',
+'wanderer','the World Wide Web Wanderer',
+'wapspider','w@pSpider by wap4.com',
+'webbandit','WebBandit Web Spider',
+'webcatcher','WebCatcher',
+'webcopy','WebCopy',
+'webfetcher','webfetcher',
+'webfoot','The Webfoot Robot',
+'webinator','Webinator',
+'weblinker','WebLinker',
+'webmirror','WebMirror',
+'webmoose','The Web Moose',
+'webquest','WebQuest',
+'webreader','Digimarc MarcSpider',
+'webreaper','WebReaper',
+'websnarf','Websnarf',
+'webspider','WebSpider',
+'webvac','WebVac',
+'webwalk','webwalk',
+'webwalker','WebWalker',
+'webwatch','WebWatch',
+'whatuseek','whatUseek Winona',
+'whowhere','WhoWhere Robot',
+'wired\-digital','Wired Digital',
+'wmir','w3mir',
+'wolp','WebStolperer',
+'wombat','The Web Wombat',
+'wordpress','<a href="http://wordpress.org/" title="WordPress home page [new window]" target="_blank">WordPress</a>',
+'worm','The World Wide Web Worm',
+'woozweb','Woozweb Monitoring',
+'wwwc','WWWC Ver 0.2.5',
+'wz101','WebZinger',
+'xenu\slink\ssleuth', '<a href="http://home.snafu.de/tilman/xenulink.html" rel="nofollow" title="Description, Download, FAQ Page [new window]" target="_blank">Xenu'. "'" . 's Link Sleuth <sup>(TM)</sup></a>, see <a href="http://en.wikipedia.org/wiki/Xenu%27s_Link_Sleuth" rel="nofollow" title="Wikipedia on Xenu'. "'" . 's Link Sleuth [new window]" target="_blank">Wikipedia</a>',
+'xget','XGET',
+# Other robots reported by users
+'^finbot', '<span title="As on Sep. 10, 2015, the user agent string did not contain a web address.">finbot</span>',
+'^webindex$', '<span title="As on Oct. 28, 2015, the user agent string did not contain a web address.">WebIndex</span>',
+'1\-more_scanner','<a href="http://www.myzips.com/software/1-More-Scanner.phtml" title="1-More Scanner home page [new window]" target="_blank">1-More Scanner</a>',
+'360spider','<a href="https://www.google.com/search?q=360spider+-Ferrari" title="No home page, using Google search instead [new window]" target="_blank">360spider</a>',
+'a6-indexer',  '<a href="http://www.a6corp.com/a6-web-scraping-policy/" rel="nofollow" title="A6-Indexer [new window]" target="_blank">A6-Indexer</a>',
+'accoona\-ai\-agent','<a href="http://www.accoona.com/" title="Accoona-AI-Agent home page [new window]" target="_blank">Accoona-AI-Agent</a>',
+'activebookmark','<a href="http://www.libmaster.com/active_bookmark.php" title="ActiveBookmark home page [new window]" target="_blank">ActiveBookmark</a>',
+'adamm_bot','<a href="http://home.blic.net/adamm/" title="Bot home page [new window]" target="_blank">AdamM Bot</a>',
+'adsbot-google', '<a href="http://www.google.com/adsbot.html" rel="nofollow" title="AdsBot-Google home page [new window]" target="_blank">AdsBot-Google</a>',
+'advbot', '<a href="http://advbot.net/bot.html" rel="nofollow" title="AdvBot Home Page [new window]" target="_blank">AdvBot</a>',
+'affectv\.co\.uk', '<a href="http://www.affectv.co.uk" rel="nofollow" title="affectv.co.uk Home Page [new window]" target="_blank">affectv.co.uk</a>',
+'almaden','<a href="http://www.almaden.ibm.com/cs/crawler" title="IBM Almaden Research Center WebFountain&trade; Bot home page [new window]" target="_blank">IBM Almaden</a> Research Center WebFountain&trade;',
+'aipbot','<a href="http://www.aipbot.com/" title="aipbot@aipbot.com Bot home page [new window]" target="_blank">aipbot</a>',
+'aleadsoftbot','<a href="http://www.aleadsoft.com/bot.htm" title="ALeadSoftbot home page [new window]" target="_blank">ALeadSoftbot</a>',
+'alpha_search_agent','Alpha Search Agent',
+'allrati','Allrati',
+'aport', 'Aport',
+'applebot', '<a href="http://www.apple.com/go/applebot" rel="nofollow" title="Applebot Home Page [new window]" target="_blank">Applebot</a>',
+'archive\-de\.com',  '<a href="http://archive-de.com/bot" rel="nofollow" title="Archive-de.com Home Page [new window]" target="_blank">Archive-de.com</a>',
+'archive\.org_bot','<a href="http://crawls.archive.org/collections/bncf/crawl.html" title="Bot home page [new window]" target="_blank">archive.org bot</a>',
+'argus','<a href="http://www.simpy.com/bot.html" title="feedback@simpy.com Bot home page [new window]" target="_blank">Argus</a>',
+'arianna\.libero\.it','<a href="http://arianna.libero.it/" title="Bot home page [new window]" target="_blank">arianna.libero.it</a>',
+'aspseek','<a href="http://www.aspseek.org/" title="Bot home page [new window]" target="_blank">ASPseek</a>',
+'asterias', 'Asterias',
+'awbot', 'AWBot',
+'backlinktest\.com', '<a href="http://www.backlinktest.com/crawler.html" title="BacklinkCrawler [new window]" target="_blank">BacklinkCrawler</a>',
+'baiduspider','<a href="http://www.baidu.com/search/spider.html" title="Bot home page [new window]" target="_blank">BaiDuSpider</a>',
+'becomebot', '<a href="http://www.become.com/site_owners.html" title="Bot home page [new window]" target="_blank">BecomeBot</a>',
+'bender','<a href="http://bender.ucr.edu/" title="Bot home page [new window]" target="_blank">bender</a> <a href="http://ivia.ucr.edu/manuals/NiFC/current/index.shtml" title="Bot home page [new window]" target="_blank">focused_crawler</a>',
+'betabot','BetaBot',
+'biglotron','<a href="http://www.biglotron.com/robot.html" title="Bot home page [new window]" target="_blank">Biglotron</a>',
+'bittorrent_bot','<a href="http://www.bittorrent.com/" title="Bot home page [new window]" target="_blank">BitTorrent Bot</a>',
+'biz360[_+\s]spider','<a href="http://www.biz360.com/" title="blogsmanager@biz360.com Bot home page [new window]" target="_blank">Biz360 spider</a>',
+'blexbot', '<a href="http://webmeup-crawler.com" rel="nofollow" title="BLEXBot Home Page [new window]" target="_blank">BLEXBot</a>, seems to belong to the <a href="http://webmeup.com/" rel="nofollow" title="WebMeUp Home Page [new window]" target="_blank">WebMeUp backlink tool</a>',
+'blogbridge[_+\s]service','<a href="http://www.blogbridge.com/" title="Bot home page [new window]" target="_blank">BlogBridge Service</a>',
+'bloglines','<a href="http://www.bloglines.com/" title="Bot home page [new window]" target="_blank">Bloglines</a>',
+'blogpulse','<a href="http://www.intelliseek.com/" title="Bot home page [new window]" target="_blank">BlogPulse ISSpider intelliseek.com</a>',
+'blogsearch','<a href="http://www.icerocket.com/" title="Bot home page [new window]" target="_blank">BlogSearch</a>',
+'blogshares','<a href="http://blogshares.com/help.php?node=7" title="Bot home page [new window]" target="_blank">Blogshares Spiders</a>',
+'blogslive','<a href="http://www.blogslive.com/" title="info@blogslive.com Bot home page [new window]" target="_blank">Blogslive</a>',
+'blogssay','<a href="http://www.blogssay.com/" title="Bot home page [new window]" target="_blank">BlogsSay :: RSS Search Crawler</a>',
+'bncf\.firenze\.sbn\.it\/raccolta\.txt','<a href="http://www.bncf.firenze.sbn.it/raccolta.txt" title="Bot home page [new window]" target="_blank">Biblioteca Nazionale Centrale di Firenze</a>',
+'bobby', 'Bobby',
+'boitho\.com\-dc','<a href="http://www.boitho.com/dcbot.html" title="Bot home page [new window]" target="_blank">boitho.com-dc</a>',
+'bookmark\-manager','<a href="http://bkm.sourceforge.net/" title="Bookmark-Manager home page [new window]" target="_blank">Bookmark-Manager</a>',
+'boris', 'Boris',
+'bubing', '<a href="http://law.di.unimi.it/BUbiNG.html" title="BUbiNG [new window]" target="_blank">BUbiNG</a>',
+'bumblebee', 'Bumblebee (relevare.com)',
+'candlelight[_+\s]favorites[_+\s]inspector','<a href="http://www.candlelight.com/home.html" title="Candlelight_Favorites_Inspector  home page [new window]" target="_blank">Candlelight_Favorites_Inspector</a>',
+'careerbot',  '<a href="http://www.career-x.de/bot.html" rel="nofollow" title="CareerBot home page [new window]" target="_blank">CareerBot</a>',
+'cbn00glebot','cbn00glebot',
+'ccbot', '<a href="http://commoncrawl.org/faq/" rel="nofollow" title="Common Crawl FAQ Page [new window]" target="_blank">Common Crawl</a>',
+'cerberian_drtrs','<a href="http://www.pgts.com.au/cgi-bin/psql?robot_info=25240" title="Bot home page [new window]" target="_blank">Cerberian Drtrs</a>',
+'cfnetwork','<a href="http://www.cocoadev.com/index.pl?CFNetwork" title="CFNetwork home page [new window]" target="_blank">CFNetwork</a>',
+'cipinetbot','<a href="http://www.cipinet.com/bot.html" title="CipinetBot home page [new window]" target="_blank">CipinetBot</a>',
+'checkweb_link_validator','<a href="http://p.duby.free.fr/chkweb.htm" title="CheckWeb link validator home page [new window]" target="_blank">CheckWeb link validator</a>',
+'cliqzbot', '<a href="http://cliqz.com/company/cliqzbot" rel="nofollow" title="Cliqzbot Home Page [new window]" target="_blank">Cliqzbot</a>',
+'commons\-httpclient','<a href="http://jakarta.apache.org/commons/httpclient/" title="Bot home page [new window]" target="_blank">Jakarta commons-httpclient</a>',
+'computer_and_automation_research_institute_crawler','<a href="http://www.ilab.sztaki.hu/~stamas/publications/p184-benczur.html" title="Computer and Automation Research Institute Crawler home page [new window]" target="_blank">Computer and Automation Research Institute Crawler</a>',
+'converamultimediacrawler','<a href="http://www.authoritativeweb.com/crawl/" title="ConveraMultiMediaCrawler home page [new window]" target="_blank">ConveraMultiMediaCrawler</a>',
+'converacrawler','<a href="http://www.authoritativeweb.com/crawl/" title="ConveraCrawler home page [new window]" target="_blank">ConveraCrawler</a>',
+'copubbot', '<a href="http://www.copub.com/bot.php" rel="nofollow" title="CoPubbot Home Page [new window] Note: Access to bot home page gave a 404 error on Dec 21, 2013" target="_blank">CoPubbot</a>',
+'cscrawler','CsCrawler',
+'cse_html_validator_lite_online','<a href="http://online.htmlvalidator.com/php/onlinevallite.php" title="CSE HTML Validator Lite Online home page [new window]" target="_blank">CSE HTML Validator Lite Online</a>','cuasarbot','<a href="http://www.cuasar.com/" title="Cuasarbot home page [new window]" target="_blank">Cuasarbot</a>',
+'cursor','<a href="http://adcenter.hu/docs/en/bot.html " title="Cursor home page [new window]" target="_blank">Cursor</a>',
+'custo','<a href="http://www.netwu.com/custo/" title="Custo home page [new window]" target="_blank">Custo</a>',
+'datafountains\/dmoz_downloader','<a href="http://infomine.ucr.edu/ " title="DataFountains/DMOZ Downloader home page [new window]" target="_blank">DataFountains/DMOZ Downloader</a>',
+'dataprovider\.com', '<a href="http://www.dataprovider.com/" title="Dataprovider Site Explorer [new window]" target="_blank">Dataprovider Site Explorer</a>',
+'daumoa', '<a href="http://tab.search.daum.net/aboutWebSearch.html" title="Daum [new window]" target="_blank">Daum</a>',
+'daviesbot', 'DaviesBot',
+'daypopbot', 'DayPop',
+'deepindex','<a href="http://www.deepindex.net/faq.php" title="Deepindex home page [new window]" target="_blank">Deepindex</a>',
+'deusu', '<a href="https://deusu.de/robot.html" rel="nofollow" title="DeuSu [new window]" target="_blank">DeuSu</a>',
+'dipsie\.bot','<a href="http://www.dipsie.com/bot/" title="Bot home page [new window]" target="_blank">Dipsie</a>',
+'dnsgroup','<a href="http://www.dnsgroup.com/" title="DNSGroup home page [new window]" target="_blank">DNSGroup</a>',
+'doccheckbot', 'doccheckbot/1.0, known to <a href="http://www.projecthoneypot.org/ip_46.229.160.208" rel="nofollow" title="Info to IP 46.229.160.208 [new window]" target="_blank">Project Honey Pot</a>',
+'domainchecker','<a href="http://net-promoter.com/" title="DomainChecker home page (not confirmed) [new window]" target="_blank">DomainChecker</a>',
+'domainsdb\.net','<a href="http://domainsdb.net/" title="Bot home page [new window]" target="_blank">DomainsDB.net</a>',
+'dotbot',  '<a href="http://www.opensiteexplorer.org/dotbot" rel="nofollow" title="Home Page [new window]" target="_blank">DotBot, Open Site Explorer</a>',
+'duckduckgo-favicons-bot', '<a href="http://duckduckgo.com" rel="nofollow" title="DuckDuckGo Home Page [new window]" target="_blank">DuckDuckGo-Favicons-Bot</a>',
+'dulance','<a href="http://www.dulance.com/bot.jsp" title="Bot home page [new window]" target="_blank">Dulance</a>',
+'dumbot','<a href="http://www.dumbfind.com/" title="Dumbot home page [new window]" target="_blank">Dumbot</a>',
+'dumm\.de\-bot','<a href="http://www.dumm.de/" title="dumm.de-Bot home page [new window]" target="_blank">dumm.de-Bot</a>',
+'earthcom\.info','<a href="http://www.earthcom.info/" title="Bot home page [new window]" target="_blank">EARTHCOM.info</a>',
+'easydl','<a href="http://keywen.com/Encyclopedia/Bot/" title="EasyDL  home page [new window]" target="_blank">EasyDL</a>',
+'eccp', '<a href="http://www.eniro.com/" rel="nofollow" title="Eniro Sverige home page [new window]" target="_blank">Eniro Sverige, email: search (at) eniro.com</a>',
+'edgeio\-retriever','<a href="http://www.edgeio.com/" title="Bot home page [new window]" target="_blank">edgeio-retriever</a>',
+'ernst[:blank:]2\.0', 'Ernst 2.0 (does not provide any further information)',
+'ets_v','<a href="http://www.freetranslation.com/help/" title="ETS home page [new window]" target="_blank">ETS</a> Enterprise Translation Server',
+'exactseek','ExactSeek Crawler',
+'extreme[_+\s]picture[_+\s]finder','<a href="http://www.exisoftware.com/" title="Extreme_Picture_Finder home page [new window]" target="_blank">Extreme_Picture_Finder</a>',
+'eventax','<a href="http://www.eventax.de/" title="eventax home page [new window]" target="_blank">eventax</a>',
+'everbeecrawler','EverbeeCrawler',
+'everest\-vulcan','<a href="http://everest.vulcan.com/crawlerhelp" title="Bot home page [new window]" target="_blank">Everest-Vulcan</a>',
+'ezresult', 'Ezresult',
+'enteprise','<a href="http://www.fastsearch.com/" title="Bot home page [new window]" target="_blank">Fast Enteprise Crawler</a>',
+'facebook','FaceBook bot',
+'facebot', '<a href="https://developers.facebook.com/docs/opengraph/howtos/maximizing-distribution-media-content" rel="nofollow" title=" Home Page [new window]" target="_blank">Facebot (Facebook bot?)</a>',
+'fast\-search\-engine','<a href="http://www.fast-search-engine.com/" title="Bot home page [new window]" target="_blank">Fast-Search-Engine</a> (not fastsearch.com)',
+'fast_enterprise_crawler','<a href="http://www.fast.no/" title="FAST Enterprise Crawler home page [new window]" target="_blank">FAST Enterprise Crawler</a>',
+'fast_enterprise_crawler.*scrawleradmin\.t\-info@telekom\.de','<a href="http://www.telekom.de/" title="FAST Enterprise Crawler * crawleradmin.t-info@telekom.de home page [new window]" target="_blank">FAST Enterprise Crawler * crawleradmin.t-info@telekom.de</a>',
+'finderlein[_+\s]research[_+\s]crawler', 'Finderlein Research Crawler 1.0 (no contact information given)',
+'matrix_s\.p\.a\._\-_fast_enterprise_crawler','<a href="http://tin.virgilio.it/" title="Matrix S.p.A. - FAST Enterprise Crawler home page [new window]" target="_blank">Matrix S.p.A. - FAST Enterprise Crawler</a>',
+'fast_enterprise_crawler.*t\-info_bi_cluster_crawleradmin\.t\-info@telekom\.de','<a href="http://www.telekom.de/" title="FAST Enterprise Crawler * T-Info_BI_cluster crawleradmin.t-info@telekom.de home page [new window]" target="_blank">FAST Enterprise Crawler * T-Info_BI_cluster crawleradmin.t-info@telekom.de</a>',
+'fastbot', '<a href="http://www.fastbot.de" rel="nofollow" title="fastbot Home Page [new window]" target="_blank">fastbot</a>',
+'favicon','FavIconizer',
+'favorg','<a href="http://www.pcmag.com/article2/0,4149,108438,00.asp" title="FavOrg home page [new window]" target="_blank">FavOrg</a>',
+'favorites_sweeper','<a href="http://www.manitools.com/favsweep/" title="Favorites_Sweeper home page [new window]" target="_blank">Favorites Sweeper</a>',
+'feedburner', 'Feedburner',
+'feedfetcher\-google','<a href="http://www.google.com/feedfetcher.html" title="Bot home page [new window]" target="_blank">Feedfetcher-Google</a>',
+'feedflow','<a href="http://feedflow.com/about" title="Bot home page [new window]" target="_blank">FeedFlow</a>',
+'feedster','<a href="http://www.feedster.com/" title="Bot home page [new window]" target="_blank">Feedster</a>',
+'feedsky','<a href="http://www.feedsky.com/" title="Bot home page [new window]" target="_blank">FeedSky</a>',
+'feedvalidator','<a href="http://feedvalidator.org/" title="FeedValidator home page [new window]" target="_blank">FeedValidator</a>',
+'fetchbot', '<a href="https://github.com/PuerkitoBio/fetchbot" rel="nofollow" title="Fetchbot Home Page [new window]" target="_blank">Fetchbot</a>',
+'filmkamerabot','<a href="http://www.filmkamera.at/bot.html" title="FilmkameraBot home page [new window]" target="_blank">FilmkameraBot</a>',
+'filterdb\.iss\.net',  '<a href="http://filterdb.iss.net/crawler/" title="oBot Home Page [new window]" target="_blank">oBot</a>',
+'findexa_crawler','<a href="http://www.findexa.no/gulesider/article26548.ece " title="Findexa Crawler home page [new window]" target="_blank">Findexa Crawler</a>',
+'firmilybot', '<a href="http://www.firmily.com/bot.php" title="Firmily Bot [new window]" target="_blank">Firmily Bot Home page (Website was hacked on Oct. 19, 2013)</a>',
+'findlinks','<a href="http://wortschatz.uni-leipzig.de/findlinks/" title="Bot home page [new window]" target="_blank">Findlinks</a>',
+'foaf-search\.net', '<a href="http://www.foaf-search.net/" title="Friend of a friend (FOAF) search engine [new window]" target="_blank">Friend of a friend (FOAF) search engine</a>',
+'fooky\.com\/ScorpionBot','<a href="http://www.fooky.com/scorpionbots" title="Fooky.com/ScorpionBot/ScoutOut home page [new window]" target="_blank">Fooky.com/ScorpionBot/ScoutOut</a>',
+'g2crawler','<a href="http://crawler.instantnetworks.net/" title="Bot home page (nobody@airmail.net) [new window]" target="_blank">G2Crawler</a>',
+'gaisbot','<a href="http://gais.cs.ccu.edu.tw/robot.php" title="Bot home page [new window]" target="_blank">Gaisbot</a>',
+'geniebot','<a href="http://www.genieknows.com/" title="Bot home page [new window]" target="_blank">Geniebot</a>',
+'genieo', '<a href="http://www.genieo.com/webfilter.html" rel="nofollow" title="Genieo [new window]" target="_blank">Genieo</a>',
+'gigablastopensource', '<a href="http://www.gigablast.com/" rel="nofollow" title="Gigablast Home page [new window]"  target="_blank">GigablastOpenSource</a>, an Open Source Search Engine(<a href="https://github.com/gigablast/open-source-search-engine/wiki" rel="nofollow" title="at GitHub [new window]" target="_blank">Wiki</a>)',
+'gigabot','<a href="http://www.gigablast.com/spider.html" title="Bot home page [new window]" target="_blank">GigaBot</a>',
+'girafabot','<a href="http://www.girafa.com/" title="Bot home page [new window]" target="_blank">Girafabot</a>',
+'global_fetch','<a href="http://www.wesonet.com/" title="Global Fetch home page [new window]" target="_blank">Global Fetch</a>',
+'gnodspider','GNOD Spider',
+'goforit\.com','<a href="http://www.goforit.com/about/" title="GoForIt.com home page [new window]" target="_blank">GoForIt.com</a>',
+'goforitbot','<a href="http://www.goforit.com/about/" title="GOFORITBOT home page [new window]" target="_blank">GOFORITBOT</a>',
+'gonzo','<a href="http://www.suchen.de/faq.html" title="Bot home page [new windows]" target="_blank">suchen.de</a>',
+'gpu_p2p_crawler','<a href="http://gpu.sourceforge.net/search_engine.php" title="Bot home page [new window]" target="_blank">GPU p2p crawler</a>',
+'grapeshot', '<a href="http://www.grapeshot.co.uk/crawler.php" title="Grapeshot Crawler [new window]" target="_blank">Grapeshot Crawler</a>',
+'grub','Grub.org',
+'henrythemiragorobot', '<a href="http://www.miragorobot.com/scripts/mrinfo.asp" title="Bot home page [new window]" target="_blank">Mirago</a>',
+'heritrix','<a href="http://crawler.archive.org/" title="(used by a few different companies) Bot home page [new window]" target="_blank">Heritrix</a>',
+'holmes', 'Holmes',
+'hoowwwer','<a href="http://cosco.hiit.fi/search/hoowwwer/" title="HooWWWer home page [new window]" target="_blank">HooWWWer</a>',
+'hpprint','HPPrint',
+'htmlparser','<a href="http://htmlparser.sourceforge.net/" title="HTMLParser home page [new window]" target="_blank">HTMLParser</a>',
+'html[_+\s]link[_+\s]validator','<a href="http://www.lithopssoft.com/ " title="Html_Link_Validator home page [new window]" target="_blank">Html_Link_Validator</a>',
+'httrack','<a href="http://www.httrack.com/" title="Bot home page [new window]" target="_blank">HTTrack off-line browser</a>',
+'hundesuche\.com\-bot','<a href="http://www.hundesuche.com/" title="Hundesuche.com-Bot home page [new window]" target="_blank">Hundesuche.com-Bot</a>',
+'i-bot','i-bot',
+'icarus6j', 'Icarus6j, email address in UA string, no website',
+'ichiro','<a href="http://help.goo.ne.jp/door/crawlerE.html" title="Bot home page [new window]" target="_blank">ichiro</a>',
+'idmarch', '<a href="http://www.idmarch.org/bot.html" rel="nofollow" title=" Home Page [new window]" target="_blank">IDMARCH</a>',
+'iltrovatore\-setaccio','<a href="http://www.iltrovatore.it/aiuto/motore_di_ricerca.html" title="bot@iltrovatore.it IlTrovatore-Setaccio home page [new window]" target="_blank">IlTrovatore-Setaccio</a>',
+'implisensebot', '<span title="As on Sep. 18, 2015, the user agent string did not contain a web address.">ImplisenseBot</span>',
+'infobot','<a href="http://www.infobot.org/" title="InfoBot home page [new window]" target="_blank">InfoBot</a>',
+'infociousbot','<a href="http://corp.infocious.com/tech_crawler.php" title="InfociousBot home page [new window]" target="_blank">InfociousBot</a>',
+'infohelfer','<a href="http://www.infohelfer.de/crawler.php" title="Infohelfer home page [new window]" target="_blank">Infohelfer</a>',
+'infomine','<a href="http://infomine.ucr.edu/useragents" title="Bot home page [new window]" target="_blank">INFOMINE VLCrawler</a>',
+'insurancobot','<a href="http://www.fastspywareremoval.com/" title="InsurancoBot home page [new window]" target="_blank">InsurancoBot</a>',
+'integromedb\.org','<a href="http://www.integromedb.org/Crawler" title="IntegromeDB home page [new window]" target="_blank">IntegromeDB</a>',
+'internet[_+\s]ninja','<a href="http://www.dti.ne.jp/  " title="Internet_Ninja home page [new window]" target="_blank">Internet_Ninja </a>',
+'internetarchive','<a href="http://lucene.apache.org/nutch/bot.html " title="InternetArchive home page [new window]" target="_blank">InternetArchive</a>',
+'internetseer', 'InternetSeer',
+'internetsupervision','<a href="http://internetsupervision.com/" title="InternetSupervision home page [new window]" target="_blank">InternetSupervision</a>',
+'irlbot','<a href="http://irl.cs.tamu.edu/crawler" title="Bot home page [new window]" target="_blank">IRLbot</a>',
+'isearch2006','<a href="http://www.yahoo.com.cn/" title="isearch2006 home page [new window]" target="_blank">isearch2006</a>',
+'istellabot', '<a href="http://www.tiscali.it/" title="IstellaBot [new window]" target="_blank">IstellaBot</a>',
+'iupui_research_bot','<a href="http://spamhuntress.com/2005/04/25/a-mail-harvester-visits/" title="IUPUI_Research_Bot home page [new window]" target="_blank">IUPUI_Research_Bot</a>',
+'izsearch', '<a href="http://izsearch.com/" rel="nofollow" title="iZSearch Home Page [new window]" target="_blank">iZSearch</a>',
+'james\sbot', '<a href="http://cognitiveseo.com/bot.html" rel="nofollow" title="James BOT Home Page [new window]" target="_blank">James BOT</a>',
+'jobboerse', '<a href="http://www.xn--jobbrse-d1a.com" rel="nofollow" title="Jobb&ouml;rse Home Page [new window]" target="_blank">Jobb&ouml;rse</a>',
+'jrtwine[_+\s]software[_+\s]check[_+\s]favorites[_+\s]utility','<a href="http://www.jrtwine.com/Products/CheckFavs/" title="JRTwine_Software_Check_Favorites_Utility  home page [new window]" target="_blank">JRTwine_Software_Check_Favorites_Utility</a>',
+'justview', 'JustView',
+'kalambot','<a href="http://64.124.122.251/feedback.html" title="KalamBot home page [new window]" target="_blank">KalamBot</a>',
+'kamano\.de_newsfeedverzeichnis','<a href="http://www.kamano.de/" title="kamano.de NewsFeedVerzeichnis home page [new window]" target="_blank">kamano.de NewsFeedVerzeichnis</a>',
+'kazoombot','<a href="http://www.kazoom.ca/bot.html" title="kazoombot@kazoom.ca KazoomBot home page [new window]" target="_blank">KazoomBot</a>',
+'kevin','<a href="http://dznet.com/kevin/" title="Kevin home page [new window]" target="_blank">Kevin</a>',
+'keyoshid','<a href="http://www.yahoo.co.jp/" title="Bot home page [new window]" target="_blank">Yahoo! Japan keyoshid robot study</a>',
+'kinjabot', 'Kinjabot',
+'kinja\-imagebot', 'Kinja Imagebot',
+'knowitall','<a href="http://www.cs.washington.edu/research/knowitall/" title="KnowItAll home page [new window]" target="_blank">KnowItAll</a>',
+'knowledge\.com','<a href="http://www.knowledge.com/" title="Knowledge.com home page [new window]" target="_blank">Knowledge.com</a>',
+'kouaa_krawler','<a href="http://www.kouaa.com/" title="Kouaa Krawler home page [new window]" target="_blank">Kouaa Krawler</a>',
+'krugle','<a href="http://www.krugle.com/crawler/info.html" title="Bot home page [new window]" target="_blank">Krugle</a>',
+'ksibot','<a href="http://ego.ms.mff.cuni.cz/" title="Bot home page [new window]" target="_blank">ksibot</a>',
+'kurzor','<a href="http://www.easymail.hu/" title="cursor@easymail.hu Kurzor home page [new window]" target="_blank">Kurzor</a>',
+'lanshanbot','<a href="http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&amp;template=detail.html&amp;match=%5Cbid_g_l_140406_1%5Cb" title="Bot Information [new window]" target="_blank">lanshanbot</a>',
+'letscrawl\.com','<a href="http://letscrawl.com/" title="Bot home page [new window]" target="_blank">LetsCrawl.com</a>',
+'libcrawl','Crawl libcrawl',
+'link_valet_online','<a href="http://www.htmlhelp.com/tools/valet/" title="Link Valet Online home page [new window]" target="_blank">Link Valet Online</a>',
+'linkbot','LinkBot',
+'linkdex\.com', '<a href="http://www.linkdex.com/about/bots/" title="Bot home page [new window]" target="_blank">Linkdex</a>',
+'linkchecker','<a href="http://linkchecker.sourceforge.net" title="Bot home page [new window]" target="_blank">LinkChecker</a>',
+'linkstats\sbot', '<span title="As on Nov. 7, 2015, the user agent string did not contain a web address.">LinkStats Bot</span>',
+'lipperhey', '<a href="http://www.lipperhey.com/" rel="nofollow" title="Lipperhey SEO Service Home Page [new window]" target="_blank">Lipperhey SEO Service</a>',
+'livejournal\.com', 'LiveJournal.com',
+'loadtimebot', '<a href="http://www.loadtime.net/bot.html" rel="nofollow" title="LoadTimeBot Home Page [new window]" target="_blank">LoadTimeBot</a>',
+'lssrocketcrawler', '<span title="Example UA-String &quot;LSSRocketCrawler/1.0 LightspeedSystems&quot;">LSSRocketCrawler (no contact information)</span>',
+'ltbot', '<a href="http://www.language-tools.com/" title="Language Tools Home Page [new window]" target="_blank">Language Tools Bot (ltbot)</a>',
+'ltx71', '<a href="http://ltx71.com/" rel="nofollow" title="ltx71 Home Page [new window]" target="_blank">ltx71</a>',
+'madaali\.de', '<a href="http://www.madaali.de/pfadzurbotseite/bot.html" rel="nofollow" title="Link resulted in a 404 Error on Nov 6, 2014 [new window]" target="_blank">www.madaali.de</a>',
+'magpierss', 'MagpieRSS',
+'mail\.ru', '<a href="http://go.mail.ru/help/robots" title="Mail.ru bot home page [new window]" target="_blank">Mail.ru bot</a>',
+'mapoftheinternet\.com','<a href="http://MapoftheInternet.com/" title="MapoftheInternet.com home page [new window]" target="_blank">MapoftheInternet.com</a>',
+'meanpathbot', '<a href="http://www.meanpath.com/meanpathbot.html" rel="nofollow" title="Meanpathbot Home Page [new window]" target="_blank">Meanpathbot</a>',
+'mediabot', '<a href="http://isdownload.biz" rel="nofollow" title="MediaBot refers to isdownload.biz [new window]" target="_blank">MediaBot</a>',
+'mediapartners\-google','<a href="https://adwords.google.com/" title="Bot home page [new window]" target="_blank">Google AdSense</a>',
+# 'Mediapartners-Google (Feb 12, 2015: no additional information in UA string, seems to use <a href="http://www.gigablast.com/" title="Gigablast Home page [new window]">GigablastOpenSource</a>)',
+# Uses the UA string "Mediapartners-Google" only, and there were accesses using a UA string "GigablastOpenSource/1.0" from the same IP address.
+# Therefore this is probably not related to Google. -- 4.3.2015, Albrecht Müller
+'megaindex', '<a href="http://megaindex.com/crawler" rel="nofollow" title="MegaIndex Crawler Page [new window]" target="_blank">MegaIndex Crawler</a>, seems to belong to <a href="https://www.megaindex.ru/" rel="nofollow" title="MegaIndex.ru Home Page [new window]" target="_blank">MegaIndex.ru</a>',
+'megite','<a href="http://www.megite.com/" title="Megite home page [new window]" target="_blank">Megite</a>',
+'memorybot', '<a href="http://archivethe.net/en/index.php/about/internet_memory1" rel="nofollow" title="Archivethe.net Home Page [new window]" target="_blank">Archivethe.net</a>',
+'metager2-verification-bot', '<a href="http://metager2.de/technology.php" rel="nofollow" title="metager2-verification-bot Home Page [new window]" target="_blank">metager2-verification-bot</a>',
+'metager\-linkchecker','MetaGer LinkChecker',
+'metajobbot', '<a href="http://www.metajob.de/crawler" rel="nofollow" title="MetaJobBot [new window]" target="_blank">MetaJobBot</a>',
+'metaspinner','<a href="http://index.meta-spinner.de/" title="Metaspinner home page [new window]" target="_blank">Metaspinner</a>',
+'miadev',  '<a href="http://www.mia-marktplatz.de/spider" rel="nofollow" title="MiaDev spider [new window]" target="_blank">MiaDev spider</a>',
+'microsoft\sbits', '<a href="http://msdn.microsoft.com/en-us/library/bb968799%28v=vs.85%29.aspx" rel="nofollow" title="Microsoft Background Intelligent Transfer Service (BITS)? [new window]" target="_blank">Microsoft Background Intelligent Transfer Service (BITS)?</a>',
+'microsoft.*discovery', '<a href="http://support.microsoft.com/kb/838028/en-us" title="Microsoft KB838028 [new window]" target="_blank">Microsoft Office Protocol Discovery</a>/<a href="http://blogs.msdn.com/b/vsofficedeveloper/archive/2008/03/11/office-existence-discovery-protocol.aspx" title="Description of the Microsoft Office Existence Discovery [new window]" target="_blank">Microsoft Office Existence Discovery</a>',
+'microsoft[_+\s]url[_+\s]control','<a href="http://www.webmasterworld.com/forum11/1005.htm" title="Microsoft URL Control  home page [new window]" target="_blank">Microsoft URL Control</a>',
+'mindupbot', '<a href="http://datenbutler.de" rel="nofollow" title="DATENBUTLER home page [new window]" target="_blank">mindUpBot (datenbutler.de)</a>',
+'minirank','<a href="http://minirank.com/" title="miniRank home page [new window]" target="_blank">miniRank</a>',
+'mini\-reptile','Mini-reptile',
+'missigua_locator','<a href="http://www.webmasterworld.com/forum11/2690.htm" title="Missigua_Locator  home page [new window]" target="_blank">Missigua_Locator</a>',
+'misterbot','<a href="http://www.misterbot.fr/" title="Misterbot home page [new window]" target="_blank">Misterbot</a>',
+'miva','<a href="http://www.miva.com/" title="Miva home page [new window]" target="_blank">Miva</a>',
+'mizzu_labs','<a href="http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&amp;template=detail.html&amp;match=\bid_m_141105_2\b " title="Mizzu Labs home page [new window]" target="_blank">Mizzu Labs</a>',
+'mj12bot','<a href="http://majestic12.co.uk/bot.php" title="Bot home page. [new window]" target="_blank">MJ12bot</a>',
+'mojeekbot','<a href="http://www.mojeek.com/bot.html" title="Bot home page. [new window]" target="_blank">MojeekBot</a>',
+'msiecrawler','<a href="http://msdn.microsoft.com/workshop/delivery/offline/linkrel.asp" title="Bot home page. [new window]" target="_blank">MSIECrawler</a>',
+'ms[_+\s]search[_+\s]6\.0[_+\s]robot','<a href="http://support.microsoft.com/default.aspx?scid=kb;en-us;284022" title="Microsoft Support Page. [new window]" target="_blank">MS Search 6.0 Robot</a> (MS SharePoint Portal Server?)',
+'ms_search_4\.0_robot','<a href="http://support.microsoft.com/default.aspx?scid=kb;en-us;284022" title="Bot home page. [new window]" target="_blank">MS SharePoint Portal Server - MS Search 4.0 Robot</a>',
+'msrabot','msrabot',
+'msrbot','<a href="http://research.microsoft.com/research/sv/msrbot/" title="MSRBOT home page [new window]" target="_blank">MSRBOT</a>',
+'mt::telegraph::agent','MT::Telegraph::Agent',
+'mydoyouhike','<a href="http://www.doyouhike.net/my" title="Mydoyouhike home page [new window]" target="_blank">Mydoyouhike</a>',
+'nagios','Nagios',
+'nasa_search','<a href="http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&amp;template=detail.html&amp;match=\bid_n_s_140506_2\b" title="NASA Search home page [new window]" target="_blank">NASA Search</a>',
+'netestate\sne\scrawler','<a href="http://www.website-datenbank.de/" title="Website-Datenbank home page [new window]" target="_blank">Website-Datenbank</a>',
+'netluchs','<a href="http://www.netluchs.de/" title="Bot home page. [new window]" target="_blank">Netluchs</a>',
+'netsprint','<a href="http://www.netsprint.pl/serwis/" title="NetSprint home page [new window]" target="_blank">NetSprint</a>',
+'newsgatoronline', 'NewsGator Online',
+'nicebot','<a href="http://www.egghelp.org/setup.htm" title="Bot home page (there may be others) [new window]" target="_blank">nicebot</a>',
+'nimblecrawler','<a href="http://www.healthline.com/" title="NimbleCrawler home page [new window]" target="_blank">NimbleCrawler</a>',
+'noxtrumbot','<a href="http://www.noxtrum.com/" title="Bot home page [new window]" target="_blank">noxtrumbot</a>',
+'npbot','<a href="http://www.nameprotect.com/botinfo.html" title="NPBot home page [new window]" target="_blank">NPBot</a>',
+'loocalcrawler/nutch', '<a href="https://weluse.de/" rel="nofollow" title="https://weluse.de/ [new window]" target="_blank">LoocalCrawler/Nutch</a>',
+'nutchcvs','<a href="http://lucene.apache.org/nutch/bot.html" title="NutchCVS home page [new window]" target="_blank">NutchCVS</a>',
+'nutchosu\-vlib','<a href="http://lucene.apache.org/nutch/bot.html" title="NutchOSU-VLIB home page [new window]" target="_blank">NutchOSU-VLIB</a>',
+'nutch','<a href="http://lucene.apache.org/nutch/" title="Bot home page. Used by many, including Looksmart. [new window]" target="_blank">Nutch</a>',
+'ocelli','<a href="http://www.globalspec.com/Ocelli/" title="Ocelli home page [new window]" target="_blank">Ocelli</a>',
+'octora_beta_bot','<a href="http://www.octora.com/" title="Bot home page [new window]" target="_blank">Octora Beta Bot</a>',
+'omniexplorer[_+\s]bot','<a href="http://www.omni-explorer.com/" title="Bot home page. [new window]" target="_blank">OmniExplorer Bot</a>',
+'onet\.pl[_+\s]sa','<a href="http://szukaj.onet.pl/" title="Onet.pl_SA home page [new window]" target="_blank">Onet.pl_SA</a>',
+'onfolio','<a href="http://www.onfolio.com/" title="Bot home page [new window]">Onfolio</a>',
+'opentaggerbot','<a href="http://www.opentagger.com/opentaggerbot.htm" title="Bot home page [new window]">OpenTaggerBot</a>',
+'openwebspider','<a href="http://www.openwebspider.org/" title="OpenWebSpider home page [new window]" target="_blank">OpenWebSpider</a>',
+'optimizer', '<span title="As on Oct. 2, 2015, the user agent string did not contain a web address.">Optimizer</span>',
+'oracle_ultra_search','<a href="http://www.oracle.com/technology/products/ultrasearch/index.html" title="Oracle Ultra Search home page [new window]" target="_blank">Oracle Ultra Search</a>',
+'orangebot', 'OrangeBot, no website, log entry specifies mail address', # support.orangebot@orange.com
+'orbiter','<a href="http://www.dailyorbit.com/bot.htm" title="Orbiter home page [new window]" target="_blank">Orbiter</a>',
+'yodaobot','<a href="http://www.yodao.com/help/webmaster/spider/" title="YodaoBot">OutfoxBot/YodaoBot</a>',
+'qihoobot','<a href="http://www.qihoo.com/" title="QihooBot">QihooBot</a>',
+'qwantify', '<a href="https://www.qwant.com/" rel="nofollow" title="Qwant Home Page [new window]" target="_blank">Qwant</a>',
+'passwordmaker\.org','<a href="http://passwordmaker.org/" title="passwordmaker.org home page [new window]" target="_blank">passwordmaker.org</a>',
+'pear_http_request_class','<a href="http://pear.php.net/" title="PEAR HTTP Request class home page [new window]" target="_blank">PEAR HTTP Request class</a>',
+'peerbot','<a href="http://www.peerbot.com/" title="PEERbot home page [new window]" target="_blank">PEERbot</a>',
+'perman', 'Perman surfer',
+'php[_+\s]version[_+\s]tracker','<a href="http://www.nexen.net/phpversion/bot.php" title="PHP Version Tracker home page [new window]" target="_blank">PHP version tracker</a>',
+'phpcrawl', '<a href="http://phpcrawl.cuab.de/" rel="nofollow" title="PHPCrawl Home page [new window]" target="_blank">PHPCrawl</a>',
+'picmole', '<a href="http://www.picmole.com/" rel="nofollow" title="Bot home page. [new window]" target="_blank">Specified address www.picmole.com was not reachable on April 21, 2014</a>',
+'pictureofinternet','<a href="http://malfunction.org/poi/" title="PictureOfInternet home page [new window]" target="_blank">PictureOfInternet</a>',
+'ping\.blo\.gs','<a href="http://blo.gs/ping.php" title="Bot home page. [new window]" target="_blank">ping.blo.gs</a>',
+'plinki','<a href="http://www.plinki.com/" title="plinki home page [new window]" target="_blank">plinki</a>',
+'pluckfeedcrawler','<a href="http://www.pluck.com/" title="Bot home page. [new window]" target="_blank">PluckFeedCrawler</a>',
+'plukkie',  '<a href="http://www.botje.com/plukkie.htm" rel="nofollow" title="Plukkie [new window]" target="_blank">Plukkie</a>',
+'pogodak','<a href="http://www.pogodak.com" title="Pogodak home page [new window]" target="_blank">Pogodak.com</a>',
+'pompos','<a href="http://dir.com/pompos.html" title="Bot home page. [new window]" target="_blank">Pompos</a>',
+'popdexter','Popdexter',
+'port_huron_labs','<a href="http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&amp;template=detail.html&amp;match=\bid_n_s_1133\b" title="Port Huron Labs home page [new window]" target="_blank">Port Huron Labs</a>',
+'postfavorites','<a href="http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&amp;template=detail.html&amp;match=\bid_n_s_1135\b " title="PostFavorites home page [new window]" target="_blank">PostFavorites</a>',
+'projectwf\-java\-test\-crawler','ProjectWF-java-test-crawler',
+'proodlebot','<a href="http://www.proodle.com/" title="proodleBot home page [new window]" target="_blank">proodleBot</a>',
+'publiclibraryarchive', '<a href="http://publiclibraryarchive.org" rel="nofollow" title="On 23 June 2014 a page parked at GoDaddy [new window]" target="_blank">publiclibraryarchive.org (related to spiderlytics.com and/or waybackarchive.org?)</a>',
+#Observations 2014-06-23
+#Domain publiclibraryarchive.org is parked at GoDaddy.com
+#from https://www.projecthoneypot.org/
+#81.30.151.220's User Agent Strings (honeypot classified this IP as a mail server, active about 6 years ago)
+#Mozilla/5.0 (compatible; publiclibraryarchive.org/1.0; +crawl@publiclibraryarchive.org)
+#176.9.138.27's User Agent Strings
+#Mozilla/5.0 (compatible; publiclibraryarchive.org/1.0; +crawl@publiclibraryarchive.org)
+#Mozilla/5.0 (compatible; Spiderlytics/1.0; +spider@spiderlytics.com)
+#Mozilla/5.0 (compatible; waybackarchive.org/1.0; +spider@waybackarchive.org)
+#146.0.32.165's User Agent Strings
+#Mozilla/5.0 (compatible; publiclibraryarchive.org/1.0; +crawl@publiclibraryarchive.org)
+#Mozilla/5.0 (compatible; savetheworldheritage.org/1.0; +crawl@savetheworldheritage.org)
+#Mozilla/5.0 (compatible; seoscanners.net/1; +spider@seoscanners.net)
+'pyquery','<a href="http://sourceforge.net/projects/pyquery/" title="PyQuery home page [new window]" target="_blank">PyQuery</a>',
+'rambler','<a href="http://www.rambler.ru/doc/faq.shtml" title="Bot home page [new window]">StackRambler</a>',
+'redalert','Red Alert',
+'relevantnoise\.com', '<a href="http://www.relevantnoise.com/" title="Relevant Noise [new window]" target="_blank">Relevant Noise</a>',
+'riddler', '<a href="http://riddler.io/about" rel="nofollow" title="Riddler [new window]" target="_blank">Riddler</a>',
+'rogerbot', '<a href="http://moz.com/help/pro/what-is-rogerbot-" rel="nofollow" title="Rogerbot Home Page [new window]" target="_blank">Rogerbot</a>',
+'rojo','<a href="http://rojo.com/" title="Bot home page [new window]" target="_blank">RoJo</a> aggregator',
+'rssimagesbot','<a href="http://herbert.groot.jebbink.nl/?app=rssImages" title="Bot home page [new window]" target="_blank">rssImagesBot</a>',
+'ruffle','<a href="http://www.unreach.net/" title="Bot home page [new window]" target="_blank">ruffle SemanticWeb crawler</a>',
+'rufusbot','<a href="http://64.124.122.252.webaroo.com/feedback.html" title="Bot home page [new window]" target="_blank">RufusBot Rufus Web Miner</a>',
+'safeads\.xyz', '<a href="http://www.safeads.xyz/" rel="nofollow" title="SafeAds.xyz [new window]" target="_blank">SafeAds.xyz</a>',
+'safesearch',  '<a href="https://safesearch.avira.com" rel="nofollow" title="Avira SafeSearch Home Page [new window]" target="_blank">Avira SafeSearch</a>',
+'sandcrawler','<a href="http://www.microsoft.com/" title="Bot home page [new window]" target="_blank">SandCrawler (Microsoft)</a>',
+'savetheworldheritage', '<a href="http://savetheworldheritage.org" rel="nofollow" title="On March 4, 2015 a page parked at GoDaddy [new window]" target="_blank">savetheworldheritage.org (related to spiderlytics.com, waybackarchive.org and/or publiclibraryarchive.org?)</a>',
+'sbider','<a href="http://www.sitesell.com/sbider.html" title="Bot home page [new window]" target="_blank">SBIder</a>',
+'schizozilla','<a href="http://spamhuntress.com/2005/03/18/gizmo/ " title="Schizozilla home page [new window]" target="_blank">Schizozilla</a>',
+'scumbot','Scumbot',
+'searchguild[_+\s]dmoz[_+\s]experiment','<a href="http://www.searchguild.com/" title="SearchGuild_DMOZ_Experiment  home page [new window]" target="_blank">SearchGuild_DMOZ_Experiment</a>',
+'searchmetricsbot','<a href="http://www.searchmetrics.com/en/searchmetrics-bot/" rel="nofollow" title="SearchmetricsBot [new window]" target="_blank">SearchmetricsBot</a>',
+'seekbot','<a href="http://www.seekbot.net/bot.html" title="Bot home page [new window]">Seekbot</a>',
+'semrushbot', '<a href="http://www.semrush.com/bot.html" rel="nofollow" title="SemrushBot [new window]" target="_blank">SemrushBot</a>',
+'sensis_web_crawler','<a href="http://www.sensis.com.au/" title="Sensis Web Crawler home page [new window]" target="_blank">Sensis Web Crawler</a>',
+'seodiver', '<a href="http://www.seodiver.com/bot" rel="nofollow" title="SEO DIVER Bot Home Page [new window]" target="_blank">SEO DIVER</a>',
+'seokicks\.de', '<a href="http://www.seokicks.de/robot.html" rel="nofollow" title="SEOkicks Webcrawler home page [new window]" target="_blank">SEOkicks Webcrawler</a>',
+'seoscanners', '<a href="http://seoscanners.net" rel="nofollow" title="On August 4, 2015 a page parked at GoDaddy [new window]" target="_blank">seoscanners.net</a> (related to publiclibraryarchive.org and savetheworldheritage.org?)',
+'seznambot','<a href="http://fulltext.seznam.cz/" title="Bot home page [new window]" target="_blank">SeznamBot</a>',
+'shim\-crawler','<a href="http://www.logos.ic.i.u-tokyo.ac.jp/crawler/" title="crawl@logos.ic.i.u-tokyo.ac.jp Bot home page [new window]" target="_blank">Shim-Crawler</a>',
+'shoutcast','Shoutcast Directory Service',
+'sitedomain-bot', '<a href="http://www.sitedomain.de/sitedomain-bot/" rel="nofollow" title="Sitedomain-Bot Home Page [new window]" target="_blank">Sitedomain.de</a>',
+'siteexplorer\.info', '<a href="http://siteexplorer.info/" title="Site Explorer home page [new window]" target="_blank">Site Explorer</a>',
+'skimbot', '<a href="http://www.skimlinks.com" rel="nofollow" title="SkimBot [new window]" target="_blank">SkimBot</a>',
+'slysearch','SlySearch',
+'smtbot', '<a href="http://www.similartech.com/smtbot" rel="nofollow" title="SMTBot Home Page [new window]" target="_blank">SMTBot</a>',
+'snap\.com_beta_crawler','<a href="http://www.snap.com/" title="snap.com beta crawler home page [new window]" target="_blank">snap.com beta crawler</a>',
+'sohu\-search','<a href="http://corp.sohu.com/" title="Bot home page [new window]" target="_blank">sohu-search</a>',
+'sohu','<a href="http://corp.sohu.com/" title="Bot home page [new window]" target="_blank">sohu agent</a>',
+'snappy','<a href="http://www.urltrends.com/faq.php" title="Bot home page [new window]" target="_blank">Snappy</a>',
+'spbot', '<a href="http://www.seoprofiler.com/bot" rel="nofollow" title="SEOprofiler Bot [new window]" target="_blank">SEOprofiler Bot</a>',
+'sphere_scout','<a href="http://www.sphere.com/" title="Bot home page [new window]" target="_blank">Sphere Scout</a>',
+'spip','<a href="http://www.spip.net" title="SPIP home page [new window]" target="_blank">SPIP</a>',
+'sproose_crawler','<a href="http://www.sproose.com/bot.html" title="Bot home page [new window]" target="_blank">sproose crawler</a>',
+'ssearch_bot', '<a href="http://www.semantissimo.de/" title="sSearch Crawler [new window]" target="_blank">sSearch Crawler</a>',
+'steroid__download','<a href="http://faqs.org.ru/progr/pascal/delphi_internet2.htm" title="STEROID  Download home page [new window]" target="_blank">STEROID  Download</a>',
+'steeler','<a href="http://www.tkl.iis.u-tokyo.ac.jp/~crawler/ " title="Steeler home page [new window]" target="_blank">Steeler</a>',
+'stq_bot', '<a href="http://www.searchteq.de" rel="nofollow" title="SEARCHTEQ Home Page [new window]" target="_blank">SEARCHTEQ</a>',
+'suchfin\-bot','<a href="http://www.suchfin.de/" title="Suchfin-Bot home page [new window]" target="_blank">Suchfin-Bot</a>',
+'superbot','<a href="http://www.sparkleware.com/superbot/" title="SuperBot home page [new window]" target="_blank">SuperBot</a>',
+'surveybot','SurveyBot',
+'susie','<a href="http://www.sync2it.com/bms/susie.php" title="Susie home page [new window]" target="_blank">Susie</a>',
+'syndic8','Syndic8',
+'syndicapi','<a href="http://syndicapi.com/bot.html" title="Bot home page [new window]" target="_blank">SyndicAPI</a>',
+'synoobot','<a href="http://www.synoo.de/bot.html" title="webmaster@synoo.com SynooBot home page [new window]" target="_blank">SynooBot</a>',
+'tcl_http_client_package','<a href="http://www.tcl.tk/man/tcl8.4/TclCmd/http.htm" title="Tcl http client package home page [new window]" target="_blank">Tcl http client package</a>',
+'technoratibot', 'Technoratibot',
+'teragramcrawlersurf','<a href="http://www.teragram.com/" title="TeragramCrawlerSURF home page [new window]" target="_blank">TeragramCrawlerSURF</a>',
+'test_crawler','<a href="http://netp.ath.cx/" title="Test Crawler home page [new window]" target="_blank">Test Crawler</a>',
+'testbot','<a href="http://www.agbrain.com/" title="TestBot home page [new window]" target="_blank">TestBot</a>',
+'thumbsniper', '<a href="http://thumbsniper.com" rel="nofollow" title="ThumbSniper Home Page [new window]" target="_blank">ThumbSniper</a>',
+'t\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e','<a href="http://www.thunderstone.com/" title="Bot home page. Used by many. [new window]" target="_blank">T-H-U-N-D-E-R-S-T-O-N-E</a>',
+'topicblogs', '<a href="http://www.topicblogs.com/" title="Bot home page [new window]" target="_blank">topicblogs</a>',
+'turnitinbot', '<a href="http://www.turnitin.com/robot/crawlerinfo.html" rel="nofollow" title="TurnitinBot Home Page [new window]" target="_blank">Turn It In</a>',
+'turtle', 'Turtle',
+'turtlescanner', 'Turtle',
+'tutorgigbot','<a href="http://www.tutorgig.info/" title="TutorGigBot home page [new window]" target="_blank">TutorGigBot</a>',
+'twiceler','<a href="http://www.cuill.com/twiceler/robot.html" title="Twiceler home page [new window]" target="_blank">twiceler</a>',
+'ubicrawler','<a href="http://law.dsi.unimi.it/ubicrawler/" title="Bot home page [new window]" target="_blank">UbiCrawler</a>',
+'ultraseek', 'Ultraseek',
+'unchaos_bot_hybrid_web_search_engine','<a href="http://www.unchaos.com/" title="UnChaos Bot Hybrid Web Search Engine home page [new window]" target="_blank">UnChaos Bot Hybrid Web Search Engine</a>',
+'unido\-bot','<a href="http://www.unchina.org/unido/unido/our_projects/3_3.html" title="unido-bot home page [new window]" target="_blank">unido-bot</a>',
+'unisterbot', 'UnisterBot; E-Mail only: crawler (at) unister.de',
+'updated','<a href="http://www.updated.com/" title="updated home page [new window]" target="_blank">updated</a>',
+'ustc\-semantic\-group','<a href="http://ai.ustc.edu.cn/mas/en/research/index.php" title="Bot home page [new window]" target="_blank">USTC-Semantic-Group</a>',
+'vagabondo\-wap','<a href="http://www.wise-guys.nl/Contact/index.php?botselected=webagents&amp;lang=uk" title="Bot home page [new window]" target="_blank">Vagabondo-WAP</a>',
+'vagabondo','<a href="http://www.wise-guys.nl/Contact/index.php?botselected=webagents&amp;lang=uk" title="Bot home page [new window]" target="_blank">Vagabondo</a>',
+'vebidoobot', '<span title="As on Oct. 27, 2015, the user agent string did not contain a web address.">vebidoobot</span>',
+'vermut','<a href="http://vermut.aol.com/" title="Bot home page [new window]" target="_blank">Vermut</a>',
+'versus_crawler_from_eda\.baykan@epfl\.ch','<a href="http://www.epfl.ch/Eindex.html  " title="versus crawler from eda.baykan@epfl.ch home page [new window]" target="_blank">versus crawler from eda.baykan@epfl.ch</a>',
+'vespa_crawler','<a href="http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=%5Cbid_t_z_030406_1%5Cb" title="Bot home page [new window]" target="_blank">Vespa Crawler</a>',
+'voltron', '<span title="As on Oct. 21, 2015, the user agent string did not contain a web address.">voltron</span>',
+'vortex','<a href="http://marty.anstey.ca/projects/robots/vortex/" title="Bot home page [new window]" target="_blank">VORTEX</a>',
+'vse\/','<a href="http://www.vivisimo.com/" title="VSE home page [new window]" target="_blank">VSE</a>',
+'w3c\-checklink','<a href="http://validator.w3.org/checklink/" title="Bot home page [new window]" target="_blank">W3C Link Checker</a>',
+'w3c[_+\s]css[_+\s]validator[_+\s]jfouffa', '<a href="http://jigsaw.w3.org/css-validator/" title="Bot home page [new window]" target="_blank">W3C jigsaw CSS Validator</a>',
+'w3c_validator','<a href="http://validator.w3.org/" title="Bot home page [new window]" target="_blank">W3C Validator</a>',
+'watchmouse', '<a href="http://www.watchmouse.com/en/" title="WatcMouse">WatchMouse Website Monitor</a>',
+'wavefire','<a href="http://www.wavefire.com" title="info@wavefire.com; Bot home page [new window]" target="_blank">Wavefire</a>',
+'waybackarchive\.org', '<span title="Maybe related to spiderlytics.">No website, email: spider(at)waybackarchive.org</span>',
+# 2.12.2013 Project Honeypot reports at least one of the IPs used by waybackarchive with a spiderlytics UA string.
+# Probably not related to the Wayback Machine of archive.org.
+'wbsearchbot', '<a href="http://www.warebay.com/bot.html" rel="nofollow" title="WBSearchBot [new window]" target="_blank">WBSearchBot</a>',
+'webclipping\.com', 'WebClipping.com',
+'webcompass', 'webcompass',
+'webcrawl\.net','<a href="http://www.webcrawl.net/" title="webcrawl.net home page [new window]" target="_blank">webcrawl.net</a>',
+'web_downloader','<a href="http://www.krasu.ru/soft/chuchelo/" title="Web Downloader home page [new window]" target="_blank">Web Downloader</a>',
+'webdup','<a href="http://www.webdup.com/en/index.html" title="Webdup home page [new window]" target="_blank">Webdup</a>',
+'webfilter','<a href="http://www.verso.com/enterprise/netspective/webfilter.asp" title="Bot home page [new window]" target="_blank">WebFilter</a>',
+'webindexer','<a href="mailto://webindexerv1@yahoo.com" title="WebIndexer home page [new window]" target="_blank">WebIndexer</a>',
+'webminer','<a href="http://64.124.122.252/feedback.html" title="WebMiner home page [new window]" target="_blank">WebMiner</a>',
+'website[_+\s]monitoring[_+\s]bot','<a href="http://InternetSupervision.com/UrlMonitor/3/" title="Website_Monitoring_Bot home page [new window]" target="_blank">Website_Monitoring_Bot</a>',
+'webvulncrawl', 'WebVulnCrawl',
+'wells_search','<a href="http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&amp;template=detail.html&amp;match=\bid_t_z_1484\b " title="Wells Search home page [new window]" target="_blank">Wells Search</a>',
+'wer-liefert-was', '<a href="http://www.wlw.de/extern/crawler/Wer-liefert-was-Crawler.html" rel="nofollow" title="Page given in UA string gave a 404 Error on July 2, 2015 [new window]" target="_blank">Wer-liefert-was Crawler</a> Note: AWStats counts most traffic as user traffic',
+'wesee:search', '<a href="http://www.wesee.com/en/support/bot/" title="WeSEE Bot Home Page (gave a 404-Error on Nov. 2, 2013) [new window]" target="_blank">WeSEE Bot</a>',
+'wevikabot', '<a href="http://www.wevika.de/" rel="nofollow" title="WeViKa Home Page [new window]" target="_blank">WeViKa</a>',
+'wonderer', 'Web Wombat Redback Spider',
+'wotbox', '<a href="http://www.wotbox.com/bot/" rel="nofollow" title="Wotbox Bot Home Page [new window]" target="_blank">Wotbox</a>',
+'wume_crawler','<a href="http://wume.cse.lehigh.edu/~xiq204/crawler/ " title="wume crawler home page [new window]" target="_blank">wume crawler</a>',
+'wwweasel',,'<a href="http://wwweasel.de/" title="Website_Monitoring_Bot home page [new window]" target="_blank">WWWeasel</a>',
+'xenu\'s_link_sleuth','<a href="http://home.snafu.de/tilman/xenulink.html" title="Xenu Link Sleuth home page [new window]" target="_blank">Xenu Link Sleuth</a>',
+'xenu_link_sleuth','<a href="http://home.snafu.de/tilman/xenulink.html" title="Xenu Link Sleuth home page [new window]" target="_blank">Xenu Link Sleuth</a>',
+'xirq','<a href="http://www.xirq.com/" title="xirq home page [new window]" target="_blank">xirq</a>',
+'xovibot', '<a href="http://www.xovibot.net/" rel="nofollow" title="XoviBot Home Page [new window]" target="_blank">XoviBot</a>',
+'y!j', '<a href="http://help.yahoo.co.jp/help/jp/search/indexing/indexing-15.html" title="Bot home page [new window]" target="_blank">Y!J Yahoo Japan</a>',
+'yacy', '<a href="http://yacy.net/bot.html" rel="nofollow" title="YaCy Home Page [new window]" target="_blank">YaCy</a>',
+'yahoo\-blogs','<a href="http://help.yahoo.com/help/us/ysearch/crawling/crawling-02.html" title="Bot home page [new window]" target="_blank">Yahoo-Blogs</a>',
+'yahoo\-verticalcrawler', 'Yahoo Vertical Crawler',
+'yahoofeedseeker', '<a href="http://publisher.yahoo.com/rssguide" title="Bot home page [new window]" target="_blank">Yahoo Feed Seeker</a>',
+'yahooseeker\-testing', '<a href="http://search.yahoo.com/" title="Bot home page [new window]" target="_blank">YahooSeeker-Testing</a>',
+'yahooseeker', '<a href="http://help.yahoo.com/help/us/ysearch/crawling/crawling-02.html" title="Bot home page [new window]" target="_blank">YahooSeeker Yahoo! Blog crawler</a>',
+'yahoo\-mmcrawler', '<a href="mailto:mms-mmcrawler-support@yahoo-inc.com?subject=Yahoo-MMCrawler Information" title="E-mail Bot">Yahoo-MMCrawler</a>',
+'yahoo!_mindset','<a href="http://mindset.research.yahoo.com/" title="Bot home page [new window]">Yahoo! Mindset</a>',
+'yandex', '<a href="http://yandex.com/bots" title="Bot home page [new window]">Yandex Bot</a>',
+'flexum', 'Flexum Search Engine',
+'yanga', 'Yanga WorldSearch Bot',
+'yet-another-spider','<a href="http://188.40.112.195/" title="Yet-Another-Spider home page [new window]" target="_blank">Yet-Another-Spider</a>',
+'yisouspider', 'YisouSpider (no additional information in UA string)',
+'yooglifetchagent','<a href="http://www.yoogli.com/" title="yoogliFetchAgent home page [new window]" target="_blank">yoogliFetchAgent</a>',
+'z\-add_link_checker','<a href="http://w3.z-add.co.uk/linkcheck/" title="Z-Add Link Checker home page [new window]" target="_blank">Z-Add Link Checker</a>',
+'zealbot','ZealBot',
+'zhuaxia','<a href="http://www.zhuaxia.com/"  target="_blank">ZhuaXia</a>',
+'zspider','<a href="http://feedback.redkolibri.com/" title="Bot home page [new window]" target="_blank">zspider</a>',
+'zeus','<a href="http://www.webmasterworld.com/forum11/1840.htm" title="Bot documentation [new window]" target="_blank">Zeus Webster Pro</a>',
+'zumbot','<a href="http://help.zum.com/inquiry" title="ZumBot home page [new window]" target="_blank">ZumBot</a>',
+'ng\/1\.','<a href="http://www.exabot.com/" title="Bot home page [new window]" target="_blank">NG 1.x (Exalead)</a>', # put at end to avoid false positive
+'ng\/2\.','<a href="http://www.exabot.com/" title="Bot home page [new window]" target="_blank">NG 2.x (Exalead)</a>', # put at end to avoid false positive
+'exabot','<a href="http://www.exabot.com/" title="Bot home page [new window]" target="_blank">Exabot</a>', # put at end to avoid false positive
+# Other IDs that are robots 99% of the time
+'wget','WGet tools',
+'libwww','Perl tool',
+'^java\/[0-9]','<a href="http://www.projecthoneypot.org/harvester_useragents.php" title="Bot home page [new window]" target="_blank">Java (Often spam bot)</a>', # put at end to avoid false positive
+# Generic robot
+'robot', 'Unknown robot (identified by \'robot\')',
+'checker', 'Unknown robot (identified by \'checker\')',
+'crawl', 'Unknown robot (identified by \'crawl\')',
+'discovery', 'Unknown robot (identified by \'discovery\')',
+'hunter', 'Unknown robot (identified by \'hunter\')',
+'scanner', 'Unknown robot (identified by \'scanner\')',
+'spider', 'Unknown robot (identified by \'spider\')',
+'sucker', 'Unknown robot (identified by \'sucker\')',
+'bot[\s_+:,\.\;\/\\\-]', 'Unknown robot (identified by \'bot\' followed by a space or one of the following characters _+:,.;/\-)',
+'[\s_+:,\.\;\/\\\-]bot', 'Unknown robot (identified by a space or one of the characters _+:,.;/\- followed by \'bot\')',
+'curl', 'Common *nix tool for automating web document retrieval. Most likely a bot.',
+'php', 'A PHP script',
+'ruby\/', 'Ruby script',
+# Additional bots found by Sussex.
+'^[1-3]$', 'Generic bot identified as "1", "2" or "3"',
+'alltop', 'alltop',
+'applesyndication', 'applesyndication',
+'asynchttpclient', 'asynchttpclient',
+'bingbot', '<a href="http://www.bing.com/bingbot.htm" title="Bing home page [new window]" target="_blank">Bingbot</a>',
+'blogged_crawl', 'blogged_crawl',
+'bloglovin', 'bloglovin',
+'butterfly', 'butterfly',
+'buzztracker', 'buzztracker',
+'carpathia', 'carpathia',
+'catbot', 'catbot',
+'chattertrap', 'chattertrap',
+'check_http', 'check_http (nagios)',
+'coldfusion', 'coldfusion',
+'covario', 'covario',
+'daylifefeedfetcher', 'daylifefeedfetcher',
+'discobot', 'discobot',
+'dlvr\.it', 'dlvr.it',
+'dreamwidth', 'dreamwidth',
+'drupal', 'Drupal Site',
+'ezoom', 'ezoom',
+'feedmyinbox', 'feedmyinbox',
+'feedroll\.com', 'feedroll.com',
+'feedzira', 'feedzira',
+'fever\/', '<a href="http://feedafever.com">Feed a Fever</a>',
+'freenews', 'freenews',
+'geohasher', 'geohasher',
+'hanrss', 'hanrss',
+'inagist', 'inagist',
+'jacobin\sclub', 'jacobin club',
+'jakarta', 'jakarta',
+'js\-kit', 'js-kit',
+'largesmall\scrawler', 'largesmall crawler',
+'linkedinbot', 'linkedinbot',
+'longurl', 'longurl',
+'metauri', 'metauri',
+'microsoft\-webdav\-miniredir', 'microsoft-webdav-miniredir',
+'^motorola$', 'Suspected Bot masquerading as "Motorola"',
+'movabletype', 'movabletype',
+'^mozilla\/3\.0\s\(compatible$', 'Suspected bot masqurading as Mozilla',
+'^mozilla\/4\.0$', 'Suspected bot masqurading as Mozilla',
+'^mozilla\/4\.0\s\(compatible;\)$', 'Suspected bot masqurading as Mozilla',
+'^mozilla\/5\.0$', 'Suspected bot masqurading as Mozilla',
+'^mozilla\/5\.0\s\(compatible;$', 'Suspected bot masqurading as Mozilla',
+'^mozilla\/5\.0\s\(en\-us\)$', 'Suspected bot masqurading as Mozilla',
+'^mozilla\/5\.0\sfirefox\/3\.0\.5$', 'Suspected bot masqurading as Mozilla',
+'^msie', 'Suspected bot masquerading as M$ IE',
+'netnewswire', 'netnewswire',
+'\snetseer\s', '<a href="http://www.netseer.com/crawler.html">Net Seer</a>',
+'netvibes', 'netvibes',
+'newrelicpinger', 'newrelicpinger',
+'newsfox', 'Fox News',
+'nextgensearchbot', 'nextgensearchbot',
+'ning', 'ning',
+'pingdom', 'pingdom',
+'pita', 'pita (pain in the ass?)',
+'postpost', 'postpost',
+'postrank', 'postrank',
+'printfulbot', 'printfulbot',
+'protopage', 'protopage',
+'proximic', '<a href="http://www.proximic.com/info/spider.php" title="Proximic Spider home page [new window]" target="_blank">Proximic Spider</a>',
+'quipply', 'quipply',
+'r6\_', '<a href="http://www.radian6.com/crawler">Radian 6 Crawler</a>',
+'ratingburner', 'ratingburner',
+'regator', 'regator',
+'rome\sclient', 'rome client',
+'rpt\-httpclient', 'rpt-httpclient',
+'rssgraffiti', 'rssgraffiti',
+'sage\+\+', 'sage++',
+'scoutjet', '<a href="http://wwww.scoutjet.com/" target="_blank">ScoutJet</a> crawler for <a href="http://blekko.com/" target="_blank">Blekko</a>.',
+'simplepie', 'simplepie',
+'sitebot', 'sitebot',
+'summify\.com', '<a href="http://summify.com/">summify.com</a>',
+'superfeedr', 'superfeedr',
+'synthesio', 'synthesio',
+'teoma', 'teoma',
+'topblogsinfo', 'topblogsinfo',
+'topix\.net', 'topix.net',
+'trapit', 'trapit',
+'trileet', 'trileet',
+'tweetedtimes', '<a href="http://tweetedtimes.com">The Tweeted Times</a>',
+'twisted\spagegetter', 'twisted pagegetter',
+'twitterbot', '<span title="As on Nov. 27, 2015, the user agent string did not contain a web address.">Twitterbot</span>',
+'twitterfeed', 'twitterfeed',
+'unwindfetchor', 'unwindfetchor',
+'wazzup', 'wazzup',
+'windows\-rss\-platform', 'windows-rss-platform',
+'wiumi', 'wiumi',
+'xydo', 'xydo',
+'yahoo!\sslurp', 'Additional Yahoo bots.',
+'yahoo\spipes', 'Additional Yahoo bots.',
+'yahoo\-newscrawler', 'Additional Yahoo bots.',
+'yahoocachesystem', 'Additional Yahoo bots.',
+'yahooexternalcache', 'Additional Yahoo bots.',
+'yahoo!\ssearchmonkey', 'Additional Yahoo bots.',
+'yahooysmcm', 'Additional Yahoo bots.',
+'yammer', 'yammer',
+#'yandexbot', 'yandexbot', #already covered by 'yandex'
+'yeti', 'yeti',
+'yie8', 'yie8',
+'youdao', 'youdao',
+'yourls', 'yourls',
+'zemanta', 'zemanta',
+'zend_http_client', 'Zend Http Client',
+'no_user_agent','Unknown robot (identified by empty user agent string)',
+# Unknown robots identified by hit on robots.txt
+'unknown', 'Unknown robot (identified by hit on \'robots.txt\')'
+);
+
+
+# RobotsAffiliateLib
+# Maps a robot ID to the search engine (or service) that operates that robot. Keys are single-quoted, so a backslash is kept literally: spell each key exactly like the escaped robot ID used elsewhere in this file (e.g. 'yahoo\-blogs').
+#-------------------------------------------------------------
+%RobotsAffiliateLib = (
+'bingpreview'=>'Bing',
+'fast\-webcrawler'=>'AllTheWeb',
+'googlebot'=>'Google',
+'google\-sitemap'=>'Google',
+'google[_+\s]web[_+\s]preview'=>'Google',
+'msnbot'=>'MSN',
+'nutch'=>'Looksmart',
+'scooter'=>'AltaVista',
+'wisenutbot'=>'Looksmart',
+'yahoo\-blogs'=>'Yahoo',
+'yahoo\-verticalcrawler'=>'Yahoo',
+'yahoofeedseeker'=>'Yahoo',
+'yahooseeker\-testing'=>'Yahoo',
+'yahooseeker'=>'Yahoo',
+'yahoo\-mmcrawler'=>'Yahoo',
+'yahoo!_mindset'=>'Yahoo',
+'zyborg'=>'Looksmart',
+'cfetch'=>'Kosmix',
+'^voyager\/'=>'Kosmix',
+# Additional bots found by Sussex.
+'feedfetcher\-google'=>'Google',
+'bingbot'=>'MSN',
+'twitterbot'=>'Twitter',
+'twitterfeed'=>'Twitter',
+'yahoo!\sslurp'=>'Yahoo',
+'yahoo\spipes'=>'Yahoo',
+'yahoo\-newscrawler'=>'Yahoo',
+'yahoocachesystem'=>'Yahoo',
+'yahooexternalcache'=>'Yahoo',
+'yahoo!\ssearchmonkey'=>'Yahoo',
+'yahooysmcm'=>'Yahoo'
+);
+
+1;
diff --git a/wwwroot/cgi-bin/lib/search_engines.pm b/wwwroot/cgi-bin/lib/search_engines.pm

index a84dc4e76c22c6f1fdea81818b632e21c22cd341..e56b00801921b929b9cfec24415a9a3626b31623 100644 (file)
--- a/wwwroot/cgi-bin/lib/search_engines.pm
+++ b/wwwroot/cgi-bin/lib/search_engines.pm
@@ -1,1578 +1,1578 @@
-# AWSTATS SEARCH ENGINES DATABASE\r
-#------------------------------------------------------------------------------\r
-# If you want to add a Search Engine to extend AWStats database detection capabilities,\r
-# you must add an entry in SearchEnginesSearchIDOrder, SearchEnginesHashID and in\r
-# SearchEnginesHashLib.\r
-# An entry if known in SearchEnginesKnownUrl is also welcome.\r
-#\r
-# to eldy: Please check if the following description is correct:\r
-# You need the following information to specify a search engine:\r
-# (a) A regular expression that matches the referrer string of the\r
-#    search engine. Unclear: What about slashes in the name of\r
-#    a search engine, e.g. as in 'ecosia.com/search'. Seems that\r
-#    AWStats will non find search strings containing a slash.\r
-#    Maybe use a search string without a slash, and - if necessary -\r
-#    an entry in %NotSearchEnginesKeys , if this search string\r
-#    matches entries that are not search engines.\r
-#    Example of a web address of a Amazon search engine:\r
-#    http://www.amazon.de/gp/bit/apps/web/SERP/search/ref=bit_bds-p24_serp_cr_de?ie=UTF8tagbase=bds-p24&query=deutsch+8.+klasse+gymnasium+protokoll\r
-# (b) A unique string to identify the search engine within AWStats\r
-# (c) A regular expression that finds the start of the query part in the\r
-#    referrer string\r
-# (d) A HTML-fragment that goes into the reports generated by AWStats which\r
-#    identifies the search engine to human reader of the report. In the\r
-#    simplest case this is a string containing the name of the search\r
-#    engine. You can also provide a hypertext clause that presents the\r
-#    name together with a link to the search engine.\r
-#\r
-# The regular expression (a) goes into SearchEnginesSearchIDOrder_list1\r
-# or ..._list2. List 1 contains common search engines, list 2 those\r
-# that are not so often used.\r
-#\r
-# SearchEnginesHashID contains to consecutive entries for each search\r
-# engine: The regular expression (a) followed bei the search engine\r
-# identifier (b)\r
-#\r
-# SearchEnginesKnownUrl specifies how to find the start of the query.\r
-# For each search engine you enter the search engine identifier (b)\r
-# followed by the regular expression (c). Unclear: It is possible to\r
-# omit this entry. If you do this, how will AWStats find the start of\r
-# the query?\r
-#\r
-# SearchEnginesHashLib contains also two entries for each search engine:\r
-# The search engine identifier (b) followed by the HTML-Fragment (d)\r
-#\r
-# There are search engines that do not use a query part in their URLs.\r
-# They put the search expression in the main part of the URL instead.\r
-# AWStats is able to handle these cases. They are specified as described\r
-# above, except the following two things:\r
-# - The regular expression (c) searches the complete URL and not only\r
-#   the query part.\r
-# - An additional Entry in the list %SearchEnginesWithKeysNotInQuery is\r
-#   necessary.\r
-#\r
-#\r
-# AWStats runs a sanity check of the contents of search_engines.pm. This\r
-# check detects the following things:\r
-# - Inconsistencies (number of entries)\r
-# It does not detect the following errors:\r
-# - If the HTML-Fragment (d) is syntactically incorrect.\r
-#\r
-#------------------------------------------------------------------------------\r
-\r
-# 2005-08-19 Sean Carlos http://www.antezeta.com/awstats.html\r
-#            added minor italian search engines\r
-#                  arianna http://arianna.libero.it/\r
-#                  supereva http://search.supereva.com/\r
-#                  kataweb http://kataweb.it/\r
-#            corrected uk looksmart\r
-#                  'askuk','ask=', 'bbc','q=', 'freeserve','q=', 'looksmart','key=',\r
-#            to\r
-#                  'askuk','ask=', 'bbc','q=', 'freeserve','q=', 'looksmartuk','key=',\r
-#            corrected spelling\r
-#                     internationnal -> international\r
-#            added 'google\.'=>'mail\.google\.', to NotSearchEnginesKeys in order to\r
-#            avoid counting gmail referrals as search engine traffic\r
-# 2005-08-21 Sean Carlos http://www.antezeta.com/awstats.html\r
-#            avoid counting babelfish.altavista referrals as search engine traffic\r
-#            avoid counting translate.google referrals as search engine traffic\r
-# 2005-11-20 Sean Carlos\r
-#           added missing 'tiscali','key=', entry.  Check order\r
-# 2005-11-22 Sean Carlos\r
-#           added Google Base & Froogle.  Froogle not tested.\r
-# 2006-04-18 Sean Carlos http://www.antezeta.com/awstats.html\r
-#           added biglotron.com (France)\r
-#           added blingo http://www.blingo.com/\r
-#           added Clusty & Vivisimo\r
-#           added eniro.no (Norway) [https://sourceforge.net/forum/message.php?msg_id=3134783]\r
-#           added GPU p2p search http://search.centraldatabase.org/\r
-#           added mail.tiscali to "not search engines list" [https://sourceforge.net/forum/message.php?msg_id=3166688]\r
-#           added Ask group's "mysearch"\r
-#           added sify.com (India)\r
-#           added sogou.com (Cina) [https://sourceforge.net/forum/message.php?msg_id=3501603]\r
-#           Ask changes:\r
-#           - added Ask Japan (ask.jp)\r
-#           - break out Ask new country level variants (DE, ES, FR, IT, NL)\r
-#           - updated Ask name from Ask Jevees\r
-#           - added Ask q= parameter - many recent searches probably not recognized; [https://sourceforge.net/forum/message.php?msg_id=3465444]\r
-#           - updated Ask uk (new uk.ask.com added to older ask.co.uk)\r
-#           updated voila kw|rdata parameter [https://sourceforge.net/forum/message.php?msg_id=3373912]\r
-#           for each new engine, added link to Search Engine.  This serves to document engine. Done for major & Italian engines as well. Requires patch\r
-#              to AWStats to allow untranslated html.  Otherwise html will appear instead of link.\r
-#           reviewed mnoGoSearch (http://www.mnogosearch.org/); the search engined mentioned no longer\r
-#              exists https://sourceforge.net/forum/message.php?msg_id=3025426\r
-# 2006-05-13 Sean Carlos http://www.antezeta.com/awstats.html\r
-#            added 10 Chello European broadband portals (Austria, Belgium, Czech Republic, France, Hungary, The Netherlands, Norway, Poland, Slovakia, Sweden)\r
-#           added Alice Internal Search (blends data with Google?) search.alice.it.master:10005\r
-#            added detection of google cache views from IPs 66.249.93.104 72.14.203.104 72.14.207.104\r
-#              To do: add more extensive IP list; keywords not yet detected.\r
-#            added icerocket.com blog search http://www.icerocket.com/\r
-#           added live.com (msn) http://www.live.com/\r
-#           added Meta motor kartoo.  Note: Kartoo does not provide search words in referrers, thus the engine will appear in the\r
-#              search engine list but the actual search words are not available.\r
-#           added netluchs.de http://www.netluchs.de/\r
-#           added sphere.com blog search http://www.sphere.com/\r
-#           added wwweasel.de http://wwweasel.de\r
-#           added Yahoo Mindset! http://mindset.research.yahoo.com/\r
-#            updated Mirago query parameter recognition (qry=); added breakout for each country (France, Germany, Spain, Italy, Norway, Sweden, Denmark, Netherlands, Belgium, Switzerland)\r
-# 2006-05-13 Sean Carlos http://www.antezeta.com/awstats.html\r
-#           added Google cache IPs 64.233.183.104 & 66.102.7.104\r
-# 2006-05-20 Sean Carlos http://www.antezeta.com/awstats.html\r
-#              anzwers.com.au\r
-#              schoenerbrausen.de http://www.schoenerbrausen.de/\r
-#              added Google cache IP 216.239.59.104\r
-#              answerbus http://www.answerbus.com/ (does not provide keywords)\r
-# 2006-05-23 Sean Carlos http://www.antezeta.com/awstats.html\r
-#              added Google cache IP 66.102.9.104, 64.233.161.104\r
-# 2006-06-23 Sean Carlos http://www.antezeta.com/awstats.html\r
-#              added Alice Search search.alice.it\r
-#              added GoodSearch http://www.goodsearch.com/ (does not provide keywords) "a Yahoo-powered search engine that donates money to your favorite charity or school each time you search the web"\r
-#              added googlee.com, variant of Google\r
-#              added gotuneed http://www.gotuneed.com/ Italian search engine, in beta\r
-#              added icq.com\r
-#              added logic to parse Google Cache search keywords. Seems to work for alpha but not numeric cache IDs, i.e. search?q=cache:lWVLmnuGJswJ: is recognized but q=cache:Yv5qxeJNuhgJ: is not recognized. The URL triggering the keywords will also appear.  The URLs are probably too varied to parse out?\r
-#              added Nusearch http://www.nusearch.com/\r
-#              added Polymeta www.polymeta.hu (does not provide keywords)\r
-#              added scroogle http://www.scroogle.org/ (does not always provide keywords)\r
-#              added Tango http://tango.hu/search.php?st=0&q=jeles+napok\r
-#              Changed Google Cache notation 64\.233\.(161|167|179|183|187)\.104 to 64\.233\.1[0-9]{2}\.104\r
-#                                            72\.14\.(203|205|207|209|221)\.104 to 72\.14\.2[0-9]{2}\.104\r
-#                                            216\.239\.(51|59)\.104 to 216\.239\.5[0-9]\.104\r
-#                                            66\.102\.(7|9)\.104 to 66\.102\.[1-9]\.104\r
-# 2006-06-27 Sean Carlos http://www.antezeta.com/awstats.html\r
-#              added Onet.pl http://szukaj.onet.pl/\r
-#              corrected name "Wirtualna Polska" from "Szukaj" (search); added link http://szukaj.wp.pl/\r
-# 2006-06-30 Sean Carlos http://www.antezeta.com/awstats.html\r
-#      Additional Polish Search Engines:\r
-#      added Dodaj.pl http://www.dodaj.pl/\r
-#      added Gazeta.pl http://szukaj.gazeta.pl/\r
-#      added Gery.pl http://szukaj.gery.pl/\r
-#      added Hoga.pl http://www.hoga.pl/\r
-#      added Interia.pl http://www.google.interia.pl/\r
-#      added Katalog.Onet.pl http://katalog.onet.pl/\r
-#      added NetSprint.pl http://www.netsprint.pl/\r
-#      added o2.pl http://szukaj2.o2.pl/\r
-#      added Polska http://szukaj.polska.pl/\r
-#      added Szukacz http://www.szukacz.pl/\r
-#      added Wow.pl http://szukaj.wow.pl/\r
-#      added Sagool http://sagool.jp/\r
-\r
-# 2006-08-25 Social Bookmarks\r
-#      International\r
-#      added del.icio.us/search - for now, just search referrer. To do: consider /tag/(tagname) referrer?\r
-#      added stumbleupon.com - No keywords supplied.\r
-#      added swik.net\r
-#       added digg. Keywords sometimes supplied.\r
-#      Italy\r
-#      added segnalo.alice.it - No keywords supplied.\r
-#      added ineffabile.it - No keywords supplied.\r
-\r
-#       added filter for google groups.  Attempt to parse group name as keyword.\r
-\r
-# 2006-09-14\r
-#      added Eniro Sverige http://www.eniro.se/\r
-#      added MyWebSearch http://search.mywebsearch.com/\r
-#      added Teecno http://www.teecno.it/ Italian Open Source Search Engine\r
-\r
-#package AWSSE;\r
-\r
-# 2006-09-25 (Gabor Moizes)\r
-# added 4-counter (Google alternative) http://4-counter.com/\r
-# added Googlecom (Google alternative) http://googlecom.com/\r
-# added Goggle (Google alternative) http://goggle.co.hu/\r
-# added Comet toolbar http://as.starware.com\r
-# added new IP for Yahoo: 216.109.125.130\r
-# added Ledix http://ledix.net/\r
-# added AT&T search (powered by Google) http://www.att.net/\r
-# added Keresolap (Hungarian search engine) http://www.keresolap.hu/\r
-# added Mozbot (French search engine) http://www.mozbot.fr/\r
-# added Zoznam (Slovak search engine) http://www.zoznam.sk/\r
-# added sapo.pt (Portuguese search engine) http://www.sapo.pt/\r
-# added shaw.ca (powered by Google) http://start.shaw.ca/\r
-# added Searchalot http://www.searchalot.com/\r
-# added Copernic http://www.copernic.com/\r
-# added 216.109.125.130 to Yahoo\r
-# added 66.218.69.11 to Yahoo\r
-# added Avantfind http://www.avantfind.com/\r
-# added Steadysearch http://www.steadysearch.com/\r
-# added Steadysearch http://www.steady-search.com/\r
-# modified 216\.239\.5[0-9]\.104/search to 216\.239\.5[0-9]\.104\r
-\r
-\r
-# SearchEnginesSearchIDOrder\r
-# It contains all matching criteria to search for in log fields. This list is\r
-# used to know in which order to search Search Engines IDs.\r
-# Most frequent one are in list1, used when LevelForSearchEnginesDetection is 1 or more\r
-# Minor robots are in list2, used when LevelForSearchEnginesDetection is 2 or more\r
-# Note: Regex IDs are in lower case and ' ' and '+' are changed into '_'\r
-#------------------------------------------------------------------------------\r
-@SearchEnginesSearchIDOrder_list1=(\r
-# Major international search engines\r
-'google\.[\w.]+/products',\r
-'base\.google\.',\r
-'froogle\.google\.',\r
-'groups\.google\.',\r
-'images\.google\.',\r
-'google\.',\r
-'googlee\.',\r
-'googlecom\.com',\r
-'goggle\.co\.hu',\r
-'216\.239\.32\.20',\r
-'173\.194\.32\.223',\r
-'216\.239\.(35|37|39|51)\.100',\r
-'216\.239\.(35|37|39|51)\.101',\r
-'216\.239\.5[0-9]\.104',\r
-'64\.233\.1[0-9]{2}\.104',\r
-'66\.102\.[1-9]\.104',\r
-'66\.249\.93\.104',\r
-'72\.14\.2[0-9]{2}\.104',\r
-'msn\.',\r
-'live\.com',\r
-'bing\.',\r
-'voila\.',\r
-'mindset\.research\.yahoo',\r
-'yahoo\.','(66\.218\.71\.225|216\.109\.117\.135|216\.109\.125\.130|66\.218\.69\.11)',\r
-'search\.aol\.co',\r
-'tiscali\.',\r
-'lycos\.',\r
-'alexa\.com',\r
-'alltheweb\.com',\r
-'altavista\.',\r
-'a9\.com',\r
-'dmoz\.org',\r
-'netscape\.',\r
-'search\.terra\.',\r
-'www\.search\.com',\r
-'search\.sli\.sympatico\.ca',\r
-'excite\.'\r
-);\r
-\r
-@SearchEnginesSearchIDOrder_list2=(\r
-# Minor international search engines\r
-'4\-counter\.com',\r
-'att\.net',\r
-'bungeebonesdotcom',\r
-'northernlight\.',\r
-'hotbot\.',\r
-'kvasir\.',\r
-'webcrawler\.',\r
-'metacrawler\.',\r
-'go2net\.com',\r
-'(^|\.)go\.com',\r
-'euroseek\.',\r
-'looksmart\.',\r
-'spray\.',\r
-'nbci\.com\/search',\r
-'de\.ask.\com', # break out Ask country specific engines.  (.jp is in Japan section)\r
-'es\.ask.\com',\r
-'fr\.ask.\com',\r
-'it\.ask.\com',\r
-'nl\.ask.\com',\r
-'uk\.ask.\com',\r
-'(^|\.)ask\.com',\r
-'atomz\.',\r
-'overture\.com',               # Replace 'goto\.com','Goto.com',\r
-'teoma\.',\r
-'findarticles\.com',\r
-'infospace\.com',\r
-'mamma\.',\r
-'dejanews\.',\r
-'dogpile\.com',\r
-'wisenut\.com',\r
-'ixquick\.com',\r
-'search\.earthlink\.net',\r
-'i-une\.com',\r
-'blingo\.com',\r
-'centraldatabase\.org',\r
-'clusty\.com',\r
-'mysearch\.',\r
-'vivisimo\.com',\r
-'kartoo\.com',\r
-'icerocket\.com',\r
-'sphere\.com',\r
-'ledix\.net',\r
-'start\.shaw\.ca',\r
-'searchalot\.com',\r
-'copernic\.com',\r
-'avantfind\.com',\r
-'steadysearch\.com',\r
-'steady-search\.com',\r
-'claro-search\.com',\r
-'www1\.search-results\.com',\r
-'www\.holasearch\.com',\r
-'search\.conduit\.com',\r
-'static\.flipora\.com',\r
-'(?:www[12]?|mixidj)\.delta-search\.com',\r
-'start\.iminent\.com',\r
-'www\.searchmobileonline\.com',\r
-'int\.search-results\.com',\r
-'www2\.inbox\.com',\r
-'www\.govome\.com',\r
-'find1friend\.com',\r
-'start\.mysearchdial\.com',\r
-'go\.speedbit\.com',\r
-'search\.certified-toolbar\.com',\r
-'search\.sweetim\.com',\r
-'search\.searchcompletion\.com',\r
-'en\.eazel\.com',\r
-'sr\.searchfunmoods\.com',\r
-'173\.194\.35\.177',\r
-'dalesearch\.com',\r
-'sweetpacks-search\.com',\r
-'searchgol\.com',\r
-'duckduckgo\.com',\r
-'sr\.facemoods\.com',\r
-'shoppstop\.com',\r
-'searchya\.com',\r
-'picsearch\.de',\r
-'webssearches\.com',\r
-'airzip\.inspsearch\.com',\r
-'zapmeta\.de',\r
-'localmoxie\.com',\r
-'search-results\.mobi',\r
-'androidsearch\.com',\r
-'isearch\.nation\.com',\r
-'search\.zonealarm\.com',\r
-'www\.buenosearch\.com',\r
-'search\.foxtab\.com',\r
-'searches\.qone8\.com',\r
-'startpage\.com',\r
-'www\.qwant\.com',\r
-'searches\.safehomepage\.com',\r
-'searches\.vi-view\.com',\r
-'wow\.utop\.it',\r
-'windowssearch\.com',\r
-'www\.wow\.com',\r
-'globososo\.',\r
-'kingtale3\.inspsearch\.com',\r
-'swisscows\.ch',\r
-'preciobarato\.xyz',\r
-'www\.dregol\.com',\r
-'search\.socialdownloadr\.com',\r
-'int\.search\.myway\.com',\r
-'de\.dolphin\.com',\r
-'mys\.yoursearch\.me',\r
-# Chello Portals\r
-'chello\.at',\r
-'chello\.be',\r
-'chello\.cz',\r
-'chello\.fr',\r
-'chello\.hu',\r
-'chello\.nl',\r
-'chello\.no',\r
-'chello\.pl',\r
-'chello\.se',\r
-'chello\.sk',\r
-'chello', # required as catchall for new countries not yet known\r
-# Mirago\r
-'mirago\.be',\r
-'mirago\.ch',\r
-'mirago\.de',\r
-'mirago\.dk',\r
-'es\.mirago\.com',\r
-'mirago\.fr',\r
-'mirago\.it',\r
-'mirago\.nl',\r
-'no\.mirago\.com',\r
-'mirago\.se',\r
-'mirago\.co\.uk',\r
-'mirago', # required as catchall for new countries not yet known\r
-'answerbus\.com',\r
-'icq\.com\/search',\r
-'nusearch\.com',\r
-'goodsearch\.com',\r
-'scroogle\.org',\r
-'questionanswering\.com',\r
-'mywebsearch\.com',\r
-'as\.starware\.com',\r
-# Social Bookmarking Services\r
-'del\.icio\.us',\r
-'digg\.com',\r
-'stumbleupon\.com',\r
-'swik\.net',\r
-'segnalo\.alice\.it',\r
-'ineffabile\.it',\r
-# Minor Australian search engines\r
-'anzwers\.com\.au',\r
-# Minor brazilian search engines\r
-'engine\.exe', 'miner\.bol\.com\.br',\r
-# Minor chinese search engines\r
-'\.baidu\.com',     # baidu search portal\r
-'\.vnet\.cn',       # powered by MSN\r
-'\.soso\.com',      # powered by Google\r
-'\.sogou\.com',     # powered by Sohu\r
-'\.3721\.com',      # powered by Yahoo!\r
-'iask\.com',        # powered by Sina\r
-'\.accoona\.com',   # Accoona\r
-'\.163\.com',       # powered by Google\r
-'\.zhongsou\.com',  # zhongsou search portal\r
-# Minor czech search engines\r
-'atlas\.cz','seznam\.cz','quick\.cz','centrum\.cz','jyxo\.(cz|com)','najdi\.to','redbox\.cz',\r
-'isearch\.avg\.com',\r
-# Minor danish search-engines\r
-'opasia\.dk', 'danielsen\.com', 'sol\.dk', 'jubii\.dk', 'find\.dk', 'edderkoppen\.dk', 'netstjernen\.dk', 'orbis\.dk', 'tyfon\.dk', '1klik\.dk', 'ofir\.dk',\r
-# Minor dutch search engines\r
-'ilse\.','vindex\.',\r
-# Minor english search engines\r
-'(^|\.)ask\.co\.uk','bbc\.co\.uk/cgi-bin/search','ifind\.freeserve','looksmart\.co\.uk','splut\.','spotjockey\.','ukdirectory\.','ukindex\.co\.uk','ukplus\.','searchy\.co\.uk',\r
-'search\.fbdownloader\.com',\r
-'search\.fdownloadr\.com',\r
-'search\.babylon\.com',\r
-'my\.allgameshome\.com',\r
-'surfcanyon\.com',\r
-'uk\.foxstart\.com',\r
-'yandex\.com',\r
-# Minor finnish search engines\r
-'haku\.www\.fi',\r
-# Minor french search engines\r
-'recherche\.aol\.fr','ctrouve\.','francite\.','\.lbb\.org','rechercher\.libertysurf\.fr', 'search[\w\-]+\.free\.fr', 'recherche\.club-internet\.fr',\r
-'toile\.com', 'biglotron\.com',\r
-'mozbot\.fr',\r
-# Minor german search engines\r
-'sucheaol\.aol\.de',\r
-'o2suche\.aol\.de',\r
-'fireball\.de','infoseek\.de','suche\d?\.web\.de','[a-z]serv\.rrzn\.uni-hannover\.de',\r
-'suchen\.abacho\.de','(brisbane|suche)\.t-online\.de','allesklar\.de','meinestadt\.de',\r
-'212\.227\.33\.241',\r
-'(161\.58\.227\.204|161\.58\.247\.101|212\.40\.165\.90|213\.133\.108\.202|217\.160\.108\.151|217\.160\.111\.99|217\.160\.131\.108|217\.160\.142\.227|217\.160\.176\.42)',\r
-'wwweasel\.de',\r
-'netluchs\.de',\r
-'schoenerbrausen\.de',\r
-'suche\.gmx\.net',\r
-'suche\.gmx\.at',\r
-'ecosia\.org',\r
-'de\.aolsearch\.com',\r
-'suche\.aol\.de',\r
-'www\.startxxl\.com',\r
-'www\.benefind\.de',\r
-'www\.amazon\.de.*search', #Just as a reminder, probably will not work as AWstats seems to consider the host part of an URL only\r
-'de\.wow\.com',\r
-'www\.vlips\.de',\r
-'metager\.de',\r
-'search\.1und1\.de',\r
-'sm\.de',\r
-'sumaja\.de',\r
-'navigationshilfe\.t-online\.de',\r
-'umfis\.de',\r
-'fastbot\.de',\r
-'tixuma\.de',\r
-'suche\.freenet\.de',\r
-'www\.izito\.de',\r
-'extern\.peoplecheck\.de',\r
-'www\.oneseek\.de',\r
-'de\.wiki\.gov\.cn',\r
-'umuwa\.de',\r
-'suche\.1und1\.de',\r
-'www\.metasuche\.ch',\r
-# Minor Hungarian search engines\r
-'heureka\.hu','vizsla\.origo\.hu','lapkereso\.hu','goliat\.hu','index\.hu','wahoo\.hu','webmania\.hu','search\.internetto\.hu',\r
-'tango\.hu',\r
-'keresolap\.hu',\r
-'kereso\.startlap\.hu',\r
-'polymeta\.hu',\r
-# Minor Indian search engines\r
-'sify\.com',\r
-# Minor Italian search engines\r
-'virgilio\.it','arianna\.libero\.it','supereva\.com','kataweb\.it','search\.alice\.it\.master','search\.alice\.it','gotuneed\.com',\r
-'godado','jumpy\.it','shinyseek\.it','teecno\.it',\r
-# Minor Israeli search engines\r
-'search\.genieo\.com',\r
-# Minor Japanese search engines\r
-'ask\.jp','sagool\.jp',\r
-'websearch\.rakuten\.co\.jp',\r
-# Minor Norwegian search engines\r
-'sok\.start\.no', 'eniro\.no',\r
-# Minor Polish search engines\r
-'szukaj\.wp\.pl','szukaj\.onet\.pl','dodaj\.pl','gazeta\.pl','gery\.pl','hoga\.pl','netsprint\.pl','interia\.pl','katalog\.onet\.pl','o2\.pl','polska\.pl','szukacz\.pl','wow\.pl',\r
-# Minor russian search engines\r
-'ya(ndex)?\.ru', 'aport\.ru', 'rambler\.ru', 'turtle\.ru', 'metabot\.ru',\r
-'go\.mail\.ru',\r
-# Minor Swedish search engines\r
-'evreka\.passagen\.se','eniro\.se',\r
-# Minor Slovak search engines\r
-'zoznam\.sk',\r
-# Minor Portuguese search engines\r
-'sapo\.pt',\r
-# Minor swiss search engines\r
-'search\.ch', 'search\.bluewin\.ch',\r
-'www\.zapmeta\.ch',\r
-'etools\.ch',\r
-# Minor Croatian, Serbian, Macedonian, Bosnian and Herzegovinian search engines\r
-'pogodak\.'\r
-);\r
-@SearchEnginesSearchIDOrder_listgen=(\r
-# Generic search engines\r
-'search\..*\.\w+'\r
-);\r
-\r
-\r
-# NotSearchEnginesKeys\r
-# If a search engine key is found, we check its exclude list to know if it's\r
-# really a search engine\r
-#------------------------------------------------------------------------------\r
-%NotSearchEnginesKeys=(\r
-'altavista\.'=>'babelfish\.altavista\.',\r
-'google\.'=>'mail\.google\.',\r
-'google\.'=>'translate\.google\.',\r
-'google\.'=>'code\.google\.',\r
-'google\.'=>'groups\.google\.',\r
-'msn\.'=>'hotmail\.msn\.',\r
-'tiscali\.'=>'mail\.tiscali\.',\r
-'yahoo\.'=>'(?:picks|mail)\.yahoo\.|yahoo\.[^/]+/picks',\r
-'yandex\.'=>'direct\.yandex\.'\r
-);\r
-\r
-\r
-# SearchEnginesHashID\r
-# Each Search Engine Search ID is associated to an AWStats id string\r
-#------------------------------------------------------------------------------\r
-%SearchEnginesHashID = (\r
-# Major international search engines\r
-'google\.[\w.]+/products','google_products',\r
-'base\.google\.','google_base',\r
-'froogle\.google\.','google_froogle',\r
-'groups\.google\.','google_groups',\r
-'images\.google\.','google_image',\r
-'google\.','google',\r
-'googlee\.','google',\r
-'googlecom\.com','google',\r
-'goggle\.co\.hu','google',\r
-'216\.239\.32\.20', 'google',\r
-'173\.194\.32\.223', 'google',\r
-'216\.239\.(35|37|39|51)\.100','google_cache',\r
-'216\.239\.(35|37|39|51)\.101','google_cache',\r
-'216\.239\.5[0-9]\.104','google_cache',\r
-'64\.233\.1[0-9]{2}\.104','google_cache',\r
-'66\.102\.[1-9]\.104','google_cache',\r
-'66\.249\.93\.104','google_cache',\r
-'72\.14\.2[0-9]{2}\.104','google_cache',\r
-'msn\.','msn',\r
-'live\.com','live',\r
-'bing\.','bing',\r
-'voila\.','voila',\r
-'mindset\.research\.yahoo','yahoo_mindset',\r
-'yahoo\.','yahoo','(66\.218\.71\.225|216\.109\.117\.135|216\.109\.125\.130|66\.218\.69\.11)','yahoo',\r
-'lycos\.','lycos',\r
-'alexa\.com','alexa',\r
-'alltheweb\.com','alltheweb',\r
-'altavista\.','altavista',\r
-'a9\.com','a9',\r
-'dmoz\.org','dmoz',\r
-'netscape\.','netscape',\r
-'search\.terra\.','terra',\r
-'www\.search\.com','search.com',\r
-'tiscali\.','tiscali',\r
-'search\.aol\.co','aol',\r
-'search\.sli\.sympatico\.ca','sympatico',\r
-'excite\.','excite',\r
-# Minor international search engines\r
-'4\-counter\.com','google4counter',\r
-'att\.net','att',\r
-'bungeebonesdotcom','bungeebonesdotcom',\r
-'northernlight\.','northernlight',\r
-'hotbot\.','hotbot',\r
-'kvasir\.','kvasir',\r
-'webcrawler\.','webcrawler',\r
-'metacrawler\.','metacrawler',\r
-'go2net\.com','go2net',\r
-'(^|\.)go\.com','go',\r
-'euroseek\.','euroseek',\r
-'looksmart\.','looksmart',\r
-'spray\.','spray',\r
-'nbci\.com\/search','nbci',\r
-'de\.ask.\com','askde', # break out Ask country specific engines.\r
-'es\.ask.\com','askes',\r
-'fr\.ask.\com','askfr',\r
-'it\.ask.\com','askit',\r
-'nl\.ask.\com','asknl',\r
-'uk\.ask.\com','askuk',\r
-'(^|\.)ask\.co\.uk','askuk',\r
-'(^|\.)ask\.com','ask',\r
-'atomz\.','atomz',\r
-'overture\.com','overture',            # Replace 'goto\.com','Goto.com',\r
-'teoma\.','teoma',\r
-'findarticles\.com','findarticles',\r
-'infospace\.com','infospace',\r
-'mamma\.','mamma',\r
-'dejanews\.','dejanews',\r
-'dogpile\.com','dogpile',\r
-'wisenut\.com','wisenut',\r
-'ixquick\.com','ixquick',\r
-'search\.earthlink\.net','earthlink',\r
-'i-une\.com','iune',\r
-'blingo\.com','blingo',\r
-'centraldatabase\.org','centraldatabase',\r
-'clusty\.com','clusty',\r
-'mysearch\.','mysearch',\r
-'vivisimo\.com','vivisimo',\r
-'kartoo\.com','kartoo',\r
-'icerocket\.com','icerocket',\r
-'sphere\.com','sphere',\r
-'ledix\.net','ledix',\r
-'start\.shaw\.ca','shawca',\r
-'searchalot\.com','searchalot',\r
-'copernic\.com','copernic',\r
-'avantfind\.com','avantfind',\r
-'steadysearch\.com','steadysearch',\r
-'steady-search\.com','steadysearch',\r
-'claro-search\.com','clarosearch',\r
-'www1\.search-results\.com', 'searchresults',\r
-'www\.holasearch\.com', 'holasearch',\r
-'search\.conduit\.com', 'conduit',\r
-'static\.flipora\.com', 'flipora',\r
-'(?:www[12]?|mixidj)\.delta-search\.com', 'delta-search',\r
-'start\.iminent\.com', 'iminent',\r
-'www\.searchmobileonline\.com', 'searchmobileonline',\r
-'int\.search-results\.com', 'nortonsavesearch',\r
-'www2\.inbox\.com', 'inbox',\r
-'www\.govome\.com', 'govome',\r
-'find1friend\.com', 'find1friend',\r
-'start\.mysearchdial\.com', 'mysearchdial',\r
-'go\.speedbit\.com', 'speedbit',\r
-'search\.certified-toolbar\.com', 'certifiedtoolbarsearch',\r
-'search\.sweetim\.com', 'sweetim',\r
-'search\.searchcompletion\.com', 'searchcompletion',\r
-'en\.eazel\.com','eazelsearch',\r
-'sr\.searchfunmoods\.com', 'searchfunmoods',\r
-'173\.194\.35\.177', 'googleByIP',\r
-'dalesearch\.com', 'dalesearch',\r
-'sweetpacks-search\.com', 'sweetpacks',\r
-'searchgol\.com', 'searchgol',\r
-'duckduckgo\.com', 'duckduckgo',\r
-'sr\.facemoods\.com', 'facemoods',\r
-'shoppstop\.com', 'shoppstop',\r
-'searchya\.com', 'searchya',\r
-'picsearch\.de', 'picsearch',\r
-'webssearches\.com', 'webssearches',\r
-'airzip\.inspsearch\.com', 'webssearches',\r
-'zapmeta\.de', 'zapmeta',\r
-'localmoxie\.com', 'localmoxie',\r
-'search-results\.mobi', 'search-results_mobi',\r
-'androidsearch\.com', 'androidsearch',\r
-'isearch\.nation\.com', 'isearch_nation_com',\r
-'search\.zonealarm\.com', 'search_zonealarm_com',\r
-'www\.buenosearch\.com', 'www_buenosearch_com',\r
-'search\.foxtab\.com', 'search_foxtab_com',\r
-'searches\.qone8\.com', 'searches_qone8_com',\r
-'startpage\.com', 'startpage_com',\r
-'www\.qwant\.com', 'qwant_com',\r
-'searches\.safehomepage\.com', 'safehomepage_com',\r
-'searches\.vi-view\.com', 'vi-view_com',\r
-'wow\.utop\.it', 'wow_utop_it',\r
-'windowssearch\.com', 'windowssearch_com',\r
-'www\.wow\.com', 'www_wow_com',\r
-'globososo\.', 'globososo',\r
-'kingtale3\.inspsearch\.com', 'globososo',\r
-'swisscows\.ch', 'swisscows_ch',\r
-'preciobarato\.xyz', 'preciobarato_xyz',\r
-'www\.dregol\.com', 'www_dregol_com',\r
-'search\.socialdownloadr\.com', 'search_socialdownloadr_com',\r
-'int\.search\.myway\.com', 'int_search_myway_com',\r
-'de\.dolphin\.com', 'de_dolphin_com',\r
-'mys\.yoursearch\.me', 'mys_yoursearch_me',\r
-# Chello Portals\r
-'chello\.at','chelloat',\r
-'chello\.be','chellobe',\r
-'chello\.cz','chellocz',\r
-'chello\.fr','chellofr',\r
-'chello\.hu','chellohu',\r
-'chello\.nl','chellonl',\r
-'chello\.no','chellono',\r
-'chello\.pl','chellopl',\r
-'chello\.se','chellose',\r
-'chello\.sk','chellosk',\r
-'chello','chellocom',\r
-# Mirago\r
-'mirago\.be','miragobe',\r
-'mirago\.ch','miragoch',\r
-'mirago\.de','miragode',\r
-'mirago\.dk','miragodk',\r
-'es\.mirago\.com','miragoes',\r
-'mirago\.fr','miragofr',\r
-'mirago\.it','miragoit',\r
-'mirago\.nl','miragonl',\r
-'no\.mirago\.com','miragono',\r
-'mirago\.se','miragose',\r
-'mirago\.co\.uk','miragocouk',\r
-'mirago','mirago', # required as catchall for new countries not yet known\r
-'answerbus\.com','answerbus',\r
-'icq\.com\/search','icq',\r
-'nusearch\.com','nusearch',\r
-'goodsearch\.com','goodsearch',\r
-'scroogle\.org','scroogle',\r
-'questionanswering\.com','questionanswering',\r
-'mywebsearch\.com','mywebsearch',\r
-'as\.starware\.com','comettoolbar',\r
-# Social Bookmarking Services\r
-'del\.icio\.us','delicious',\r
-'digg\.com','digg',\r
-'stumbleupon\.com','stumbleupon',\r
-'swik\.net','swik',\r
-'segnalo\.alice\.it','segnalo',\r
-'ineffabile\.it','ineffabile',\r
-# Minor Australian search engines\r
-'anzwers\.com\.au','anzwers',\r
-# Minor brazilian search engines\r
-'engine\.exe','engine',\r
-'miner\.bol\.com\.br','miner',\r
-# Minor chinese search engines\r
-'\.baidu\.com','baidu',\r
-'iask\.com','iask',\r
-'\.accoona\.com','accoona',\r
-'\.3721\.com','3721',\r
-'\.163\.com','netease',\r
-'\.soso\.com','soso',\r
-'\.zhongsou\.com','zhongsou',\r
-'\.vnet\.cn','vnet',\r
-'\.sogou\.com','sogou',\r
-# Minor czech search engines\r
-'atlas\.cz','atlas',\r
-'seznam\.cz','seznam',\r
-'quick\.cz','quick',\r
-'centrum\.cz','centrum',\r
-'jyxo\.(cz|com)','jyxo',\r
-'najdi\.to','najdi',\r
-'redbox\.cz','redbox',\r
-'isearch\.avg\.com', 'avgsearch',\r
-# Minor danish search-engines\r
-'opasia\.dk','opasia',\r
-'danielsen\.com','danielsen',\r
-'sol\.dk','sol',\r
-'jubii\.dk','jubii',\r
-'find\.dk','finddk',\r
-'edderkoppen\.dk','edderkoppen',\r
-'netstjernen\.dk','netstjernen',\r
-'orbis\.dk','orbis',\r
-'tyfon\.dk','tyfon',\r
-'1klik\.dk','1klik',\r
-'ofir\.dk','ofir',\r
-# Minor dutch search engines\r
-'ilse\.','ilse',\r
-'vindex\.','vindex',\r
-# Minor english search engines\r
-'bbc\.co\.uk/cgi-bin/search','bbc',\r
-'ifind\.freeserve','freeserve',\r
-'looksmart\.co\.uk','looksmartuk',\r
-'splut\.','splut',\r
-'spotjockey\.','spotjockey',\r
-'ukdirectory\.','ukdirectory',\r
-'ukindex\.co\.uk','ukindex',\r
-'ukplus\.','ukplus',\r
-'searchy\.co\.uk','searchy',\r
-'search\.fbdownloader\.com','fbdownloader',\r
-'search\.fdownloadr\.com', 'fdownloadr_com',\r
-'search\.babylon\.com', 'babylon',\r
-'my\.allgameshome\.com', 'allgameshome',\r
-'surfcanyon\.com', 'surfcanyon_com',\r
-'uk\.foxstart\.com', 'uk_foxstart_com',\r
-'yandex\.com', 'yandex_com',\r
-# Minor finnish search engines\r
-'haku\.www\.fi','haku',\r
-# Minor french search engines\r
-'recherche\.aol\.fr','aolfr',\r
-'ctrouve\.','ctrouve',\r
-'francite\.','francite',\r
-'\.lbb\.org','lbb',\r
-'rechercher\.libertysurf\.fr','libertysurf',\r
-'search[\w\-]+\.free\.fr','free',\r
-'recherche\.club-internet\.fr','clubinternet',\r
-'toile\.com','toile',\r
-'biglotron\.com', 'biglotron',\r
-'mozbot\.fr', 'mozbot',\r
-# Minor german search engines\r
-'sucheaol\.aol\.de','aolde',\r
-'o2suche\.aol\.de','o2aolde',\r
-'fireball\.de','fireball',\r
-'infoseek\.de','infoseek',\r
-'suche\d?\.web\.de','webde',\r
-'[a-z]serv\.rrzn\.uni-hannover\.de','meta',\r
-'suchen\.abacho\.de','abacho',\r
-'(brisbane|suche)\.t-online\.de','t-online',\r
-'allesklar\.de','allesklar',\r
-'meinestadt\.de','meinestadt',\r
-'212\.227\.33\.241','metaspinner',\r
-'(161\.58\.227\.204|161\.58\.247\.101|212\.40\.165\.90|213\.133\.108\.202|217\.160\.108\.151|217\.160\.111\.99|217\.160\.131\.108|217\.160\.142\.227|217\.160\.176\.42)','metacrawler_de',\r
-'wwweasel\.de','wwweasel',\r
-'netluchs\.de','netluchs',\r
-'schoenerbrausen\.de','schoenerbrausen',\r
-'suche\.gmx\.net', 'gmxsuche',\r
-'suche\.gmx\.at', 'gmxsuche_at',\r
-'ecosia\.org', 'ecosiasearch',\r
-'de\.aolsearch\.com', 'aolsearch',\r
-'suche\.aol\.de', 'aolsuche',\r
-'www\.startxxl\.com', 'startxxl',\r
-'www\.benefind\.de', 'benefind',\r
-'www\.amazon\.de.*search', 'amazonsearch', #Not clear if this matches amazon searches only\r
-'de\.wow\.com', 'wowsearch',\r
-'www\.vlips\.de', 'vlips_de',\r
-'metager\.de', 'metager',\r
-'search\.1und1\.de', 'search_1und1_de',\r
-'sm\.de', 'smde',\r
-'sumaja\.de', 'sumaja',\r
-'navigationshilfe\.t-online\.de', 'navigationshilfe',\r
-'umfis\.de', 'umfis',\r
-'fastbot\.de', 'fastbot_de',\r
-'tixuma\.de', 'tixuma_de',\r
-'suche\.freenet\.de', 'freenet_de',\r
-'www\.izito\.de', 'izito_de',\r
-'extern\.peoplecheck\.de', 'peoplecheck_de',\r
-'www\.oneseek\.de', 'oneseek_de',\r
-'de\.wiki\.gov\.cn', 'de_wiki_gov_cn',\r
-'umuwa\.de', 'umuwa_de',\r
-'suche\.1und1\.de', '1und1_de',\r
-'www\.metasuche\.ch', 'metasuche_ch',\r
-# Minor Hungarian search engines\r
-'heureka\.hu','heureka',\r
-'vizsla\.origo\.hu','origo',\r
-'lapkereso\.hu','lapkereso',\r
-'goliat\.hu','goliat',\r
-'index\.hu','indexhu',\r
-'wahoo\.hu','wahoo',\r
-'webmania\.hu','webmania',\r
-'search\.internetto\.hu','internetto',\r
-'tango\.hu','tango_hu',\r
-'keresolap\.hu','keresolap_hu',\r
-'kereso\.startlap\.hu', 'startlap_hu',\r
-'polymeta\.hu','polymeta_hu',\r
-# Minor Indian search engines\r
-'sify\.com','sify',\r
-# Minor Italian search engines\r
-'virgilio\.it','virgilio',\r
-'arianna\.libero\.it','arianna',\r
-'supereva\.com','supereva',\r
-'kataweb\.it','kataweb',\r
-'search\.alice\.it\.master','aliceitmaster',\r
-'search\.alice\.it','aliceit',\r
-'gotuneed\.com','gotuneed',\r
-'godado','godado',\r
-'jumpy\.it','jumpy\.it',\r
-'shinyseek\.it','shinyseek\.it',\r
-'teecno\.it','teecnoit',\r
-# Minor Israeli search engines\r
-'search\.genieo\.com', 'genieo',\r
-# Minor Japanese search engines\r
-'ask\.jp','askjp',\r
-'sagool\.jp','sagool',\r
-'websearch\.rakuten\.co\.jp', 'rakuten',\r
-# Minor Norwegian search engines\r
-'sok\.start\.no','start', 'eniro\.no','eniro',\r
-# Minor Polish search engines\r
-'szukaj\.wp\.pl','wp',\r
-'szukaj\.onet\.pl','onetpl',\r
-'dodaj\.pl','dodajpl',\r
-'gazeta\.pl','gazetapl',\r
-'gery\.pl','gerypl',\r
-'netsprint\.pl\/hoga\-search','hogapl',\r
-'netsprint\.pl','netsprintpl',\r
-'interia\.pl','interiapl',\r
-'katalog\.onet\.pl','katalogonetpl',\r
-'o2\.pl','o2pl',\r
-'polska\.pl','polskapl',\r
-'szukacz\.pl','szukaczpl',\r
-'wow\.pl','wowpl',\r
-# Minor russian search engines\r
-'ya(ndex)?\.ru','yandex',\r
-'aport\.ru','aport',\r
-'rambler\.ru','rambler',\r
-'turtle\.ru','turtle',\r
-'metabot\.ru','metabot',\r
-'go\.mail\.ru', 'mailru',\r
-# Minor Swedish search engines\r
-'evreka\.passagen\.se','passagen',\r
-'eniro\.se','enirose',\r
-# Minor Slovak search engines\r
-'zoznam\.sk','zoznam',\r
-# Minor Portuguese search engines\r
-'sapo\.pt','sapo',\r
-# Minor swiss search engines\r
-'search\.ch','searchch',\r
-'search\.bluewin\.ch','bluewin',\r
-'www\.zapmeta\.ch', 'zapmeta_ch',\r
-'etools\.ch', 'etools_ch',\r
-# Minor Croatian, Serbian, Macedonian, Bosnian and Herzegovinian search engines\r
-'pogodak\.','pogodak',\r
-# Generic search engines\r
-'search\..*\.\w+','search'\r
-);\r
-\r
-\r
-# SearchEnginesWithKeysNotInQuery\r
-# List of search engines that store keyword as page instead of query parameter\r
-#------------------------------------------------------------------------------\r
-%SearchEnginesWithKeysNotInQuery=(\r
-'a9',1,    # www.a9.com/searchkey1%20searchkey2\r
-'iminent',1, #http://start.iminent.com/StartWeb/1031/toolbox/#q=searchkey1%20searchkey2&additional_arguments\r
-'de_wiki_gov_cn',1, #http://de.wiki.gov.cn/s_searchkey1%20searchkey2\r
-'umuwa_de', 1, #http://umuwa.de/searchkey or http://umuwa.de/searchkey/Images\r
-'amazonsearch', 1 #http://www.amazon.de/gp/bit/apps/web/SERP/search/ref=bit_bds-p24_serp_cr_de?ie=UTF8tagbase=bds-p24&query=deutsch+8.+klasse+gymnasium+protokoll\r
-);\r
-\r
-# SearchEnginesKnownUrl\r
-# Known rules to extract keywords from a referrer search engine URL\r
-#------------------------------------------------------------------------------\r
-%SearchEnginesKnownUrl=(\r
-# Most common search engines\r
-'alexa','q=',\r
-'alltheweb','q(|uery)=',\r
-'altavista','q=',\r
-'a9','a9\.com\/',\r
-'dmoz','search=',\r
-'google_products','(p|q|as_p|as_q)=',\r
-'google_base','(p|q|as_p|as_q)=',\r
-'google_froogle','(p|q|as_p|as_q)=',\r
-'google_groups','group\/', # does not work\r
-'google_image','(p|q|as_p|as_q)=',\r
-'google_cache','(p|q|as_p|as_q)=cache:[0-9A-Za-z]{12}:',\r
-'google','(p|q|as_p|as_q)=',\r
-'lycos','query=',\r
-'msn','q=',\r
-'live','q=',\r
-'bing','q=',\r
-'netscape','search=',\r
-'tiscali','key=',\r
-'aol','query=',\r
-'terra','query=',\r
-'voila','(kw|rdata)=',\r
-'search.com','q=',\r
-'yahoo_mindset','p=',\r
-'yahoo','p=',\r
-'sympatico', 'query=',\r
-'excite','search=',\r
-# Minor international search engines\r
-'google4counter','(p|q|as_p|as_q)=',\r
-'att','qry=',\r
-'bungeebonesdotcom','query=',\r
-'go','qt=',\r
-'askde','(ask|q)=', # break out Ask country specific engines.\r
-'askes','(ask|q)=',\r
-'askfr','(ask|q)=',\r
-'askit','(ask|q)=',\r
-'asknl','(ask|q)=',\r
-'ask','(ask|q)=',\r
-'atomz','sp-q=',\r
-'euroseek','query=',\r
-'findarticles','key=',\r
-'go2net','general=',\r
-'hotbot','mt=',\r
-'infospace','qkw=',\r
-'kvasir', 'q=',\r
-'looksmart','key=',\r
-'mamma','query=',\r
-'metacrawler','general=',\r
-'nbci','keyword=',\r
-'northernlight','qr=',\r
-'overture','keywords=',\r
-'dogpile', 'q(|kw)=',\r
-'spray','string=',\r
-'teoma','q=',\r
-'webcrawler','searchText=',\r
-'wisenut','query=',\r
-'ixquick', 'query=',\r
-'earthlink', 'q=',\r
-'iune','(keywords|q)=',\r
-'blingo','q=',\r
-'centraldatabase','query=',\r
-'clusty','query=',\r
-'mysearch','searchfor=',\r
-'vivisimo','query=',\r
-# kartoo: No keywords passed in referring URL.\r
-'kartoo','',\r
-'icerocket','q=',\r
-'sphere','q=',\r
-'ledix','q=',\r
-'shawca','q=',\r
-'searchalot','q=',\r
-'copernic','web\/',\r
-'avantfind','keywords=',\r
-'steadysearch','w=',\r
-'clarosearch','q=',\r
-'searchresults','q=',\r
-'holasearch', 'q=',\r
-'conduit', 'q=',\r
-'flipora', 'q=',\r
-'delta-search', 'q=',\r
-'iminent', 'q=',\r
-'searchmobileonline', 'q=',\r
-'nortonsavesearch', 'q=',\r
-'inbox', 'q(?:kw)?=',\r
-'govome', 'q=',\r
-'find1friend', 'q=',\r
-'mysearchdial', 'q=',\r
-'speedbit', 'q=',\r
-'certifiedtoolbarsearch', 'q=',\r
-'sweetim', 'q=',\r
-'searchcompletion', 'q=',\r
-'eazelsearch', 'q=',\r
-'searchfunmoods', 'q=',\r
-'googleByIP', 'q=',\r
-'dalesearch',  'q=',\r
-'sweetpacks', 'q=',\r
-'searchgol', 'q=',\r
-'duckduckgo', 'uddg=',\r
-'facemoods', 'q=',\r
-'shoppstop', 'keywords=',\r
-'searchya', 'q=',\r
-'picsearch', 'q=',\r
-'webssearches', 'q=',\r
-'zapmeta', 'query=',\r
-'localmoxie', 'keyword=',\r
-'search-results_mobi', 'q=',\r
-'androidsearch', 'q=',\r
-'isearch_nation_com', 'q=',\r
-'search_zonealarm_com', 'q=',\r
-'www_buenosearch_com', 'q=',\r
-'search_foxtab_com', 'q=',\r
-'searches_qone8_com', 'q=',\r
-'startpage_com', 'query=',\r
-'qwant_com', 'q=',\r
-'safehomepage_com', 'q=',\r
-'vi-view_com', 'q=',\r
-'wow_utop_it', 'q=',\r
-'windowssearch_com', 'q=',\r
-'www_wow_com', 'q=',\r
-'globososo', 'q=',\r
-'swisscows_ch', 'query=',\r
-'preciobarato_xyz', 's=',\r
-'www_dregol_com', 'q=',\r
-'search_socialdownloadr_com', 'q=',\r
-'int_search_myway_com', 'searchfor=',\r
-'de_dolphin_com', 'q=',\r
-'mys_yoursearch_me', 'q=',\r
-# Chello Portals\r
-'chelloat','q1=',\r
-'chellobe','q1=',\r
-'chellocz','q1=',\r
-'chellofr','q1=',\r
-'chellohu','q1=',\r
-'chellonl','q1=',\r
-'chellono','q1=',\r
-'chellopl','q1=',\r
-'chellose','q1=',\r
-'chellosk','q1=',\r
-'chellocom','q1=',\r
-# Mirago\r
-'miragobe','(txtsearch|qry)=',\r
-'miragoch','(txtsearch|qry)=',\r
-'miragode','(txtsearch|qry)=',\r
-'miragodk','(txtsearch|qry)=',\r
-'miragoes','(txtsearch|qry)=',\r
-'miragofr','(txtsearch|qry)=',\r
-'miragoit','(txtsearch|qry)=',\r
-'miragonl','(txtsearch|qry)=',\r
-'miragono','(txtsearch|qry)=',\r
-'miragose','(txtsearch|qry)=',\r
-'miragocouk','(txtsearch|qry)=',\r
-'mirago','(txtsearch|qry)=',\r
-'answerbus','', # Does not provide query parameters\r
-'icq','q=',\r
-'nusearch','nusearch_terms=',\r
-'goodsearch','Keywords=',\r
-'scroogle','Gw=', # Does not always provide query parameters\r
-'questionanswering','',\r
-'mywebsearch','searchfor=',\r
-'comettoolbar','qry=',\r
-# Social Bookmarking Services\r
-'delicious','all=',\r
-'digg','s=',\r
-'stumbleupon','',\r
-'swik','swik\.net/', # does not work. Keywords follow domain, e.g. http://swik.net/awstats+analytics\r
-'segnalo','',\r
-'ineffabile','',\r
-# Minor Australian search engines\r
-'anzwers','search=',\r
-# Minor brazilian search engines\r
-'engine','p1=', 'miner','q=',\r
-# Minor chinese search engines\r
-'baidu','(wd|word)=',\r
-'iask','(w|k)=',\r
-'accoona','qt=',\r
-'3721','(p|name)=',\r
-'netease','q=',\r
-'soso','q=',\r
-'zhongsou','(word|w)=',\r
-'sogou', 'query=',\r
-'vnet','kw=',\r
-# Minor czech search engines\r
-'atlas','(searchtext|q)=', 'seznam','(w|q)=', 'quick','query=', 'centrum','q=', 'jyxo','(s|q)=', 'najdi','dotaz=', 'redbox','srch=',\r
-'avgsearch', 'q=',\r
-# Minor danish search engines\r
-'opasia','q=', 'danielsen','q=', 'sol','q=', 'jubii','soegeord=', 'finddk','words=', 'edderkoppen','query=', 'orbis','search_field=', '1klik','query=', 'ofir','querytext=',\r
-# Minor dutch search engines\r
-'ilse','search_for=', 'vindex','in=',\r
-# Minor english search engines\r
-'askuk','(ask|q)=', 'bbc','q=', 'freeserve','q=', 'looksmartuk','key=',\r
-'splut','pattern=', 'spotjockey','Search_Keyword=', 'ukindex', 'stext=', 'ukdirectory','k=', 'ukplus','search=', 'searchy', 'search_term=',\r
-'fbdownloader','q=',\r
-'fdownloadr_com', 'q=',\r
-'babylon','q=',\r
-'allgameshome', 's=',\r
-'surfcanyon_com', 'q=',\r
-'uk_foxstart_com', 'q=',\r
-'yandex_com', 'text=',\r
-# Minor finnish search engines\r
-'haku','w=',\r
-# Minor french search engines\r
-'francite','name=', 'clubinternet', 'q=',\r
-'toile', 'q=',\r
-'biglotron','question=',\r
-'mozbot','q=',\r
-# Minor german search engines\r
-'aolde','q=',\r
-'o2aolde', 'q=',\r
-'fireball','q=', 'infoseek','qt=', 'webde','su=',\r
-'abacho','q=', 't-online','q=',\r
-'metaspinner','qry=',\r
-'metacrawler_de','qry=',\r
-'wwweasel','q=',\r
-'netluchs','query=',\r
-'schoenerbrausen','q=',\r
-'gmxsuche', 'q=',\r
-'gmxsuche_at', 'q=',\r
-'ecosiasearch', 'q=',\r
-'aolsearch', 'q=',\r
-'aolsuche', 'q=',\r
-'startxxl', 'q=',\r
-'benefind', 'q=',\r
-'amazonsearch', 'query=',\r
-'wowsearch', 'q=',\r
-'vlips_de', 'q=',\r
-'metager', 'eingabe=',\r
-'search_1und1_de', 'q=',\r
-'smde', 'q=',\r
-#'sumaja', 'no query string available', #There is no query string in the referrer url\r
-'navigationshilfe', 'q=',\r
-'umfis', 'suchbegriff=',\r
-'fastbot_de', 'red=[0-9]*\+',\r
-'tixuma_de', 'sc=',\r
-'freenet_de', 'query=',\r
-'izito_de', 'q=',\r
-'peoplecheck_de', 'q=',\r
-'oneseek_de', 'q=',\r
-'de_wiki_gov_cn', 'de\.wiki\.gov\.cn\/s_',\r
-'umuwa_de', 'umuwa\.de\/',\r
-'1und1_de', 'q=',\r
-'metasuche_ch', 'q=',\r
-# Minor Hungarian search engines\r
-'heureka','heureka=', 'origo','(q|search)=', 'goliat','KERESES=', 'wahoo','q=', 'internetto','searchstr=',\r
-'keresolap_hu','q=',\r
-'startlap_hu', 'q=',\r
-'tango_hu','q=',\r
-'polymeta_hu','',\r
-# Minor Indian search engines\r
-'sify','keyword=',\r
-# Minor Italian search engines\r
-'virgilio','qs=',\r
-'arianna','query=',\r
-'supereva','q=',\r
-'kataweb','q=',\r
-'aliceitmaster','qs=',\r
-'aliceit','qs=',\r
-'gotuneed','', # Not yet known\r
-'godado','Keywords=',\r
-'jumpy\.it','searchWord=',\r
-'shinyseek\.it','KEY=',\r
-'teecnoit','q=',\r
-# Minor Israeli search engines\r
-'genieo','q=',\r
-# Minor Japanese search engines\r
-'askjp','(ask|q)=',\r
-'sagool','q=',\r
-'rakuten', 'qt=',\r
-# Minor Norwegian search engines\r
-'start','q=', 'eniro','q=',\r
-# Minor Polish search engines\r
-'wp','szukaj=',\r
-'onetpl','qt=',\r
-'dodajpl','keyword=',\r
-'gazetapl','slowo=',\r
-'gerypl','q=',\r
-'hogapl','qt=',\r
-'netsprintpl','q=',\r
-'interiapl','q=',\r
-'katalogonetpl','qt=',\r
-'o2pl','qt=',\r
-'polskapl','qt=',\r
-'szukaczpl','q=',\r
-'wowpl','q=',\r
-# Minor russian search engines\r
-'yandex', 'text=', 'rambler','words=', 'aport', 'r=', 'metabot', 'st=',\r
-'mailru', 'q=',\r
-# Minor swedish search engines\r
-'passagen','q=',\r
-'enirose', 'hitta:', #Not sure if this works, as the keywords are part of the URL, and therefore the URL does not contain a question mark.\r
-# Minor swiss search engines\r
-'searchch', 'q=', 'bluewin', 'qry=',\r
-'zapmeta_ch', 'query=',\r
-'etools_ch', 'query=',\r
-# Minor Croatian, Serbian, Macedonian, Bosnian and Herzegovinian search engines\r
-'pogodak', 'q='\r
-);\r
-\r
-# SearchEnginesKnownUrlNotFound\r
-# Known rules to extract not found keywords from a referrer search engine URL\r
-#------------------------------------------------------------------------------\r
-%SearchEnginesKnownUrlNotFound=(\r
-# Most common search engines\r
-'msn','origq='\r
-);\r
-\r
-# If no rules are known, we take first paramater not into WordsToCleanSearchUrl\r
-#------------------------------------------------------------------------------\r
-@WordsToCleanSearchUrl= ('act=','annuaire=','btng=','cat=','categoria=','cfg=','cof=','cou=','count=','cp=','dd=','domain=','dt=','dw=','enc=','exec=','geo=','hc=','height=','hits=','hl=','hq=','hs=','id=','kl=','lang=','loc=','lr=','matchmode=','medor=','message=','meta=','mode=','order=','page=','par=','pays=','pg=','pos=','prg=','qc=','refer=','sa=','safe=','sc=','sort=','src=','start=','style=','stype=','sum=','tag=','temp=','theme=','type=','url=','user=','width=','what=','\\.x=','\\.y=','y=','look=');\r
-\r
-# SearchEnginesKnownUTFCoding\r
-# Known parameter that proves a search engine has coded its parameters in UTF-8\r
-#------------------------------------------------------------------------------\r
-%SearchEnginesKnownUTFCoding=(\r
-# Most common search engines\r
-'google','ie=utf-8',\r
-'alltheweb','cs=utf-8'\r
-);\r
-\r
-\r
-# SearchEnginesHashLib\r
-# List of search engines names\r
-# 'search_engine_id', 'search_engine_name',\r
-#------------------------------------------------------------------------------\r
-%SearchEnginesHashLib=(\r
-# Major international search engines\r
-'alexa','<a href="http://www.alexa.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Alexa</a>',\r
-'alltheweb','<a href="http://www.alltheweb.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">AllTheWeb</a>',\r
-'altavista','<a href="http://www.altavista.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">AltaVista</a>',\r
-'a9', '<a href="http://www.a9.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">A9</a>',\r
-'dmoz','<a href="http://dmoz.org/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">DMOZ</a>',\r
-'google_products','<a href="http://www.google.com/products" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Google (Products)</a>',\r
-'google_base','<a href="http://base.google.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Google (Base)</a>',\r
-'google_froogle','<a href="http://froogle.google.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Froogle (Google)</a>',\r
-'google_groups','<a href="http://groups.google.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Google (Groups)</a>',\r
-'google_image','<a href="http://images.google.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Google (Images)</a>',\r
-'google_cache','<a href="http://www.google.com/help/features.html#cached" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Google (cache)</a>',\r
-'google','<a href="http://www.google.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Google</a>',\r
-'lycos','<a href="http://www.lycos.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Lycos</a>',\r
-'msn','<a href="http://search.msn.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Microsoft MSN Search</a>',\r
-'live','<a href="http://www.live.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Microsoft Windows Live</a>',\r
-'bing','<a href="http://www.bing.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Microsoft Bing</a>',\r
-'netscape','<a href="http://www.netscape.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Netscape</a>',\r
-'aol','<a href="http://www.aol.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">AOL</a>',\r
-'terra','<a href="http://www.terra.es/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Terra</a>',\r
-'tiscali','<a href="http://search.tiscali.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Tiscali</a>',\r
-'voila','<a href="http://www.voila.fr/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Voila</a>',\r
-'search.com','<a href="http://www.search.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Search.com</a>',\r
-'yahoo_mindset','<a href="http://mindset.research.yahoo.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Yahoo! Mindset</a>',\r
-'yahoo','<a href="http://www.yahoo.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Yahoo!</a>',\r
-'sympatico','<a href="http://sympatico.msn.ca/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Sympatico</a>',\r
-'excite','<a href="http://www.excite.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Excite</a>',\r
-# Minor international search engines\r
-'google4counter','<a href="http://www.4-counter.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">4-counter (Google)</a>',\r
-'att','<a href="http://www.att.net/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">AT&T search (powered by Google)</a>',\r
-'bungeebonesdotcom','<a href="http://BungeeBones.com/search.php/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">BungeeBones</a>',\r
-'go','Go.com',\r
-'askde','<a href="http://de.ask.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Ask Deutschland</a>',\r
-'askes','<a href="http://es.ask.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Ask Espa&ntilde;a</a>', # break out Ask country specific engines.\r
-'askfr','<a href="http://fr.ask.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Ask France</a>',\r
-'askit','<a href="http://it.ask.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Ask Italia</a>',\r
-'asknl','<a href="http://nl.ask.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Ask Nederland</a>',\r
-'ask','<a href="http://www.ask.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Ask</a>',\r
-'atomz','Atomz',\r
-'dejanews','DejaNews',\r
-'euroseek','Euroseek',\r
-'findarticles','Find Articles',\r
-'go2net','Go2Net (Metamoteur)',\r
-'hotbot','Hotbot',\r
-'infospace','InfoSpace',\r
-'kvasir','Kvasir',\r
-'looksmart','Looksmart',\r
-'mamma','Mamma',\r
-'metacrawler','MetaCrawler (Metamoteur)',\r
-'nbci','NBCI',\r
-'northernlight','NorthernLight',\r
-'overture','Overture',                 # Replace 'goto\.com','Goto.com',\r
-'dogpile','<a href="http://www.dogpile.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Dogpile</a>',\r
-'spray','Spray',\r
-'teoma','<a href="http://search.ask.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Teoma</a>',                                                      # Replace 'directhit\.com','DirectHit',\r
-'webcrawler','<a href="http://www.webcrawler.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">WebCrawler</a>',\r
-'wisenut','WISENut',\r
-'ixquick','<a href="http://www.ixquick.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">ix quick</a>',\r
-'earthlink', 'Earth Link',\r
-'iune','<a href="http://www.i-une.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">i-une</a>',\r
-'blingo','<a href="http://www.blingo.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Blingo</a>',\r
-'centraldatabase','<a href="http://search.centraldatabase.org/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">GPU p2p search</a>',\r
-'clusty','<a href="http://www.clusty.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Clusty</a>',\r
-'mysearch','<a href="http://www.mysearch.com" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">My Search</a>',\r
-'vivisimo','<a href="http://www.vivisimo.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Vivisimo</a>',\r
-'kartoo','<a href="http://www.kartoo.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Kartoo</a>',\r
-'icerocket','<a href="http://www.icerocket.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Icerocket (Blog)</a>',\r
-'sphere','<a href="http://www.sphere.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Sphere (Blog)</a>',\r
-'ledix','<a href="http://www.ledix.net/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Ledix</a>',\r
-'shawca','<a href="http://start.shaw.ca/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Shaw.ca</a>',\r
-'searchalot','<a href="http://www.searchalot.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Searchalot</a>',\r
-'copernic','<a href="http://www.copernic.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Copernic</a>',\r
-'avantfind','<a href="http://www.avantfind.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Avantfind</a>',\r
-'steadysearch','<a href="http://www.avantfind.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Avantfind</a>',\r
-'clarosearch','<a href="http://www.claro-search.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Claro Search</a>',\r
-'searchresults','<a href="http://www1.search-results.com/" rel="nofollow" title="Search-results Home Page [new window]" target="_blank">Search-results</a>',\r
-'holasearch', '<a href="http://www.holasearch.com" rel="nofollow" title="Hola Search Home Page [new window]" target="_blank">Hola Search</a>',\r
-'conduit', '<a href="http://search.conduit.com" rel="nofollow" title="Conduit Search Home Page [new window]" target="_blank">Conduit Search</a>',\r
-'flipora', '<a href="http://static.flipora.com/" rel="nofollow" title="Flipora Home Page [new window]" target="_blank">Flipora</a>',\r
-'delta-search', '<a href="http://www.delta-search.com/" rel="nofollow" title="Delta Search Home Page [new window]" target="_blank">Delta Search</a>',\r
-'iminent', '<a href="http://start.iminent.com/" rel="nofollow" title="Iminent Home Page [new window]" target="_blank">Iminent</a>',\r
-'searchmobileonline', '<a href="http://www.searchmobileonline.com/" rel="nofollow" title="Search Mobile Online (StartApp) Home Page [new window]" target="_blank">Search Mobile Online (StartApp)</a>',\r
-'nortonsavesearch', '<a href="http://int.search-results.com/" rel="nofollow" title="Norton Safe Search Home Page [new window]" target="_blank">Norton Safe Search</a>',\r
-'inbox', '<a href="http://www2.inbox.com/search/results1.aspx" rel="nofollow" title="Inbox Search [new window]" target="_blank">Inbox Search</a>',\r
-'govome', '<a href="http://www.govome.com/" rel="nofollow" title="Govome [new window]" target="_blank">Govome</a>',\r
-'find1friend', '<a href="http://find1friend.com/" rel="nofollow" title="Find1Friend [new window]" target="_blank">Find1Friend</a>',\r
-'mysearchdial', '<a href="http://start.mysearchdial.com/" rel="nofollow" title="My Search Dial [new window]" target="_blank">My Search Dial</a>',\r
-'speedbit', '<a href="http://go.speedbit.com/" rel="nofollow" title="Speedbit [new window]" target="_blank">Speedbit</a>',\r
-'certifiedtoolbarsearch', '<a href="http://search.certified-toolbar.com/" rel="nofollow" title="Certified-Toolbar Search [new window]" target="_blank">Certified-Toolbar Search</a>',\r
-'sweetim', '<a href="http://search.sweetim.com/" rel="nofollow" title="SweetIM Search [new window]" target="_blank">SweetIM Search</a>',\r
-'searchcompletion', '<a href="http://search.searchcompletion.com/" rel="nofollow" title="SearchCompletion Search [new window]" target="_blank">SearchCompletion Search</a>',\r
-'eazelsearch', '<a href="http://en.eazel.com/" rel="nofollow" title="Eazel Search [new window]" target="_blank">Eazel Search</a>',\r
-'searchfunmoods', '<a href="http://sr.searchfunmoods.com/" rel="nofollow" title="Funmoods [new window]" target="_blank">Funmoods</a>',\r
-'googleByIP', '<a href="http://173.194.35.177/" rel="nofollow" title="Google (Access by IP-Address) [new window]" target="_blank">Google (Access by IP-Address)</a>',\r
-'dalesearch',  '<a href="http://www.dalesearch.com/" rel="nofollow" title="Dale Search [new window]" target="_blank">Dale Search</a>',\r
-'sweetpacks', '<a href="http://www.sweetpacks-search.com/" rel="nofollow" title="Sweetpacks [new window]" target="_blank">Sweetpacks</a>',\r
-'searchgol', '<a href="http://www.searchgol.com/" rel="nofollow" title="Search-Gol [new window]" target="_blank">Search-Gol</a>',\r
-'duckduckgo',  '<a href="http://r.duckduckgo.com/" rel="nofollow" title="DuckDuckGo Home Page [new window]" target="_blank">DuckDuckGo (Does not provide search keyphrases, using found page instead)</a>',\r
-'facemoods', '<a href="http://sr.facemoods.com/" rel="nofollow" title="Facemoods Search [new window]" target="_blank">Facemoods Search</a>',\r
-'shoppstop', '<a href="http://www.shoppstop.com/" rel="nofollow" title="ShoppStop [new window]" target="_blank">ShoppStop</a>',\r
-'searchya', '<a href="http://www.searchya.com/" rel="nofollow" title="Searchya [new window]" target="_blank">Searchya</a>',\r
-'picsearch', '<a href="http://www.picsearch.de/" rel="nofollow" title="picsearch [new window]" target="_blank">picsearch</a>',\r
-'webssearches', 'Various variants of Webssearches <a href="http://www.webssearches.com/" rel="nofollow" title="Webssearches [new window]" target="_blank">EMG Technologies</a> and <a href="http://airzip.inspsearch.com/" rel="nofollow" title="airzip.inspsearch.com [new window]" target="_blank">airzip.inspsearch.com</a>',\r
-#Jan 8, 2016: No genuine inspsearch.com search engine seems so exist, but there is a couple of search engines using subdomains of inspsearch.com. Unclear how these are related to each other.\r
-'zapmeta', '<a href="http://www.zapmeta.de/" rel="nofollow" title="ZapMeta [new window]" target="_blank">ZapMeta</a>',\r
-'localmoxie', '<a href="http://www.localmoxie.com/" rel="nofollow" title="Local Moxie [new window]" target="_blank">Local Moxie</a>',\r
-'search-results_mobi', '<a href="http://search-results.mobi/" rel="nofollow" title="search-results.mobi [new window]" target="_blank">search-results.mobi</a>',\r
-'androidsearch', '<a href="http://www.androidsearch.com/" rel="nofollow" title="androidsearch.com [new window]" target="_blank">androidsearch.com</a>',\r
-'isearch_nation_com', '<a href="http://isearch.nation.com/" rel="nofollow" title="Nation Search [new window]" target="_blank">Nation Search</a>',\r
-'search_zonealarm_com', '<a href="http://search.zonealarm.com/" rel="nofollow" title="Zone Alarm Search [new window]" target="_blank">Zone Alarm Search</a>',\r
-'www_buenosearch_com', '<a href="http://www.buenosearch.com/" rel="nofollow" title="BuenoSearch [new window]" target="_blank">BuenoSearch</a>',\r
-'search_foxtab_com',  '<a href="http://search.foxtab.com/" rel="nofollow" title="Foxtab Search [new window]" target="_blank">Foxtab Search</a>',\r
-'searches_qone8_com', '<a href="http://searches.qone8.com/" rel="nofollow" title="Omiga-Plus [new window]" target="_blank">Omiga-Plus</a>',\r
-'startpage_com', '<a href="http://startpage.com/" rel="nofollow" title="Startpage [new window]" target="_blank">Startpage</a>',\r
-'qwant_com', '<a href="https://www.qwant.com/" rel="nofollow" title="qwant.com [new window]" target="_blank">qwant.com</a>',\r
-'safehomepage_com', '<a href="http://searches.safehomepage.com/" rel="nofollow" title="safehomepage.com [new window]" target="_blank">safehomepage.com</a>',\r
-'vi-view_com', '<a href="http://searches.vi-view.com/" rel="nofollow" title="vi-view.com [new window]" target="_blank">vi-view.com</a>',\r
-'wow_utop_it', '<a href="http://wow.utop.it/" rel="nofollow" title="wow.utop.it [new window]" target="_blank">wow.utop.it</a>',\r
-'windowssearch_com', '<a href="http://www.windowssearch.com/search?q=AWStats" rel="nofollow" title="Example page [new window]" target="_blank">windowssearch.com</a>',\r
-'www_wow_com', '<a href="http://www.wow.com/" rel="nofollow" title="www.wow.com [new window]" target="_blank">WOW.com</a>',\r
-'globososo', 'Various variants of Globososo (Kingtale Technology): <a href="http://www.globososo.com/" rel="nofollow" title="Globososo search page [new window]" target="_blank">www</a>, <a href="http://searches.globososo.com/" rel="nofollow" title="searches subdomain of globososo.com [new window]" target="_blank">searches</a>, <a href="http://searches3.globososo.com/" rel="nofollow" title="searches3 subdomain of globososo.com [new window]" target="_blank">searches3</a>, and at inspsearch.com (<a href="http://globososo.inspsearch.com/" rel="nofollow" title="Globososo search page at globososo.inspsearch.com [new window]" target="_blank">globososo</a>, <a href="http://kingtale3.inspsearch.com/" rel="nofollow" title="Globososo search page at kingtale3.inspsearch.com [new window]" target="_blank">kingtale3</a>)',\r
-'swisscows_ch', '<a href="https://swisscows.ch/" rel="nofollow" title="Swisscows search page [new window]" target="_blank">Swisscows</a>',\r
-'preciobarato_xyz', '<a href="http://preciobarato.xyz/" rel="nofollow" title="preciobarato.xyz offline on Oct 2, 2015 [new window]" target="_blank">Yandex</a>',\r
-'www_dregol_com', '<a href="http://www.dregol.com/" rel="nofollow" title="Dregol Search [new window]" target="_blank">Dregol Search</a>',\r
-'search_socialdownloadr_com', '<a href="http://search.socialdownloadr.com/" rel="nofollow" title="Socialdownloadr search page [new window]" target="_blank">Socialdownloadr</a>',\r
-'int_search_myway_com', '<a href="http://int.search.myway.com/" rel="nofollow" title="MyWay search page [new window]" target="_blank">MyWay</a>',\r
-'de_dolphin_com', '<a href="http://de.dolphin.com/" rel="nofollow" title="Dolphin Search search page [new window]" target="_blank">Dolphin Search</a>',\r
-'mys_yoursearch_me', '<a href="http://mys.yoursearch.me/" rel="nofollow" title="Yoursearch.me search page [new window]" target="_blank">Yoursearch.me</a>',\r
-# Chello Portals\r
-'chelloat','<a href="http://www.chello.at/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Chello Austria</a>',\r
-'chellobe','<a href="http://www.chello.be/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Chello Belgium</a>',\r
-'chellocz','<a href="http://www.chello.cz/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Chello Czech Republic</a>',\r
-'chellofr','<a href="http://www.chello.fr/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Chello France</a>',\r
-'chellohu','<a href="http://www.chello.hu/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Chello Hungary</a>',\r
-'chellonl','<a href="http://www.chello.nl/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Chello Netherlands</a>',\r
-'chellono','<a href="http://www.chello.no/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Chello Norway</a>',\r
-'chellopl','<a href="http://www.chello.pl/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Chello Poland</a>',\r
-'chellose','<a href="http://www.chello.se/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Chello Sweden</a>',\r
-'chellosk','<a href="http://www.chello.sk/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Chello Slovakia</a>',\r
-'chellocom','<a href="http://www.chello.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Chello (Country not recognized)</a>',\r
-# Mirago\r
-'miragobe','<a href="http://www.mirago.be/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Mirago Belgium</a>',\r
-'miragoch','<a href="http://www.mirago.ch/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Mirago Switzerland</a>',\r
-'miragode','<a href="http://www.mirago.de/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Mirago Germany</a>',\r
-'miragodk','<a href="http://www.mirago.dk/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Mirago Denmark</a>',\r
-'miragoes','<a href="http://es.mirago.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Mirago Spain</a>',\r
-'miragofr','<a href="http://www.mirago.fr/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Mirago France</a>',\r
-'miragoit','<a href="http://www.mirago.it/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Mirago Italy</a>',\r
-'miragonl','<a href="http://www.mirago.nl/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Mirago Netherlands</a>',\r
-'miragono','<a href="http://no.mirago.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Mirago Norway</a>',\r
-'miragose','<a href="http://www.mirago.se/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Mirago Sweden</a>',\r
-'miragocouk','<a href="http://zone.mirago.co.uk/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Mirago UK</a>',\r
-'mirago','<a href="http://www.mirago.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Mirago (country unknown)</a>',\r
-'answerbus','<a href="http://www.answerbus.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Answerbus</a>',\r
-'icq','<a href="http://www.icq.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">icq</a>',\r
-'nusearch','<a href="http://www.nusearch.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Nusearch</a>',\r
-'goodsearch','<a href="http://www.goodsearch.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">GoodSearch</a>',\r
-'scroogle','<a href="http://www.scroogle.org/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Scroogle</a>',\r
-'questionanswering','<a href="http://www.questionanswering.com/" rel="nofollow" title="Questionanswering home page [new window]" target="_blank">Questionanswering</a>',\r
-'mywebsearch','<a href="http://search.mywebsearch.com/" rel="nofollow" title="MyWebSearch home page [new window]" target="_blank">MyWebSearch</a>',\r
-'comettoolbar','<a href="http://as.starware.com/dp/search" rel="nofollow" title="Comet toolbar search home page [new window]" target="_blank">Comet toolbar search</a>',\r
-# Social Bookmarking Services\r
-'delicious','<a href="http://del.icio.us/" rel="nofollow" title="del.icio.us home page [new window]" target="_blank">del.icio.us</a> (Social Bookmark)',\r
-'digg','<a href="http://www.digg.com/" rel="nofollow" title="Digg home page [new window]" target="_blank">Digg</a> (Social Bookmark)',\r
-'stumbleupon','<a href="http://www.stumbleupon.com/" rel="nofollow" title="Stumbleupon home page [new window]" target="_blank">Stumbleupon</a> (Social Bookmark)',\r
-'swik','<a href="http://swik.net/" rel="nofollow" title="Swik home page [new window]" target="_blank">Swik</a> (Social Bookmark)',\r
-'segnalo','<a href="http://segnalo.alice.it/" rel="nofollow" title="Segnalo home page [new window]" target="_blank">Segnalo</a> (Social Bookmark)',\r
-'ineffabile','<a href="http://www.ineffabile.it/" rel="nofollow" title="Ineffabile.it home page [new window]" target="_blank">Ineffabile.it</a> (Social Bookmark)',\r
-# Minor Australian search engines\r
-'anzwers','<a href="http://anzwers.com.au/" rel="nofollow" title="anzwers.com.au home page [new window]" target="_blank">anzwers.com.au</a>',\r
-# Minor brazilian search engines\r
-'engine','Cade', 'miner','Meta Miner',\r
-# Minor chinese search engines\r
-'baidu','<a href="http://www.baidu.com/" rel="nofollow" target="_blank">Baidu</a>',\r
-'iask','<a href="http://www.iask.com/" rel="nofollow" target="_blank">Iask</a>',\r
-'accoona','<a href="http://cn.accoona.com" rel="nofollow" target="_blank">Accoona</a>',\r
-'3721','<a href="http://www.3721.com/" rel="nofollow" target="_blank">3721</a>',\r
-'netease', '<a href="http://www.163.com/" rel="nofollow" target="_blank">NetEase</a>',\r
-'soso','<a href="http://www.soso.com/" rel="nofollow" target="_blank">SoSo</a>',\r
-'zhongsou','<a href="http://www.zhongsou.com/" rel="nofollow" target="_blank">ZhongSou</a>',\r
-'sogou', '<a href="http://www.sogou.com/" rel="nofollow" target="_blank">SoGou</a>',\r
-'vnet','<a href="http://114.vnet.cn/" rel="nofollow" target="_blank">VNet</a>',\r
-# Minor czech search engines\r
-'atlas','Atlas.cz',    'seznam','Seznam', 'quick','Quick.cz', 'centrum','Centrum.cz', 'jyxo','Jyxo.cz', 'najdi','Najdi.to', 'redbox','RedBox.cz',\r
-'avgsearch', '<a href="http://isearch.avg.com" rel="nofollow" target="_blank">AVG Secure Search</a>',\r
-# Minor danish search-engines\r
-'opasia','Opasia', 'danielsen','Thor (danielsen.com)', 'sol','SOL', 'jubii','Jubii', 'finddk','Find', 'edderkoppen','Edderkoppen', 'netstjernen','Netstjernen', 'orbis','Orbis', 'tyfon','Tyfon', '1klik','1Klik', 'ofir','Ofir',\r
-# Minor dutch search engines\r
-'ilse','Ilse','vindex','Vindex\.nl',\r
-# Minor english search engines\r
-'askuk','<a href="http://uk.ask.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Ask UK</a>',\r
-'bbc','BBC', 'freeserve','Freeserve', 'looksmartuk','Looksmart UK',\r
-'splut','Splut', 'spotjockey','Spotjockey', 'ukdirectory','UK Directory', 'ukindex','UKIndex', 'ukplus','UK Plus', 'searchy','searchy.co.uk',\r
-'fbdownloader','<a href="http://search.fbdownloader.com/" rel="nofollow" title="FBDownloader Home Page [new window]" target="_blank">FBDownloader (fbdownloader)</a>',\r
-'fdownloadr_com', '<a href="http://search.fdownloadr.com/" rel="nofollow" title="FBDownloader Home Page [new window]" target="_blank">FBDownloader (fdownloadr)</a>',\r
-'babylon','<a href="http://search.babylon.com/" rel="nofollow" title="Babylon Home Page [new window]" target="_blank">Babylon</a>',\r
-'allgameshome', '<a href="http://my.allgameshome.com/" rel="nofollow" title="AllGamesHome [new window]" target="_blank">AllGamesHome</a>',\r
-'surfcanyon_com', '<a href="http://search.surfcanyon.com/" rel="nofollow" title="SurfCanyon [new window]" target="_blank">SurfCanyon</a>',\r
-'uk_foxstart_com', '<a href="http://uk.foxstart.com/" rel="nofollow" title="Foxstart search page [new window]" target="_blank">Foxstart.com</a>',\r
-'yandex_com', '<a href="http://yandex.com/" rel="nofollow" title="Yandex search page [new window]" target="_blank">Yandex</a>',\r
-# Minor finnish search engines\r
-'haku','Ihmemaa',\r
-# Minor french search engines\r
-'aolfr','AOL (fr)', 'ctrouve','C\'est trouve', 'francite','Francite', 'lbb', 'LBB', 'libertysurf', 'Libertysurf', 'free', 'Free.fr', 'clubinternet', 'Club-internet',\r
-'toile', 'Toile du Quebec',\r
-'biglotron','<a href="http://www.biglotron.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Biglotron</a>',\r
-'mozbot','<a href="http://www.mozbot.fr/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Mozbot</a>',\r
-# Minor German search engines\r
-'aolde','AOL (de)',\r
-'o2aolde', '<a href="http://o2suche.aol.de/aol/search" rel="nofollow" title="o2 Suche Home Page [new window]" target="_blank">o2 Suche</a>',\r
-'fireball','Fireball', 'infoseek','Infoseek',\r
-'webde','<a href="http://suche.web.de" rel="nofollow" title="Web.de Home Page [new window]" target="_blank">Web.de</a>',\r
-'abacho','Abacho',\r
-'t-online','T-Online',\r
-'allesklar','allesklar.de', 'meinestadt','meinestadt.de',\r
-'metaspinner','metaspinner',\r
-'metacrawler_de','metacrawler.de',\r
-'wwweasel','<a href="http://wwweasel.de/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">WWWeasel</a>',\r
-'netluchs','<a href="http://www.netluchs.de/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Netluchs</a>',\r
-'schoenerbrausen','<a href="http://www.schoenerbrausen.de/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Schoenerbrausen/</a>',\r
-'gmxsuche', '<a href="http://suche.gmx.net/" rel="nofollow" title="GMX Suche Home Page [new window]" target="_blank">GMX Suche</a>',\r
-'gmxsuche_at', '<a href="http://suche.gmx.at/" rel="nofollow" title="GMX Suche Oesterreich Home Page [new window]" target="_blank">GMX Suche Oesterreich</a>',\r
-'ecosiasearch', '<a href="http://ecosia.org" rel="nofollow" title="Ecosia Search Home Page [new window]" target="_blank">Ecosia Search</a>',\r
-'aolsearch', '<a href="http://de.aolsearch.com/" rel="nofollow" title="AOL Search Home Page [new window]" target="_blank">AOL Search</a>',\r
-'aolsuche', '<a href="http://suche.aol.de/" rel="nofollow" title="AOL Suche Home Page [new window]" target="_blank">AOL Suche</a>',\r
-'startxxl', '<a href="http://www.startxxl.com/" rel="nofollow" title="StartXXL Home Page [new window]" target="_blank">StartXXL</a>',\r
-'benefind', '<a href="http://www.benefind.de/" rel="nofollow" title="benefind Home Page [new window]" target="_blank">benefind</a>',\r
-'amazonsearch', '<a href="http://www.amazon.de/gp/bit/apps/web/SERP/search/" rel="nofollow" title="Amazon Web Search Home Page [new window]" target="_blank">Amazon Web Search</a>',\r
-'wowsearch', '<a href="http://de.wow.com/" rel="nofollow" title="Wow Search Home Page [new window]" target="_blank">Wow Search</a>',\r
-'vlips_de', '<a href="http://www.vlips.de/semags_proxy.php" rel="nofollow" title="vlips Proxy Page (Homepage not yet ready?) [new window]" target="_blank">vlips.de</a>',\r
-'metager', '<a href="http://www.metager.de" rel="nofollow" title="MetaGer Home Page [new window]" target="_blank">MetaGer</a>',\r
-'search_1und1_de', '<a href="http://search.1und1.de/" rel="nofollow" title="1&amp;1 Suche [new window]" target="_blank">1&amp;1 Suche (subdomain "search")</a>',\r
-'smde', '<a href="http://www.sm.de/" rel="nofollow" title="SM.de - Die SuchMaschine [new window]" target="_blank">SM.de - Die SuchMaschine</a>',\r
-'sumaja',  '<a href="http://www.sumaja.de/" rel="nofollow" title="Sumaja [new window]" target="_blank">Sumaja</a>',\r
-'navigationshilfe', '<a href="http://navigationshilfe.t-online.de/" rel="nofollow" title="T-Online Navigationshilfe [new window]" target="_blank">T-Online Navigationshilfe</a>',\r
-'umfis', '<a href="http://www.umfis.de/" rel="nofollow" title="UMFIS-Online Das Umweltfirmen-Informationssystem der IHKs in Deutschland [new window]" target="_blank">UMFIS-Online Das Umweltfirmen-Informationssystem der IHKs in Deutschland</a>',\r
-'fastbot_de', '<a href="http://fastbot.de/" rel="nofollow" title="Fastbot.de [new window]" target="_blank">Fastbot.de (Does not provide search keyphrases, using found page instead)</a>',\r
-'tixuma_de', '<a href="http://www.tixuma.de/" rel="nofollow" title="Tixuma Deutschland [new window]" target="_blank">Tixuma Deutschland</a>',\r
-'freenet_de', '<a href="http://suche.freenet.de/" rel="nofollow" title="suche.freenet.de [new window]" target="_blank">suche.freenet.de</a>',\r
-'izito_de',  '<a href="http://www.izito.de/" rel="nofollow" title="iZito Deutschland [new window]" target="_blank">iZito Deutschland</a>',\r
-'peoplecheck_de', '<a href="http://peoplecheck.de/" rel="nofollow" title="PeopleCheck.de [new window]" target="_blank">PeopleCheck.de</a>',\r
-'oneseek_de', '<a href="http://www.oneseek.de" rel="nofollow" title="Metasuchmaschine OneSeek.de [new window]" target="_blank">Metasuchmaschine OneSeek.de</a>',\r
-'de_wiki_gov_cn', '<a href="http://de.wiki.gov.cn/" rel="nofollow" title="Wiki Sucher [new window]" target="_blank">Wiki Sucher</a>',\r
-'umuwa_de', '<a href="http://umuwa.de/" rel="nofollow" title="Umuwa Deutschland [new window]" target="_blank">Umuwa Deutschland</a>',\r
-'1und1_de', '<a href="http://suche.1und1.de/" rel="nofollow" title="1&amp;1 Suche [new window]" target="_blank">1&amp;1 Suche (subdomain "suche")</a>',\r
-'metasuche_ch', '<a href="http://www.metasuche.ch/" rel="nofollow" title="Metasuche search page [new window]" target="_blank">Metasuche.ch</a>',\r
-# Minor hungarian search engines\r
-'heureka','Heureka', 'origo','Origo-Vizsla', 'lapkereso','Startlapkereso', 'goliat','Goliat', 'indexhu','Index', 'wahoo','Wahoo', 'webmania','webmania.hu', 'internetto','Internetto Kereso',\r
-'tango_hu','<a href="http://tango.hu/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Tango</a>',\r
-'keresolap_hu','<a href="http://keresolap.hu/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Tango keresolap</a>',\r
-'startlap_hu','<a href="http://kereso.startlap.hu/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Startlab Kereso</a>',\r
-'polymeta_hu','<a href="http://www.polymeta.hu/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Polymeta</a>',\r
-# Minor Indian search engines\r
-'sify','<a href="http://search.sify.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Sify</a>',\r
-# Minor Italian search engines\r
-'virgilio','<a href="http://www.virgilio.it/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Virgilio</a>',\r
-'arianna','<a href="http://arianna.libero.it/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Arianna</a>',\r
-'supereva','<a href="http://search.supereva.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Supereva</a>',\r
-'kataweb','<a href="http://www.kataweb.it/ricerca/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Kataweb</a>',\r
-'aliceitmaster','<a href="http://www.alice.it/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">search.alice.it.master</a>',\r
-'aliceit','<a href="http://www.alice.it/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">alice.it</a>',\r
-'gotuneed','<a href="http://www.gotuneed.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">got u need</a>',\r
-'godado','Godado.it',\r
-'jumpy\.it','Jumpy.it',\r
-'shinyseek\.it','Shinyseek.it',\r
-'teecnoit','<a href="http://www.teecno.it/" rel="nofollow" title="Teecno home page [new window]" target="_blank">Teecno</a>',\r
-# Minor Israeli search engines\r
-'genieo','<a href="http://search.genieo.com/" rel="nofollow" title="Genieo home page [new window]" target="_blank">Genieo</a>',\r
-# Minor Japanese search engines\r
-'askjp','<a href="http://www.ask.jp/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Ask Japan</a>',\r
-'sagool','<a href="http://sagool.jp/" rel="nofollow" title="Sagool home page [new window]" target="_blank">Sagool</a>',\r
-'rakuten', '<a href="http://websearch.rakuten.co.jp" rel="nofollow" title="websearch.rakuten.co.jp [new window]" target="_blank">websearch.rakuten.co.jp</a>',\r
-# Minor Norwegian search engines\r
-'start','start.no', 'eniro','<a href="http://www.eniro.no/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Eniro</a>',\r
-# Minor polish search engines\r
-'wp','<a href="http://szukaj.wp.pl/" rel="nofollow" title="Wirtualna Polska home page [new window]" target="_blank">Wirtualna Polska</a>',\r
-'onetpl','<a href="http://szukaj.onet.pl/" rel="nofollow" title="Onet.pl home page [new window]" target="_blank">Onet.pl</a>',\r
-'dodajpl','<a href="http://www.dodaj.pl/" rel="nofollow" title="Dodaj.pl home page [new window]" target="_blank">Dodaj.pl</a>',\r
-'gazetapl','<a href="http://szukaj.gazeta.pl/" rel="nofollow" title="Gazeta.pl home page [new window]" target="_blank">Gazeta.pl</a>',\r
-'gerypl','<a href="http://szukaj.gery.pl/" rel="nofollow" title="Gery.pl home page [new window]" target="_blank">Gery.pl</a>',\r
-'hogapl','<a href="http://www.hoga.pl/" rel="nofollow" title="Hoga.pl home page [new window]" target="_blank">Hoga.pl</a>',\r
-'netsprintpl','<a href="http://www.netsprint.pl/" rel="nofollow" title="NetSprint.pl home page [new window]" target="_blank">NetSprint.pl</a>',\r
-'interiapl','<a href="http://www.google.interia.pl/" rel="nofollow" title="Interia.pl home page [new window]" target="_blank">Interia.pl</a>',\r
-'katalogonetpl','<a href="http://katalog.onet.pl/" rel="nofollow" title="Katalog.Onet.pl home page [new window]" target="_blank">Katalog.Onet.pl</a>',\r
-'o2pl','<a href="http://szukaj2.o2.pl/" rel="nofollow" title="o2.pl home page [new window]" target="_blank">o2.pl</a>',\r
-'polskapl','<a href="http://szukaj.polska.pl/" rel="nofollow" title="Polska home page [new window]" target="_blank">Polska</a>',\r
-'szukaczpl','<a href="http://www.szukacz.pl/" rel="nofollow" title="Szukacz home page [new window]" target="_blank">Szukacz</a>',\r
-'wowpl','<a href="http://szukaj.wow.pl/" rel="nofollow" title="Wow.pl home page [new window]" target="_blank">Wow.pl</a>',\r
-# Minor russian search engines\r
-'yandex', 'Yandex', 'aport', 'Aport', 'rambler', 'Rambler', 'turtle', 'Turtle', 'metabot', 'MetaBot',\r
-'mailru','<a href="http://go.mail.ru/" rel="nofollow" title="Mail.Ru home page [new window]" target="_blank">Mail.Ru</a>',\r
-# Minor Swedish search engines\r
-'passagen','Evreka',\r
-'enirose','<a href="http://www.eniro.se/" rel="nofollow" title="Eniro Sverige home page [new window]" target="_blank">Eniro Sverige</a>',\r
-# Minor Slovak search engines\r
-'zoznam','<a href="http://www.zoznam.sk/" rel="nofollow" title="Zoznam search engine home page [new window]" target="_blank">Zoznam</a>',\r
-# Minor Portuguese search engines\r
-'sapo','<a href="http://www.sapo.pt/" rel="nofollow" title="Sapo search engine home page [new window]" target="_blank">Sapo</a>',\r
-# Minor Swiss search engines\r
-'searchch', 'search.ch', 'bluewin', 'search.bluewin.ch',\r
-'zapmeta_ch', '<a href="http://www.zapmeta.ch/" rel="nofollow" title="ZapMeta.ch [new window]" target="_blank">ZapMeta.ch</a>',\r
-'etools_ch', '<a href="http://www.etools.ch/" rel="nofollow" title="eTools.ch [new window]" target="_blank">eTools.ch</a>',\r
-# Minor Croatian, Serbian, Macedonian, Bosnian and Herzegovinian search engines\r
-'pogodak','<a href="http://www.pogodak.com" rel="nofollow" title="Pogodak search engine home page [new window]" target="_blank">Pogodak.com</a>',\r
-# Generic search engines\r
-'search','Unknown search engines'\r
-);\r
-\r
-\r
-# Sanity check.\r
-# Enable this code and run perl search_engines.pm to check file entries are ok\r
-#-----------------------------------------------------------------------------\r
-#foreach my $key (@SearchEnginesSearchIDOrder_list1) {\r
-#      if (! $SearchEnginesHashID{$key}) { error("Entry '$key' has been found in SearchEnginesSearchIDOrder_list1 with no value in SearchEnginesHashID");\r
-#      foreach my $key2 (@SearchEnginesSearchIDOrder_list2) { if ($key2 eq $key) { error("$key is in 1 and 2\n"); } }\r
-#      foreach my $key2 (@SearchEnginesSearchIDOrder_listgen) { if ($key2 eq $key) { error("$key is in 1 and gen\n"); } }\r
-#} }\r
-#foreach my $key (@SearchEnginesSearchIDOrder_list2) {\r
-#      if (! $SearchEnginesHashID{$key}) { error("Entry '$key' has been found in SearchEnginesSearchIDOrder_list1 with no value in SearchEnginesHashID");\r
-#      foreach my $key2 (@SearchEnginesSearchIDOrder_list1) { if ($key2 eq $key) { error("$key is in 2 and 1\n"); } }\r
-#      foreach my $key2 (@SearchEnginesSearchIDOrder_listgen) { if ($key2 eq $key) { error("$key is in 2 and gen\n"); } }\r
-#} }\r
-#foreach my $key (@SearchEnginesSearchIDOrder_listgen) { if (! $SearchEnginesHashID{$key}) { error("Entry '$key' has been found in SearchEnginesSearchIDOrder_listgen with no value in SearchEnginesHashID"); } }\r
-#foreach my $key (keys %NotSearchEnginesKeys) { if (! $SearchEnginesHashID{$key}) { error("Entry '$key' has been found in NotSearchEnginesKeys with no value in SearchEnginesHashID"); } }\r
-#foreach my $key (keys %SearchEnginesKnownUrl) {\r
-#      my $found=0;\r
-#      foreach my $key2 (values %SearchEnginesHashID) {\r
-#              if ($key eq $key2) { $found=1; last; }\r
-#      }\r
-#      if (! $found) { die "Entry '$key' has been found in SearchEnginesKnownUrl with no value in SearchEnginesHashID"; }\r
-#}\r
-#foreach my $key (keys %SearchEnginesHashLib) {\r
-#      my $found=0;\r
-#      foreach my $key2 (values %SearchEnginesHashID) {\r
-#              if ($key eq $key2) { $found=1; last; }\r
-#      }\r
-#      if (! $found) { die "Entry '$key' has been found in SearchEnginesHashLib with no value in SearchEnginesHashID"; }\r
-#}\r
-#print @SearchEnginesSearchIDOrder_list1." ".@SearchEnginesSearchIDOrder_list2." ".@SearchEnginesSearchIDOrder_listgen;\r
-\r
-1;\r
+# AWSTATS SEARCH ENGINES DATABASE
+#------------------------------------------------------------------------------
+# If you want to add a Search Engine to extend AWStats database detection capabilities,
+# you must add an entry in SearchEnginesSearchIDOrder, SearchEnginesHashID and in
+# SearchEnginesHashLib.
+# An entry if known in SearchEnginesKnownUrl is also welcome.
+#
+# to eldy: Please check if the following description is correct:
+# You need the following information to specify a search engine:
+# (a) A regular expression that matches the referrer string of the
+#    search engine. Unclear: What about slashes in the name of
+#    a search engine, e.g. as in 'ecosia.com/search'. Seems that
+#    AWStats will not find search strings containing a slash.
+#    Maybe use a search string without a slash, and - if necessary -
+#    an entry in %NotSearchEnginesKeys , if this search string
+#    matches entries that are not search engines.
+#    Example of a web address of an Amazon search engine:
+#    http://www.amazon.de/gp/bit/apps/web/SERP/search/ref=bit_bds-p24_serp_cr_de?ie=UTF8tagbase=bds-p24&query=deutsch+8.+klasse+gymnasium+protokoll
+# (b) A unique string to identify the search engine within AWStats
+# (c) A regular expression that finds the start of the query part in the
+#    referrer string
+# (d) A HTML-fragment that goes into the reports generated by AWStats which
+#    identifies the search engine to human reader of the report. In the
+#    simplest case this is a string containing the name of the search
+#    engine. You can also provide a hypertext clause that presents the
+#    name together with a link to the search engine.
+#
+# The regular expression (a) goes into SearchEnginesSearchIDOrder_list1
+# or ..._list2. List 1 contains common search engines, list 2 those
+# that are not so often used.
+#
+# SearchEnginesHashID contains two consecutive entries for each search
+# engine: The regular expression (a) followed by the search engine
+# identifier (b)
+#
+# SearchEnginesKnownUrl specifies how to find the start of the query.
+# For each search engine you enter the search engine identifier (b)
+# followed by the regular expression (c). Unclear: It is possible to
+# omit this entry. If you do this, how will AWStats find the start of
+# the query?
+#
+# SearchEnginesHashLib contains also two entries for each search engine:
+# The search engine identifier (b) followed by the HTML-Fragment (d)
+#
+# There are search engines that do not use a query part in their URLs.
+# They put the search expression in the main part of the URL instead.
+# AWStats is able to handle these cases. They are specified as described
+# above, except the following two things:
+# - The regular expression (c) searches the complete URL and not only
+#   the query part.
+# - An additional Entry in the list %SearchEnginesWithKeysNotInQuery is
+#   necessary.
+#
+#
+# AWStats runs a sanity check of the contents of search_engines.pm. This
+# check detects the following things:
+# - Inconsistencies (number of entries)
+# It does not detect the following errors:
+# - If the HTML-Fragment (d) is syntactically incorrect.
+#
+#------------------------------------------------------------------------------
+
+# 2005-08-19 Sean Carlos http://www.antezeta.com/awstats.html
+#            added minor italian search engines
+#                  arianna http://arianna.libero.it/
+#                  supereva http://search.supereva.com/
+#                  kataweb http://kataweb.it/
+#            corrected uk looksmart
+#                  'askuk','ask=', 'bbc','q=', 'freeserve','q=', 'looksmart','key=',
+#            to
+#                  'askuk','ask=', 'bbc','q=', 'freeserve','q=', 'looksmartuk','key=',
+#            corrected spelling
+#                     internationnal -> international
+#            added 'google\.'=>'mail\.google\.', to NotSearchEnginesKeys in order to
+#            avoid counting gmail referrals as search engine traffic
+# 2005-08-21 Sean Carlos http://www.antezeta.com/awstats.html
+#            avoid counting babelfish.altavista referrals as search engine traffic
+#            avoid counting translate.google referrals as search engine traffic
+# 2005-11-20 Sean Carlos
+#           added missing 'tiscali','key=', entry.  Check order
+# 2005-11-22 Sean Carlos
+#           added Google Base & Froogle.  Froogle not tested.
+# 2006-04-18 Sean Carlos http://www.antezeta.com/awstats.html
+#           added biglotron.com (France)
+#           added blingo http://www.blingo.com/
+#           added Clusty & Vivisimo
+#           added eniro.no (Norway) [https://sourceforge.net/forum/message.php?msg_id=3134783]
+#           added GPU p2p search http://search.centraldatabase.org/
+#           added mail.tiscali to "not search engines list" [https://sourceforge.net/forum/message.php?msg_id=3166688]
+#           added Ask group's "mysearch"
+#           added sify.com (India)
+#           added sogou.com (China) [https://sourceforge.net/forum/message.php?msg_id=3501603]
+#           Ask changes:
+#           - added Ask Japan (ask.jp)
+#           - break out Ask new country level variants (DE, ES, FR, IT, NL)
+#           - updated Ask name from Ask Jeeves
+#           - added Ask q= parameter - many recent searches probably not recognized; [https://sourceforge.net/forum/message.php?msg_id=3465444]
+#           - updated Ask uk (new uk.ask.com added to older ask.co.uk)
+#           updated voila kw|rdata parameter [https://sourceforge.net/forum/message.php?msg_id=3373912]
+#           for each new engine, added link to Search Engine.  This serves to document engine. Done for major & Italian engines as well. Requires patch
+#              to AWStats to allow untranslated html.  Otherwise html will appear instead of link.
+#           reviewed mnoGoSearch (http://www.mnogosearch.org/); the search engine mentioned no longer
+#              exists https://sourceforge.net/forum/message.php?msg_id=3025426
+# 2006-05-13 Sean Carlos http://www.antezeta.com/awstats.html
+#            added 10 Chello European broadband portals (Austria, Belgium, Czech Republic, France, Hungary, The Netherlands, Norway, Poland, Slovakia, Sweden)
+#           added Alice Internal Search (blends data with Google?) search.alice.it.master:10005
+#            added detection of google cache views from IPs 66.249.93.104 72.14.203.104 72.14.207.104
+#              To do: add more extensive IP list; keywords not yet detected.
+#            added icerocket.com blog search http://www.icerocket.com/
+#           added live.com (msn) http://www.live.com/
+#           added Meta motor kartoo.  Note: Kartoo does not provide search words in referrers, thus the engine will appear in the
+#              search engine list but the actual search words are not available.
+#           added netluchs.de http://www.netluchs.de/
+#           added sphere.com blog search http://www.sphere.com/
+#           added wwweasel.de http://wwweasel.de
+#           added Yahoo Mindset! http://mindset.research.yahoo.com/
+#            updated Mirago query parameter recognition (qry=); added breakout for each country (France, Germany, Spain, Italy, Norway, Sweden, Denmark, Netherlands, Belgium, Switzerland)
+# 2006-05-13 Sean Carlos http://www.antezeta.com/awstats.html
+#           added Google cache IPs 64.233.183.104 & 66.102.7.104
+# 2006-05-20 Sean Carlos http://www.antezeta.com/awstats.html
+#              anzwers.com.au
+#              schoenerbrausen.de http://www.schoenerbrausen.de/
+#              added Google cache IP 216.239.59.104
+#              answerbus http://www.answerbus.com/ (does not provide keywords)
+# 2006-05-23 Sean Carlos http://www.antezeta.com/awstats.html
+#              added Google cache IP 66.102.9.104, 64.233.161.104
+# 2006-06-23 Sean Carlos http://www.antezeta.com/awstats.html
+#              added Alice Search search.alice.it
+#              added GoodSearch http://www.goodsearch.com/ (does not provide keywords) "a Yahoo-powered search engine that donates money to your favorite charity or school each time you search the web"
+#              added googlee.com, variant of Google
+#              added gotuneed http://www.gotuneed.com/ Italian search engine, in beta
+#              added icq.com
+#              added logic to parse Google Cache search keywords. Seems to work for alpha but not numeric cache IDs, i.e. search?q=cache:lWVLmnuGJswJ: is recognized but q=cache:Yv5qxeJNuhgJ: is not recognized. The URL triggering the keywords will also appear.  The URLs are probably too varied to parse out?
+#              added Nusearch http://www.nusearch.com/
+#              added Polymeta www.polymeta.hu (does not provide keywords)
+#              added scroogle http://www.scroogle.org/ (does not always provide keywords)
+#              added Tango http://tango.hu/search.php?st=0&q=jeles+napok
+#              Changed Google Cache notation 64\.233\.(161|167|179|183|187)\.104 to 64\.233\.1[0-9]{2}\.104
+#                                            72\.14\.(203|205|207|209|221)\.104 to 72\.14\.2[0-9]{2}\.104
+#                                            216\.239\.(51|59)\.104 to 216\.239\.5[0-9]\.104
+#                                            66\.102\.(7|9)\.104 to 66\.102\.[1-9]\.104
+# 2006-06-27 Sean Carlos http://www.antezeta.com/awstats.html
+#              added Onet.pl http://szukaj.onet.pl/
+#              corrected name "Wirtualna Polska" from "Szukaj" (search); added link http://szukaj.wp.pl/
+# 2006-06-30 Sean Carlos http://www.antezeta.com/awstats.html
+#      Additional Polish Search Engines:
+#      added Dodaj.pl http://www.dodaj.pl/
+#      added Gazeta.pl http://szukaj.gazeta.pl/
+#      added Gery.pl http://szukaj.gery.pl/
+#      added Hoga.pl http://www.hoga.pl/
+#      added Interia.pl http://www.google.interia.pl/
+#      added Katalog.Onet.pl http://katalog.onet.pl/
+#      added NetSprint.pl http://www.netsprint.pl/
+#      added o2.pl http://szukaj2.o2.pl/
+#      added Polska http://szukaj.polska.pl/
+#      added Szukacz http://www.szukacz.pl/
+#      added Wow.pl http://szukaj.wow.pl/
+#      added Sagool http://sagool.jp/
+
+# 2006-08-25 Social Bookmarks
+#      International
+#      added del.icio.us/search - for now, just search referrer. To do: consider /tag/(tagname) referrer?
+#      added stumbleupon.com - No keywords supplied.
+#      added swik.net
+#       added digg. Keywords sometimes supplied.
+#      Italy
+#      added segnalo.alice.it - No keywords supplied.
+#      added ineffabile.it - No keywords supplied.
+
+#       added filter for google groups.  Attempt to parse group name as keyword.
+
+# 2006-09-14
+#      added Eniro Sverige http://www.eniro.se/
+#      added MyWebSearch http://search.mywebsearch.com/
+#      added Teecno http://www.teecno.it/ Italian Open Source Search Engine
+
+#package AWSSE;
+
+# 2006-09-25 (Gabor Moizes)
+# added 4-counter (Google alternative) http://4-counter.com/
+# added Googlecom (Google alternative) http://googlecom.com/
+# added Goggle (Google alternative) http://goggle.co.hu/
+# added Comet toolbar http://as.starware.com
+# added new IP for Yahoo: 216.109.125.130
+# added Ledix http://ledix.net/
+# added AT&T search (powered by Google) http://www.att.net/
+# added Keresolap (Hungarian search engine) http://www.keresolap.hu/
+# added Mozbot (French search engine) http://www.mozbot.fr/
+# added Zoznam (Slovak search engine) http://www.zoznam.sk/
+# added sapo.pt (Portuguese search engine) http://www.sapo.pt/
+# added shaw.ca (powered by Google) http://start.shaw.ca/
+# added Searchalot http://www.searchalot.com/
+# added Copernic http://www.copernic.com/
+# added 216.109.125.130 to Yahoo
+# added 66.218.69.11 to Yahoo
+# added Avantfind http://www.avantfind.com/
+# added Steadysearch http://www.steadysearch.com/
+# added Steadysearch http://www.steady-search.com/
+# modified 216\.239\.5[0-9]\.104/search to 216\.239\.5[0-9]\.104
+
+
+# SearchEnginesSearchIDOrder
+# It contains all matching criteria to search for in log fields. This list is
+# used to know in which order to search Search Engines IDs.
+# The most frequent ones are in list1, used when LevelForSearchEnginesDetection is 1 or more
+# Minor robots are in list2, used when LevelForSearchEnginesDetection is 2 or more
+# Note: Regex IDs are in lower case and ' ' and '+' are changed into '_'
+#------------------------------------------------------------------------------
+@SearchEnginesSearchIDOrder_list1=(   # Most frequent engines; used when LevelForSearchEnginesDetection is 1 or more
+# Major international search engines. Each entry is a regex (lower case); list order is the search order.
+'google\.[\w.]+/products',   # NOTE(review): presumably Google product search - confirm
+'base\.google\.',            # Google Base
+'froogle\.google\.',         # Froogle (changelog notes it was not tested)
+'groups\.google\.',          # Google Groups
+'images\.google\.',
+'google\.',                  # generic Google pattern, listed after the more specific Google entries above
+'googlee\.',                 # googlee.com, variant of Google
+'googlecom\.com',            # Googlecom (Google alternative)
+'goggle\.co\.hu',            # Goggle (Google alternative)
+'216\.239\.32\.20',          # NOTE(review): bare IP entries here are presumably Google hosts - confirm
+'173\.194\.32\.223',
+'216\.239\.(35|37|39|51)\.100',
+'216\.239\.(35|37|39|51)\.101',
+'216\.239\.5[0-9]\.104',     # Google cache IPs (range form of the former (51|59) list)
+'64\.233\.1[0-9]{2}\.104',   # Google cache IPs (range form of the former (161|167|179|183|187) list)
+'66\.102\.[1-9]\.104',       # Google cache IPs (range form of the former (7|9) list)
+'66\.249\.93\.104',          # Google cache IP
+'72\.14\.2[0-9]{2}\.104',    # Google cache IPs (range form of the former (203|205|207|209|221) list)
+'msn\.',
+'live\.com',                 # live.com (msn)
+'bing\.',
+'voila\.',
+'mindset\.research\.yahoo',  # Yahoo Mindset; listed before the generic yahoo pattern
+'yahoo\.','(66\.218\.71\.225|216\.109\.117\.135|216\.109\.125\.130|66\.218\.69\.11)',   # Yahoo by name, then by IP (216.109.125.130 and 66.218.69.11 were added per the changelog)
+'search\.aol\.co',           # NOTE(review): no trailing 'm' - presumably meant to cover .com and .co.* hosts; confirm
+'tiscali\.',
+'lycos\.',
+'alexa\.com',
+'alltheweb\.com',
+'altavista\.',
+'a9\.com',
+'dmoz\.org',
+'netscape\.',
+'search\.terra\.',
+'www\.search\.com',
+'search\.sli\.sympatico\.ca',
+'excite\.'
+);
+
+@SearchEnginesSearchIDOrder_list2=(
+# Minor international search engines
+'4\-counter\.com',
+'att\.net',
+'bungeebonesdotcom',
+'northernlight\.',
+'hotbot\.',
+'kvasir\.',
+'webcrawler\.',
+'metacrawler\.',
+'go2net\.com',
+'(^|\.)go\.com',
+'euroseek\.',
+'looksmart\.',
+'spray\.',
+'nbci\.com\/search',
+'de\.ask.\com', # break out Ask country specific engines.  (.jp is in Japan section)
+'es\.ask.\com',
+'fr\.ask.\com',
+'it\.ask.\com',
+'nl\.ask.\com',
+'uk\.ask.\com',
+'(^|\.)ask\.com',
+'atomz\.',
+'overture\.com',               # Replace 'goto\.com','Goto.com',
+'teoma\.',
+'findarticles\.com',
+'infospace\.com',
+'mamma\.',
+'dejanews\.',
+'dogpile\.com',
+'wisenut\.com',
+'ixquick\.com',
+'search\.earthlink\.net',
+'i-une\.com',
+'blingo\.com',
+'centraldatabase\.org',
+'clusty\.com',
+'mysearch\.',
+'vivisimo\.com',
+'kartoo\.com',
+'icerocket\.com',
+'sphere\.com',
+'ledix\.net',
+'start\.shaw\.ca',
+'searchalot\.com',
+'copernic\.com',
+'avantfind\.com',
+'steadysearch\.com',
+'steady-search\.com',
+'claro-search\.com',
+'www1\.search-results\.com',
+'www\.holasearch\.com',
+'search\.conduit\.com',
+'static\.flipora\.com',
+'(?:www[12]?|mixidj)\.delta-search\.com',
+'start\.iminent\.com',
+'www\.searchmobileonline\.com',
+'int\.search-results\.com',
+'www2\.inbox\.com',
+'www\.govome\.com',
+'find1friend\.com',
+'start\.mysearchdial\.com',
+'go\.speedbit\.com',
+'search\.certified-toolbar\.com',
+'search\.sweetim\.com',
+'search\.searchcompletion\.com',
+'en\.eazel\.com',
+'sr\.searchfunmoods\.com',
+'173\.194\.35\.177',
+'dalesearch\.com',
+'sweetpacks-search\.com',
+'searchgol\.com',
+'duckduckgo\.com',
+'sr\.facemoods\.com',
+'shoppstop\.com',
+'searchya\.com',
+'picsearch\.de',
+'webssearches\.com',
+'airzip\.inspsearch\.com',
+'zapmeta\.de',
+'localmoxie\.com',
+'search-results\.mobi',
+'androidsearch\.com',
+'isearch\.nation\.com',
+'search\.zonealarm\.com',
+'www\.buenosearch\.com',
+'search\.foxtab\.com',
+'searches\.qone8\.com',
+'startpage\.com',
+'www\.qwant\.com',
+'searches\.safehomepage\.com',
+'searches\.vi-view\.com',
+'wow\.utop\.it',
+'windowssearch\.com',
+'www\.wow\.com',
+'globososo\.',
+'kingtale3\.inspsearch\.com',
+'swisscows\.ch',
+'preciobarato\.xyz',
+'www\.dregol\.com',
+'search\.socialdownloadr\.com',
+'int\.search\.myway\.com',
+'de\.dolphin\.com',
+'mys\.yoursearch\.me',
+# Chello Portals
+'chello\.at',
+'chello\.be',
+'chello\.cz',
+'chello\.fr',
+'chello\.hu',
+'chello\.nl',
+'chello\.no',
+'chello\.pl',
+'chello\.se',
+'chello\.sk',
+'chello', # required as catchall for new countries not yet known
+# Mirago
+'mirago\.be',
+'mirago\.ch',
+'mirago\.de',
+'mirago\.dk',
+'es\.mirago\.com',
+'mirago\.fr',
+'mirago\.it',
+'mirago\.nl',
+'no\.mirago\.com',
+'mirago\.se',
+'mirago\.co\.uk',
+'mirago', # required as catchall for new countries not yet known
+'answerbus\.com',
+'icq\.com\/search',
+'nusearch\.com',
+'goodsearch\.com',
+'scroogle\.org',
+'questionanswering\.com',
+'mywebsearch\.com',
+'as\.starware\.com',
+# Social Bookmarking Services
+'del\.icio\.us',
+'digg\.com',
+'stumbleupon\.com',
+'swik\.net',
+'segnalo\.alice\.it',
+'ineffabile\.it',
+# Minor Australian search engines
+'anzwers\.com\.au',
+# Minor brazilian search engines
+'engine\.exe', 'miner\.bol\.com\.br',
+# Minor chinese search engines
+'\.baidu\.com',     # baidu search portal
+'\.vnet\.cn',       # powered by MSN
+'\.soso\.com',      # powered by Google
+'\.sogou\.com',     # powered by Sohu
+'\.3721\.com',      # powered by Yahoo!
+'iask\.com',        # powered by Sina
+'\.accoona\.com',   # Accoona
+'\.163\.com',       # powered by Google
+'\.zhongsou\.com',  # zhongsou search portal
+# Minor czech search engines
+'atlas\.cz','seznam\.cz','quick\.cz','centrum\.cz','jyxo\.(cz|com)','najdi\.to','redbox\.cz',
+'isearch\.avg\.com',
+# Minor danish search-engines
+'opasia\.dk', 'danielsen\.com', 'sol\.dk', 'jubii\.dk', 'find\.dk', 'edderkoppen\.dk', 'netstjernen\.dk', 'orbis\.dk', 'tyfon\.dk', '1klik\.dk', 'ofir\.dk',
+# Minor dutch search engines
+'ilse\.','vindex\.',
+# Minor english search engines
+'(^|\.)ask\.co\.uk','bbc\.co\.uk/cgi-bin/search','ifind\.freeserve','looksmart\.co\.uk','splut\.','spotjockey\.','ukdirectory\.','ukindex\.co\.uk','ukplus\.','searchy\.co\.uk',
+'search\.fbdownloader\.com',
+'search\.fdownloadr\.com',
+'search\.babylon\.com',
+'my\.allgameshome\.com',
+'surfcanyon\.com',
+'uk\.foxstart\.com',
+'yandex\.com',
+# Minor finnish search engines
+'haku\.www\.fi',
+# Minor french search engines
+'recherche\.aol\.fr','ctrouve\.','francite\.','\.lbb\.org','rechercher\.libertysurf\.fr', 'search[\w\-]+\.free\.fr', 'recherche\.club-internet\.fr',
+'toile\.com', 'biglotron\.com',
+'mozbot\.fr',
+# Minor german search engines
+'sucheaol\.aol\.de',
+'o2suche\.aol\.de',
+'fireball\.de','infoseek\.de','suche\d?\.web\.de','[a-z]serv\.rrzn\.uni-hannover\.de',
+'suchen\.abacho\.de','(brisbane|suche)\.t-online\.de','allesklar\.de','meinestadt\.de',
+'212\.227\.33\.241',
+'(161\.58\.227\.204|161\.58\.247\.101|212\.40\.165\.90|213\.133\.108\.202|217\.160\.108\.151|217\.160\.111\.99|217\.160\.131\.108|217\.160\.142\.227|217\.160\.176\.42)',
+'wwweasel\.de',
+'netluchs\.de',
+'schoenerbrausen\.de',
+'suche\.gmx\.net',
+'suche\.gmx\.at',
+'ecosia\.org',
+'de\.aolsearch\.com',
+'suche\.aol\.de',
+'www\.startxxl\.com',
+'www\.benefind\.de',
+'www\.amazon\.de.*search', #Just as a reminder, probably will not work as AWstats seems to consider the host part of an URL only
+'de\.wow\.com',
+'www\.vlips\.de',
+'metager\.de',
+'search\.1und1\.de',
+'sm\.de',
+'sumaja\.de',
+'navigationshilfe\.t-online\.de',
+'umfis\.de',
+'fastbot\.de',
+'tixuma\.de',
+'suche\.freenet\.de',
+'www\.izito\.de',
+'extern\.peoplecheck\.de',
+'www\.oneseek\.de',
+'de\.wiki\.gov\.cn',
+'umuwa\.de',
+'suche\.1und1\.de',
+'www\.metasuche\.ch',
+# Minor Hungarian search engines
+'heureka\.hu','vizsla\.origo\.hu','lapkereso\.hu','goliat\.hu','index\.hu','wahoo\.hu','webmania\.hu','search\.internetto\.hu',
+'tango\.hu',
+'keresolap\.hu',
+'kereso\.startlap\.hu',
+'polymeta\.hu',
+# Minor Indian search engines
+'sify\.com',
+# Minor Italian search engines
+'virgilio\.it','arianna\.libero\.it','supereva\.com','kataweb\.it','search\.alice\.it\.master','search\.alice\.it','gotuneed\.com',
+'godado','jumpy\.it','shinyseek\.it','teecno\.it',
+# Minor Israeli search engines
+'search\.genieo\.com',
+# Minor Japanese search engines
+'ask\.jp','sagool\.jp',
+'websearch\.rakuten\.co\.jp',
+# Minor Norwegian search engines
+'sok\.start\.no', 'eniro\.no',
+# Minor Polish search engines
+'szukaj\.wp\.pl','szukaj\.onet\.pl','dodaj\.pl','gazeta\.pl','gery\.pl','hoga\.pl','netsprint\.pl','interia\.pl','katalog\.onet\.pl','o2\.pl','polska\.pl','szukacz\.pl','wow\.pl',
+# Minor russian search engines
+'ya(ndex)?\.ru', 'aport\.ru', 'rambler\.ru', 'turtle\.ru', 'metabot\.ru',
+'go\.mail\.ru',
+# Minor Swedish search engines
+'evreka\.passagen\.se','eniro\.se',
+# Minor Slovak search engines
+'zoznam\.sk',
+# Minor Portuguese search engines
+'sapo\.pt',
+# Minor swiss search engines
+'search\.ch', 'search\.bluewin\.ch',
+'www\.zapmeta\.ch',
+'etools\.ch',
+# Minor Croatian, Serbian, Macedonian, Bosnian and Herzegovinian search engines
+'pogodak\.'
+);
+@SearchEnginesSearchIDOrder_listgen = (
+    # Generic fallback: "search." followed later by a dot and word characters
+    'search\..*\.\w+',
+);
+
+
+# NotSearchEnginesKeys
+# Maps a search engine key (a regex string) to an exclusion regex. If a search
+# engine key is found, its exclusion regex is checked to know if the referrer
+# is really a search engine.
+# A Perl hash keeps a single value per key, so every exclusion that belongs to
+# one key must be merged into one alternation; repeating the key would
+# silently keep only the last entry.
+#------------------------------------------------------------------------------
+%NotSearchEnginesKeys=(
+'altavista\.'=>'babelfish\.altavista\.',
+'google\.'=>'(?:mail|translate|code|groups)\.google\.',
+'msn\.'=>'hotmail\.msn\.',
+'tiscali\.'=>'mail\.tiscali\.',
+'yahoo\.'=>'(?:picks|mail)\.yahoo\.|yahoo\.[^/]+/picks',
+'yandex\.'=>'direct\.yandex\.'
+);
+
+
+# SearchEnginesHashID
+# Associates each Search Engine Search ID (a regex string) with an AWStats id
+# string. The table is written as flat 'pattern','id' pairs.
+# NOTE(review): the keys are presumably looked up by exact string, so a key
+# should stay spelled exactly like the matching entry of the search ID order
+# lists -- confirm whenever a pattern is edited.
+#------------------------------------------------------------------------------
+%SearchEnginesHashID = (
+# Major international search engines
+'google\.[\w.]+/products','google_products',
+'base\.google\.','google_base',
+'froogle\.google\.','google_froogle',
+'groups\.google\.','google_groups',
+'images\.google\.','google_image',
+'google\.','google',
+'googlee\.','google',
+'googlecom\.com','google',
+'goggle\.co\.hu','google',
+'216\.239\.32\.20', 'google',
+'173\.194\.32\.223', 'google',
+'216\.239\.(35|37|39|51)\.100','google_cache',
+'216\.239\.(35|37|39|51)\.101','google_cache',
+'216\.239\.5[0-9]\.104','google_cache',
+'64\.233\.1[0-9]{2}\.104','google_cache',
+'66\.102\.[1-9]\.104','google_cache',
+'66\.249\.93\.104','google_cache',
+'72\.14\.2[0-9]{2}\.104','google_cache',
+'msn\.','msn',
+'live\.com','live',
+'bing\.','bing',
+'voila\.','voila',
+'mindset\.research\.yahoo','yahoo_mindset',
+'yahoo\.','yahoo','(66\.218\.71\.225|216\.109\.117\.135|216\.109\.125\.130|66\.218\.69\.11)','yahoo',
+'lycos\.','lycos',
+'alexa\.com','alexa',
+'alltheweb\.com','alltheweb',
+'altavista\.','altavista',
+'a9\.com','a9',
+'dmoz\.org','dmoz',
+'netscape\.','netscape',
+'search\.terra\.','terra',
+'www\.search\.com','search.com',
+'tiscali\.','tiscali',
+'search\.aol\.co','aol',
+'search\.sli\.sympatico\.ca','sympatico',
+'excite\.','excite',
+# Minor international search engines
+'4\-counter\.com','google4counter',
+'att\.net','att',
+'bungeebonesdotcom','bungeebonesdotcom',
+'northernlight\.','northernlight',
+'hotbot\.','hotbot',
+'kvasir\.','kvasir',
+'webcrawler\.','webcrawler',
+'metacrawler\.','metacrawler',
+'go2net\.com','go2net',
+'(^|\.)go\.com','go',
+'euroseek\.','euroseek',
+'looksmart\.','looksmart',
+'spray\.','spray',
+'nbci\.com\/search','nbci',
+# Ask country specific engines are broken out. The dot before "com" has to be
+# escaped as '\.': the former spelling '.\com' contained the control-character
+# escape '\co' and therefore could never match a host such as de.ask.com.
+# NOTE(review): the search ID order list is defined outside this block; make
+# sure its Ask entries use this same spelling.
+'de\.ask\.com','askde',
+'es\.ask\.com','askes',
+'fr\.ask\.com','askfr',
+'it\.ask\.com','askit',
+'nl\.ask\.com','asknl',
+'uk\.ask\.com','askuk',
+'(^|\.)ask\.co\.uk','askuk',
+'(^|\.)ask\.com','ask',
+'atomz\.','atomz',
+'overture\.com','overture',            # Replace 'goto\.com','Goto.com',
+'teoma\.','teoma',
+'findarticles\.com','findarticles',
+'infospace\.com','infospace',
+'mamma\.','mamma',
+'dejanews\.','dejanews',
+'dogpile\.com','dogpile',
+'wisenut\.com','wisenut',
+'ixquick\.com','ixquick',
+'search\.earthlink\.net','earthlink',
+'i-une\.com','iune',
+'blingo\.com','blingo',
+'centraldatabase\.org','centraldatabase',
+'clusty\.com','clusty',
+'mysearch\.','mysearch',
+'vivisimo\.com','vivisimo',
+'kartoo\.com','kartoo',
+'icerocket\.com','icerocket',
+'sphere\.com','sphere',
+'ledix\.net','ledix',
+'start\.shaw\.ca','shawca',
+'searchalot\.com','searchalot',
+'copernic\.com','copernic',
+'avantfind\.com','avantfind',
+'steadysearch\.com','steadysearch',
+'steady-search\.com','steadysearch',
+'claro-search\.com','clarosearch',
+'www1\.search-results\.com', 'searchresults',
+'www\.holasearch\.com', 'holasearch',
+'search\.conduit\.com', 'conduit',
+'static\.flipora\.com', 'flipora',
+'(?:www[12]?|mixidj)\.delta-search\.com', 'delta-search',
+'start\.iminent\.com', 'iminent',
+'www\.searchmobileonline\.com', 'searchmobileonline',
+'int\.search-results\.com', 'nortonsavesearch',
+'www2\.inbox\.com', 'inbox',
+'www\.govome\.com', 'govome',
+'find1friend\.com', 'find1friend',
+'start\.mysearchdial\.com', 'mysearchdial',
+'go\.speedbit\.com', 'speedbit',
+'search\.certified-toolbar\.com', 'certifiedtoolbarsearch',
+'search\.sweetim\.com', 'sweetim',
+'search\.searchcompletion\.com', 'searchcompletion',
+'en\.eazel\.com','eazelsearch',
+'sr\.searchfunmoods\.com', 'searchfunmoods',
+'173\.194\.35\.177', 'googleByIP',
+'dalesearch\.com', 'dalesearch',
+'sweetpacks-search\.com', 'sweetpacks',
+'searchgol\.com', 'searchgol',
+'duckduckgo\.com', 'duckduckgo',
+'sr\.facemoods\.com', 'facemoods',
+'shoppstop\.com', 'shoppstop',
+'searchya\.com', 'searchya',
+'picsearch\.de', 'picsearch',
+'webssearches\.com', 'webssearches',
+'airzip\.inspsearch\.com', 'webssearches',
+'zapmeta\.de', 'zapmeta',
+'localmoxie\.com', 'localmoxie',
+'search-results\.mobi', 'search-results_mobi',
+'androidsearch\.com', 'androidsearch',
+'isearch\.nation\.com', 'isearch_nation_com',
+'search\.zonealarm\.com', 'search_zonealarm_com',
+'www\.buenosearch\.com', 'www_buenosearch_com',
+'search\.foxtab\.com', 'search_foxtab_com',
+'searches\.qone8\.com', 'searches_qone8_com',
+'startpage\.com', 'startpage_com',
+'www\.qwant\.com', 'qwant_com',
+'searches\.safehomepage\.com', 'safehomepage_com',
+'searches\.vi-view\.com', 'vi-view_com',
+'wow\.utop\.it', 'wow_utop_it',
+'windowssearch\.com', 'windowssearch_com',
+'www\.wow\.com', 'www_wow_com',
+'globososo\.', 'globososo',
+'kingtale3\.inspsearch\.com', 'globososo',
+'swisscows\.ch', 'swisscows_ch',
+'preciobarato\.xyz', 'preciobarato_xyz',
+'www\.dregol\.com', 'www_dregol_com',
+'search\.socialdownloadr\.com', 'search_socialdownloadr_com',
+'int\.search\.myway\.com', 'int_search_myway_com',
+'de\.dolphin\.com', 'de_dolphin_com',
+'mys\.yoursearch\.me', 'mys_yoursearch_me',
+# Chello Portals
+'chello\.at','chelloat',
+'chello\.be','chellobe',
+'chello\.cz','chellocz',
+'chello\.fr','chellofr',
+'chello\.hu','chellohu',
+'chello\.nl','chellonl',
+'chello\.no','chellono',
+'chello\.pl','chellopl',
+'chello\.se','chellose',
+'chello\.sk','chellosk',
+'chello','chellocom',
+# Mirago
+'mirago\.be','miragobe',
+'mirago\.ch','miragoch',
+'mirago\.de','miragode',
+'mirago\.dk','miragodk',
+'es\.mirago\.com','miragoes',
+'mirago\.fr','miragofr',
+'mirago\.it','miragoit',
+'mirago\.nl','miragonl',
+'no\.mirago\.com','miragono',
+'mirago\.se','miragose',
+'mirago\.co\.uk','miragocouk',
+'mirago','mirago', # required as catchall for new countries not yet known
+'answerbus\.com','answerbus',
+'icq\.com\/search','icq',
+'nusearch\.com','nusearch',
+'goodsearch\.com','goodsearch',
+'scroogle\.org','scroogle',
+'questionanswering\.com','questionanswering',
+'mywebsearch\.com','mywebsearch',
+'as\.starware\.com','comettoolbar',
+# Social Bookmarking Services
+'del\.icio\.us','delicious',
+'digg\.com','digg',
+'stumbleupon\.com','stumbleupon',
+'swik\.net','swik',
+'segnalo\.alice\.it','segnalo',
+'ineffabile\.it','ineffabile',
+# Minor Australian search engines
+'anzwers\.com\.au','anzwers',
+# Minor brazilian search engines
+'engine\.exe','engine',
+'miner\.bol\.com\.br','miner',
+# Minor chinese search engines
+'\.baidu\.com','baidu',
+'iask\.com','iask',
+'\.accoona\.com','accoona',
+'\.3721\.com','3721',
+'\.163\.com','netease',
+'\.soso\.com','soso',
+'\.zhongsou\.com','zhongsou',
+'\.vnet\.cn','vnet',
+'\.sogou\.com','sogou',
+# Minor czech search engines
+'atlas\.cz','atlas',
+'seznam\.cz','seznam',
+'quick\.cz','quick',
+'centrum\.cz','centrum',
+'jyxo\.(cz|com)','jyxo',
+'najdi\.to','najdi',
+'redbox\.cz','redbox',
+'isearch\.avg\.com', 'avgsearch',
+# Minor danish search-engines
+'opasia\.dk','opasia',
+'danielsen\.com','danielsen',
+'sol\.dk','sol',
+'jubii\.dk','jubii',
+'find\.dk','finddk',
+'edderkoppen\.dk','edderkoppen',
+'netstjernen\.dk','netstjernen',
+'orbis\.dk','orbis',
+'tyfon\.dk','tyfon',
+'1klik\.dk','1klik',
+'ofir\.dk','ofir',
+# Minor dutch search engines
+'ilse\.','ilse',
+'vindex\.','vindex',
+# Minor english search engines
+'bbc\.co\.uk/cgi-bin/search','bbc',
+'ifind\.freeserve','freeserve',
+'looksmart\.co\.uk','looksmartuk',
+'splut\.','splut',
+'spotjockey\.','spotjockey',
+'ukdirectory\.','ukdirectory',
+'ukindex\.co\.uk','ukindex',
+'ukplus\.','ukplus',
+'searchy\.co\.uk','searchy',
+'search\.fbdownloader\.com','fbdownloader',
+'search\.fdownloadr\.com', 'fdownloadr_com',
+'search\.babylon\.com', 'babylon',
+'my\.allgameshome\.com', 'allgameshome',
+'surfcanyon\.com', 'surfcanyon_com',
+'uk\.foxstart\.com', 'uk_foxstart_com',
+'yandex\.com', 'yandex_com',
+# Minor finnish search engines
+'haku\.www\.fi','haku',
+# Minor french search engines
+'recherche\.aol\.fr','aolfr',
+'ctrouve\.','ctrouve',
+'francite\.','francite',
+'\.lbb\.org','lbb',
+'rechercher\.libertysurf\.fr','libertysurf',
+'search[\w\-]+\.free\.fr','free',
+'recherche\.club-internet\.fr','clubinternet',
+'toile\.com','toile',
+'biglotron\.com', 'biglotron',
+'mozbot\.fr', 'mozbot',
+# Minor german search engines
+'sucheaol\.aol\.de','aolde',
+'o2suche\.aol\.de','o2aolde',
+'fireball\.de','fireball',
+'infoseek\.de','infoseek',
+'suche\d?\.web\.de','webde',
+'[a-z]serv\.rrzn\.uni-hannover\.de','meta',
+'suchen\.abacho\.de','abacho',
+'(brisbane|suche)\.t-online\.de','t-online',
+'allesklar\.de','allesklar',
+'meinestadt\.de','meinestadt',
+'212\.227\.33\.241','metaspinner',
+'(161\.58\.227\.204|161\.58\.247\.101|212\.40\.165\.90|213\.133\.108\.202|217\.160\.108\.151|217\.160\.111\.99|217\.160\.131\.108|217\.160\.142\.227|217\.160\.176\.42)','metacrawler_de',
+'wwweasel\.de','wwweasel',
+'netluchs\.de','netluchs',
+'schoenerbrausen\.de','schoenerbrausen',
+'suche\.gmx\.net', 'gmxsuche',
+'suche\.gmx\.at', 'gmxsuche_at',
+'ecosia\.org', 'ecosiasearch',
+'de\.aolsearch\.com', 'aolsearch',
+'suche\.aol\.de', 'aolsuche',
+'www\.startxxl\.com', 'startxxl',
+'www\.benefind\.de', 'benefind',
+'www\.amazon\.de.*search', 'amazonsearch', #Not clear if this matches amazon searches only
+'de\.wow\.com', 'wowsearch',
+'www\.vlips\.de', 'vlips_de',
+'metager\.de', 'metager',
+'search\.1und1\.de', 'search_1und1_de',
+'sm\.de', 'smde',
+'sumaja\.de', 'sumaja',
+'navigationshilfe\.t-online\.de', 'navigationshilfe',
+'umfis\.de', 'umfis',
+'fastbot\.de', 'fastbot_de',
+'tixuma\.de', 'tixuma_de',
+'suche\.freenet\.de', 'freenet_de',
+'www\.izito\.de', 'izito_de',
+'extern\.peoplecheck\.de', 'peoplecheck_de',
+'www\.oneseek\.de', 'oneseek_de',
+'de\.wiki\.gov\.cn', 'de_wiki_gov_cn',
+'umuwa\.de', 'umuwa_de',
+'suche\.1und1\.de', '1und1_de',
+'www\.metasuche\.ch', 'metasuche_ch',
+# Minor Hungarian search engines
+'heureka\.hu','heureka',
+'vizsla\.origo\.hu','origo',
+'lapkereso\.hu','lapkereso',
+'goliat\.hu','goliat',
+'index\.hu','indexhu',
+'wahoo\.hu','wahoo',
+'webmania\.hu','webmania',
+'search\.internetto\.hu','internetto',
+'tango\.hu','tango_hu',
+'keresolap\.hu','keresolap_hu',
+'kereso\.startlap\.hu', 'startlap_hu',
+'polymeta\.hu','polymeta_hu',
+# Minor Indian search engines
+'sify\.com','sify',
+# Minor Italian search engines
+'virgilio\.it','virgilio',
+'arianna\.libero\.it','arianna',
+'supereva\.com','supereva',
+'kataweb\.it','kataweb',
+'search\.alice\.it\.master','aliceitmaster',
+'search\.alice\.it','aliceit',
+'gotuneed\.com','gotuneed',
+'godado','godado',
+'jumpy\.it','jumpy\.it',
+'shinyseek\.it','shinyseek\.it',
+'teecno\.it','teecnoit',
+# Minor Israeli search engines
+'search\.genieo\.com', 'genieo',
+# Minor Japanese search engines
+'ask\.jp','askjp',
+'sagool\.jp','sagool',
+'websearch\.rakuten\.co\.jp', 'rakuten',
+# Minor Norwegian search engines
+'sok\.start\.no','start', 'eniro\.no','eniro',
+# Minor Polish search engines
+'szukaj\.wp\.pl','wp',
+'szukaj\.onet\.pl','onetpl',
+'dodaj\.pl','dodajpl',
+'gazeta\.pl','gazetapl',
+'gery\.pl','gerypl',
+'netsprint\.pl\/hoga\-search','hogapl',
+'netsprint\.pl','netsprintpl',
+'interia\.pl','interiapl',
+'katalog\.onet\.pl','katalogonetpl',
+'o2\.pl','o2pl',
+'polska\.pl','polskapl',
+'szukacz\.pl','szukaczpl',
+'wow\.pl','wowpl',
+# Minor russian search engines
+'ya(ndex)?\.ru','yandex',
+'aport\.ru','aport',
+'rambler\.ru','rambler',
+'turtle\.ru','turtle',
+'metabot\.ru','metabot',
+'go\.mail\.ru', 'mailru',
+# Minor Swedish search engines
+'evreka\.passagen\.se','passagen',
+'eniro\.se','enirose',
+# Minor Slovak search engines
+'zoznam\.sk','zoznam',
+# Minor Portuguese search engines
+'sapo\.pt','sapo',
+# Minor swiss search engines
+'search\.ch','searchch',
+'search\.bluewin\.ch','bluewin',
+'www\.zapmeta\.ch', 'zapmeta_ch',
+'etools\.ch', 'etools_ch',
+# Minor Croatian, Serbian, Macedonian, Bosnian and Herzegovinian search engines
+'pogodak\.','pogodak',
+# Generic search engines
+'search\..*\.\w+','search'
+);
+
+
+# SearchEnginesWithKeysNotInQuery
+# Search engines (by AWStats id string) that store the keyword as part of the
+# page instead of a query parameter. Each listed engine is flagged with 1.
+#------------------------------------------------------------------------------
+%SearchEnginesWithKeysNotInQuery = (
+    # www.a9.com/searchkey1%20searchkey2
+    'a9'             => 1,
+    # http://start.iminent.com/StartWeb/1031/toolbox/#q=searchkey1%20searchkey2&additional_arguments
+    'iminent'        => 1,
+    # http://de.wiki.gov.cn/s_searchkey1%20searchkey2
+    'de_wiki_gov_cn' => 1,
+    # http://umuwa.de/searchkey or http://umuwa.de/searchkey/Images
+    'umuwa_de'       => 1,
+    # http://www.amazon.de/gp/bit/apps/web/SERP/search/ref=bit_bds-p24_serp_cr_de?ie=UTF8tagbase=bds-p24&query=deutsch+8.+klasse+gymnasium+protokoll
+    'amazonsearch'   => 1,
+);
+
+# SearchEnginesKnownUrl
+# Known rules to extract keywords from a referrer search engine URL.
+# Written as one "AWStats id string => rule" pair per line; an empty rule is
+# stored for engines whose entry carries no keyword rule.
+#------------------------------------------------------------------------------
+%SearchEnginesKnownUrl = (
+    # Most common search engines
+    'alexa' => 'q=',
+    'alltheweb' => 'q(|uery)=',
+    'altavista' => 'q=',
+    'a9' => 'a9\.com\/',
+    'dmoz' => 'search=',
+    'google_products' => '(p|q|as_p|as_q)=',
+    'google_base' => '(p|q|as_p|as_q)=',
+    'google_froogle' => '(p|q|as_p|as_q)=',
+    'google_groups' => 'group\/', # does not work
+    'google_image' => '(p|q|as_p|as_q)=',
+    'google_cache' => '(p|q|as_p|as_q)=cache:[0-9A-Za-z]{12}:',
+    'google' => '(p|q|as_p|as_q)=',
+    'lycos' => 'query=',
+    'msn' => 'q=',
+    'live' => 'q=',
+    'bing' => 'q=',
+    'netscape' => 'search=',
+    'tiscali' => 'key=',
+    'aol' => 'query=',
+    'terra' => 'query=',
+    'voila' => '(kw|rdata)=',
+    'search.com' => 'q=',
+    'yahoo_mindset' => 'p=',
+    'yahoo' => 'p=',
+    'sympatico' => 'query=',
+    'excite' => 'search=',
+    # Minor international search engines
+    'google4counter' => '(p|q|as_p|as_q)=',
+    'att' => 'qry=',
+    'bungeebonesdotcom' => 'query=',
+    'go' => 'qt=',
+    # Ask country specific engines are broken out
+    'askde' => '(ask|q)=',
+    'askes' => '(ask|q)=',
+    'askfr' => '(ask|q)=',
+    'askit' => '(ask|q)=',
+    'asknl' => '(ask|q)=',
+    'ask' => '(ask|q)=',
+    'atomz' => 'sp-q=',
+    'euroseek' => 'query=',
+    'findarticles' => 'key=',
+    'go2net' => 'general=',
+    'hotbot' => 'mt=',
+    'infospace' => 'qkw=',
+    'kvasir' => 'q=',
+    'looksmart' => 'key=',
+    'mamma' => 'query=',
+    'metacrawler' => 'general=',
+    'nbci' => 'keyword=',
+    'northernlight' => 'qr=',
+    'overture' => 'keywords=',
+    'dogpile' => 'q(|kw)=',
+    'spray' => 'string=',
+    'teoma' => 'q=',
+    'webcrawler' => 'searchText=',
+    'wisenut' => 'query=',
+    'ixquick' => 'query=',
+    'earthlink' => 'q=',
+    'iune' => '(keywords|q)=',
+    'blingo' => 'q=',
+    'centraldatabase' => 'query=',
+    'clusty' => 'query=',
+    'mysearch' => 'searchfor=',
+    'vivisimo' => 'query=',
+    # kartoo: No keywords passed in referring URL.
+    'kartoo' => '',
+    'icerocket' => 'q=',
+    'sphere' => 'q=',
+    'ledix' => 'q=',
+    'shawca' => 'q=',
+    'searchalot' => 'q=',
+    'copernic' => 'web\/',
+    'avantfind' => 'keywords=',
+    'steadysearch' => 'w=',
+    'clarosearch' => 'q=',
+    'searchresults' => 'q=',
+    'holasearch' => 'q=',
+    'conduit' => 'q=',
+    'flipora' => 'q=',
+    'delta-search' => 'q=',
+    'iminent' => 'q=',
+    'searchmobileonline' => 'q=',
+    'nortonsavesearch' => 'q=',
+    'inbox' => 'q(?:kw)?=',
+    'govome' => 'q=',
+    'find1friend' => 'q=',
+    'mysearchdial' => 'q=',
+    'speedbit' => 'q=',
+    'certifiedtoolbarsearch' => 'q=',
+    'sweetim' => 'q=',
+    'searchcompletion' => 'q=',
+    'eazelsearch' => 'q=',
+    'searchfunmoods' => 'q=',
+    'googleByIP' => 'q=',
+    'dalesearch' => 'q=',
+    'sweetpacks' => 'q=',
+    'searchgol' => 'q=',
+    'duckduckgo' => 'uddg=',
+    'facemoods' => 'q=',
+    'shoppstop' => 'keywords=',
+    'searchya' => 'q=',
+    'picsearch' => 'q=',
+    'webssearches' => 'q=',
+    'zapmeta' => 'query=',
+    'localmoxie' => 'keyword=',
+    'search-results_mobi' => 'q=',
+    'androidsearch' => 'q=',
+    'isearch_nation_com' => 'q=',
+    'search_zonealarm_com' => 'q=',
+    'www_buenosearch_com' => 'q=',
+    'search_foxtab_com' => 'q=',
+    'searches_qone8_com' => 'q=',
+    'startpage_com' => 'query=',
+    'qwant_com' => 'q=',
+    'safehomepage_com' => 'q=',
+    'vi-view_com' => 'q=',
+    'wow_utop_it' => 'q=',
+    'windowssearch_com' => 'q=',
+    'www_wow_com' => 'q=',
+    'globososo' => 'q=',
+    'swisscows_ch' => 'query=',
+    'preciobarato_xyz' => 's=',
+    'www_dregol_com' => 'q=',
+    'search_socialdownloadr_com' => 'q=',
+    'int_search_myway_com' => 'searchfor=',
+    'de_dolphin_com' => 'q=',
+    'mys_yoursearch_me' => 'q=',
+    # Chello Portals
+    'chelloat' => 'q1=',
+    'chellobe' => 'q1=',
+    'chellocz' => 'q1=',
+    'chellofr' => 'q1=',
+    'chellohu' => 'q1=',
+    'chellonl' => 'q1=',
+    'chellono' => 'q1=',
+    'chellopl' => 'q1=',
+    'chellose' => 'q1=',
+    'chellosk' => 'q1=',
+    'chellocom' => 'q1=',
+    # Mirago
+    'miragobe' => '(txtsearch|qry)=',
+    'miragoch' => '(txtsearch|qry)=',
+    'miragode' => '(txtsearch|qry)=',
+    'miragodk' => '(txtsearch|qry)=',
+    'miragoes' => '(txtsearch|qry)=',
+    'miragofr' => '(txtsearch|qry)=',
+    'miragoit' => '(txtsearch|qry)=',
+    'miragonl' => '(txtsearch|qry)=',
+    'miragono' => '(txtsearch|qry)=',
+    'miragose' => '(txtsearch|qry)=',
+    'miragocouk' => '(txtsearch|qry)=',
+    'mirago' => '(txtsearch|qry)=',
+    'answerbus' => '', # Does not provide query parameters
+    'icq' => 'q=',
+    'nusearch' => 'nusearch_terms=',
+    'goodsearch' => 'Keywords=',
+    'scroogle' => 'Gw=', # Does not always provide query parameters
+    'questionanswering' => '',
+    'mywebsearch' => 'searchfor=',
+    'comettoolbar' => 'qry=',
+    # Social Bookmarking Services
+    'delicious' => 'all=',
+    'digg' => 's=',
+    'stumbleupon' => '',
+    'swik' => 'swik\.net/', # does not work. Keywords follow domain, e.g. http://swik.net/awstats+analytics
+    'segnalo' => '',
+    'ineffabile' => '',
+    # Minor Australian search engines
+    'anzwers' => 'search=',
+    # Minor Brazilian search engines
+    'engine' => 'p1=',
+    'miner' => 'q=',
+    # Minor Chinese search engines
+    'baidu' => '(wd|word)=',
+    'iask' => '(w|k)=',
+    'accoona' => 'qt=',
+    '3721' => '(p|name)=',
+    'netease' => 'q=',
+    'soso' => 'q=',
+    'zhongsou' => '(word|w)=',
+    'sogou' => 'query=',
+    'vnet' => 'kw=',
+    # Minor Czech search engines
+    'atlas' => '(searchtext|q)=',
+    'seznam' => '(w|q)=',
+    'quick' => 'query=',
+    'centrum' => 'q=',
+    'jyxo' => '(s|q)=',
+    'najdi' => 'dotaz=',
+    'redbox' => 'srch=',
+    'avgsearch' => 'q=',
+    # Minor Danish search engines
+    'opasia' => 'q=',
+    'danielsen' => 'q=',
+    'sol' => 'q=',
+    'jubii' => 'soegeord=',
+    'finddk' => 'words=',
+    'edderkoppen' => 'query=',
+    'orbis' => 'search_field=',
+    '1klik' => 'query=',
+    'ofir' => 'querytext=',
+    # Minor Dutch search engines
+    'ilse' => 'search_for=',
+    'vindex' => 'in=',
+    # Minor English search engines
+    'askuk' => '(ask|q)=',
+    'bbc' => 'q=',
+    'freeserve' => 'q=',
+    'looksmartuk' => 'key=',
+    'splut' => 'pattern=',
+    'spotjockey' => 'Search_Keyword=',
+    'ukindex' => 'stext=',
+    'ukdirectory' => 'k=',
+    'ukplus' => 'search=',
+    'searchy' => 'search_term=',
+    'fbdownloader' => 'q=',
+    'fdownloadr_com' => 'q=',
+    'babylon' => 'q=',
+    'allgameshome' => 's=',
+    'surfcanyon_com' => 'q=',
+    'uk_foxstart_com' => 'q=',
+    'yandex_com' => 'text=',
+    # Minor Finnish search engines
+    'haku' => 'w=',
+    # Minor French search engines
+    'francite' => 'name=',
+    'clubinternet' => 'q=',
+    'toile' => 'q=',
+    'biglotron' => 'question=',
+    'mozbot' => 'q=',
+    # Minor German search engines
+    'aolde' => 'q=',
+    'o2aolde' => 'q=',
+    'fireball' => 'q=',
+    'infoseek' => 'qt=',
+    'webde' => 'su=',
+    'abacho' => 'q=',
+    't-online' => 'q=',
+    'metaspinner' => 'qry=',
+    'metacrawler_de' => 'qry=',
+    'wwweasel' => 'q=',
+    'netluchs' => 'query=',
+    'schoenerbrausen' => 'q=',
+    'gmxsuche' => 'q=',
+    'gmxsuche_at' => 'q=',
+    'ecosiasearch' => 'q=',
+    'aolsearch' => 'q=',
+    'aolsuche' => 'q=',
+    'startxxl' => 'q=',
+    'benefind' => 'q=',
+    'amazonsearch' => 'query=',
+    'wowsearch' => 'q=',
+    'vlips_de' => 'q=',
+    'metager' => 'eingabe=',
+    'search_1und1_de' => 'q=',
+    'smde' => 'q=',
+    # 'sumaja' is left out on purpose: there is no query string in the referrer url
+    'navigationshilfe' => 'q=',
+    'umfis' => 'suchbegriff=',
+    'fastbot_de' => 'red=[0-9]*\+',
+    'tixuma_de' => 'sc=',
+    'freenet_de' => 'query=',
+    'izito_de' => 'q=',
+    'peoplecheck_de' => 'q=',
+    'oneseek_de' => 'q=',
+    'de_wiki_gov_cn' => 'de\.wiki\.gov\.cn\/s_',
+    'umuwa_de' => 'umuwa\.de\/',
+    '1und1_de' => 'q=',
+    'metasuche_ch' => 'q=',
+    # Minor Hungarian search engines
+    'heureka' => 'heureka=',
+    'origo' => '(q|search)=',
+    'goliat' => 'KERESES=',
+    'wahoo' => 'q=',
+    'internetto' => 'searchstr=',
+    'keresolap_hu' => 'q=',
+    'startlap_hu' => 'q=',
+    'tango_hu' => 'q=',
+    'polymeta_hu' => '',
+    # Minor Indian search engines
+    'sify' => 'keyword=',
+    # Minor Italian search engines
+    'virgilio' => 'qs=',
+    'arianna' => 'query=',
+    'supereva' => 'q=',
+    'kataweb' => 'q=',
+    'aliceitmaster' => 'qs=',
+    'aliceit' => 'qs=',
+    'gotuneed' => '', # Not yet known
+    'godado' => 'Keywords=',
+    'jumpy\.it' => 'searchWord=',
+    'shinyseek\.it' => 'KEY=',
+    'teecnoit' => 'q=',
+    # Minor Israeli search engines
+    'genieo' => 'q=',
+    # Minor Japanese search engines
+    'askjp' => '(ask|q)=',
+    'sagool' => 'q=',
+    'rakuten' => 'qt=',
+    # Minor Norwegian search engines
+    'start' => 'q=',
+    'eniro' => 'q=',
+    # Minor Polish search engines
+    'wp' => 'szukaj=',
+    'onetpl' => 'qt=',
+    'dodajpl' => 'keyword=',
+    'gazetapl' => 'slowo=',
+    'gerypl' => 'q=',
+    'hogapl' => 'qt=',
+    'netsprintpl' => 'q=',
+    'interiapl' => 'q=',
+    'katalogonetpl' => 'qt=',
+    'o2pl' => 'qt=',
+    'polskapl' => 'qt=',
+    'szukaczpl' => 'q=',
+    'wowpl' => 'q=',
+    # Minor Russian search engines
+    'yandex' => 'text=',
+    'rambler' => 'words=',
+    'aport' => 'r=',
+    'metabot' => 'st=',
+    'mailru' => 'q=',
+    # Minor Swedish search engines
+    'passagen' => 'q=',
+    # Not sure if this works, as the keywords are part of the URL, and therefore the URL does not contain a question mark.
+    'enirose' => 'hitta:',
+    # Minor Swiss search engines
+    'searchch' => 'q=',
+    'bluewin' => 'qry=',
+    'zapmeta_ch' => 'query=',
+    'etools_ch' => 'query=',
+    # Minor Croatian, Serbian, Macedonian, Bosnian and Herzegovinian search engines
+    'pogodak' => 'q='
+);
+
+# SearchEnginesKnownUrlNotFound
+# Known rules to extract the "not found" keywords from a referrer search
+# engine URL, keyed like the keyword rules by AWStats id string.
+#------------------------------------------------------------------------------
+%SearchEnginesKnownUrlNotFound = (
+    # Most common search engines
+    'msn' => 'origq=',
+);
+
+# If no rules are known, we take the first parameter that is not in WordsToCleanSearchUrl
+#------------------------------------------------------------------------------
+@WordsToCleanSearchUrl = qw(
+    act= annuaire= btng= cat= categoria= cfg= cof= cou= count= cp=
+    dd= domain= dt= dw= enc= exec= geo= hc= height= hits=
+    hl= hq= hs= id= kl= lang= loc= lr= matchmode= medor=
+    message= meta= mode= order= page= par= pays= pg= pos= prg=
+    qc= refer= sa= safe= sc= sort= src= start= style= stype=
+    sum= tag= temp= theme= type= url= user= width= what=
+    \\.x= \\.y= y= look=
+);
+
+# SearchEnginesKnownUTFCoding
+# For each listed engine (AWStats id string), the known parameter that proves
+# the search engine has coded its parameters in UTF-8.
+#------------------------------------------------------------------------------
+%SearchEnginesKnownUTFCoding = (
+    # Most common search engines
+    'google'    => 'ie=utf-8',
+    'alltheweb' => 'cs=utf-8',
+);
+
+
+# SearchEnginesHashLib
+# List of search engine names
+# 'search_engine_id', 'search_engine_name',
+#------------------------------------------------------------------------------
+%SearchEnginesHashLib=(
+# Major international search engines
+'alexa','<a href="http://www.alexa.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Alexa</a>',
+'alltheweb','<a href="http://www.alltheweb.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">AllTheWeb</a>',
+'altavista','<a href="http://www.altavista.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">AltaVista</a>',
+'a9', '<a href="http://www.a9.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">A9</a>',
+'dmoz','<a href="http://dmoz.org/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">DMOZ</a>',
+'google_products','<a href="http://www.google.com/products" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Google (Products)</a>',
+'google_base','<a href="http://base.google.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Google (Base)</a>',
+'google_froogle','<a href="http://froogle.google.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Froogle (Google)</a>',
+'google_groups','<a href="http://groups.google.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Google (Groups)</a>',
+'google_image','<a href="http://images.google.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Google (Images)</a>',
+'google_cache','<a href="http://www.google.com/help/features.html#cached" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Google (cache)</a>',
+'google','<a href="http://www.google.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Google</a>',
+'lycos','<a href="http://www.lycos.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Lycos</a>',
+'msn','<a href="http://search.msn.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Microsoft MSN Search</a>',
+'live','<a href="http://www.live.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Microsoft Windows Live</a>',
+'bing','<a href="http://www.bing.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Microsoft Bing</a>',
+'netscape','<a href="http://www.netscape.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Netscape</a>',
+'aol','<a href="http://www.aol.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">AOL</a>',
+'terra','<a href="http://www.terra.es/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Terra</a>',
+'tiscali','<a href="http://search.tiscali.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Tiscali</a>',
+'voila','<a href="http://www.voila.fr/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Voila</a>',
+'search.com','<a href="http://www.search.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Search.com</a>',
+'yahoo_mindset','<a href="http://mindset.research.yahoo.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Yahoo! Mindset</a>',
+'yahoo','<a href="http://www.yahoo.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Yahoo!</a>',
+'sympatico','<a href="http://sympatico.msn.ca/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Sympatico</a>',
+'excite','<a href="http://www.excite.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Excite</a>',
+# Minor international search engines
+'google4counter','<a href="http://www.4-counter.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">4-counter (Google)</a>',
+'att','<a href="http://www.att.net/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">AT&T search (powered by Google)</a>',
+'bungeebonesdotcom','<a href="http://BungeeBones.com/search.php/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">BungeeBones</a>',
+'go','Go.com',
+'askde','<a href="http://de.ask.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Ask Deutschland</a>',
+'askes','<a href="http://es.ask.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Ask Espa&ntilde;a</a>', # break out Ask country specific engines.
+'askfr','<a href="http://fr.ask.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Ask France</a>',
+'askit','<a href="http://it.ask.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Ask Italia</a>',
+'asknl','<a href="http://nl.ask.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Ask Nederland</a>',
+'ask','<a href="http://www.ask.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Ask</a>',
+'atomz','Atomz',
+'dejanews','DejaNews',
+'euroseek','Euroseek',
+'findarticles','Find Articles',
+'go2net','Go2Net (Metamoteur)',
+'hotbot','Hotbot',
+'infospace','InfoSpace',
+'kvasir','Kvasir',
+'looksmart','Looksmart',
+'mamma','Mamma',
+'metacrawler','MetaCrawler (Metamoteur)',
+'nbci','NBCI',
+'northernlight','NorthernLight',
+'overture','Overture',                 # Replace 'goto\.com','Goto.com',
+'dogpile','<a href="http://www.dogpile.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Dogpile</a>',
+'spray','Spray',
+'teoma','<a href="http://search.ask.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Teoma</a>',                                                      # Replace 'directhit\.com','DirectHit',
+'webcrawler','<a href="http://www.webcrawler.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">WebCrawler</a>',
+'wisenut','WISENut',
+'ixquick','<a href="http://www.ixquick.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">ix quick</a>',
+'earthlink', 'Earth Link',
+'iune','<a href="http://www.i-une.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">i-une</a>',
+'blingo','<a href="http://www.blingo.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Blingo</a>',
+'centraldatabase','<a href="http://search.centraldatabase.org/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">GPU p2p search</a>',
+'clusty','<a href="http://www.clusty.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Clusty</a>',
+'mysearch','<a href="http://www.mysearch.com" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">My Search</a>',
+'vivisimo','<a href="http://www.vivisimo.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Vivisimo</a>',
+'kartoo','<a href="http://www.kartoo.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Kartoo</a>',
+'icerocket','<a href="http://www.icerocket.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Icerocket (Blog)</a>',
+'sphere','<a href="http://www.sphere.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Sphere (Blog)</a>',
+'ledix','<a href="http://www.ledix.net/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Ledix</a>',
+'shawca','<a href="http://start.shaw.ca/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Shaw.ca</a>',
+'searchalot','<a href="http://www.searchalot.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Searchalot</a>',
+'copernic','<a href="http://www.copernic.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Copernic</a>',
+'avantfind','<a href="http://www.avantfind.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Avantfind</a>',
+'steadysearch','<a href="http://www.avantfind.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Avantfind</a>',
+'clarosearch','<a href="http://www.claro-search.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Claro Search</a>',
+'searchresults','<a href="http://www1.search-results.com/" rel="nofollow" title="Search-results Home Page [new window]" target="_blank">Search-results</a>',
+'holasearch', '<a href="http://www.holasearch.com" rel="nofollow" title="Hola Search Home Page [new window]" target="_blank">Hola Search</a>',
+'conduit', '<a href="http://search.conduit.com" rel="nofollow" title="Conduit Search Home Page [new window]" target="_blank">Conduit Search</a>',
+'flipora', '<a href="http://static.flipora.com/" rel="nofollow" title="Flipora Home Page [new window]" target="_blank">Flipora</a>',
+'delta-search', '<a href="http://www.delta-search.com/" rel="nofollow" title="Delta Search Home Page [new window]" target="_blank">Delta Search</a>',
+'iminent', '<a href="http://start.iminent.com/" rel="nofollow" title="Iminent Home Page [new window]" target="_blank">Iminent</a>',
+'searchmobileonline', '<a href="http://www.searchmobileonline.com/" rel="nofollow" title="Search Mobile Online (StartApp) Home Page [new window]" target="_blank">Search Mobile Online (StartApp)</a>',
+'nortonsavesearch', '<a href="http://int.search-results.com/" rel="nofollow" title="Norton Safe Search Home Page [new window]" target="_blank">Norton Safe Search</a>',
+'inbox', '<a href="http://www2.inbox.com/search/results1.aspx" rel="nofollow" title="Inbox Search [new window]" target="_blank">Inbox Search</a>',
+'govome', '<a href="http://www.govome.com/" rel="nofollow" title="Govome [new window]" target="_blank">Govome</a>',
+'find1friend', '<a href="http://find1friend.com/" rel="nofollow" title="Find1Friend [new window]" target="_blank">Find1Friend</a>',
+'mysearchdial', '<a href="http://start.mysearchdial.com/" rel="nofollow" title="My Search Dial [new window]" target="_blank">My Search Dial</a>',
+'speedbit', '<a href="http://go.speedbit.com/" rel="nofollow" title="Speedbit [new window]" target="_blank">Speedbit</a>',
+'certifiedtoolbarsearch', '<a href="http://search.certified-toolbar.com/" rel="nofollow" title="Certified-Toolbar Search [new window]" target="_blank">Certified-Toolbar Search</a>',
+'sweetim', '<a href="http://search.sweetim.com/" rel="nofollow" title="SweetIM Search [new window]" target="_blank">SweetIM Search</a>',
+'searchcompletion', '<a href="http://search.searchcompletion.com/" rel="nofollow" title="SearchCompletion Search [new window]" target="_blank">SearchCompletion Search</a>',
+'eazelsearch', '<a href="http://en.eazel.com/" rel="nofollow" title="Eazel Search [new window]" target="_blank">Eazel Search</a>',
+'searchfunmoods', '<a href="http://sr.searchfunmoods.com/" rel="nofollow" title="Funmoods [new window]" target="_blank">Funmoods</a>',
+'googleByIP', '<a href="http://173.194.35.177/" rel="nofollow" title="Google (Access by IP-Address) [new window]" target="_blank">Google (Access by IP-Address)</a>',
+'dalesearch',  '<a href="http://www.dalesearch.com/" rel="nofollow" title="Dale Search [new window]" target="_blank">Dale Search</a>',
+'sweetpacks', '<a href="http://www.sweetpacks-search.com/" rel="nofollow" title="Sweetpacks [new window]" target="_blank">Sweetpacks</a>',
+'searchgol', '<a href="http://www.searchgol.com/" rel="nofollow" title="Search-Gol [new window]" target="_blank">Search-Gol</a>',
+'duckduckgo',  '<a href="http://r.duckduckgo.com/" rel="nofollow" title="DuckDuckGo Home Page [new window]" target="_blank">DuckDuckGo (Does not provide search keyphrases, using found page instead)</a>',
+'facemoods', '<a href="http://sr.facemoods.com/" rel="nofollow" title="Facemoods Search [new window]" target="_blank">Facemoods Search</a>',
+'shoppstop', '<a href="http://www.shoppstop.com/" rel="nofollow" title="ShoppStop [new window]" target="_blank">ShoppStop</a>',
+'searchya', '<a href="http://www.searchya.com/" rel="nofollow" title="Searchya [new window]" target="_blank">Searchya</a>',
+'picsearch', '<a href="http://www.picsearch.de/" rel="nofollow" title="picsearch [new window]" target="_blank">picsearch</a>',
+'webssearches', 'Various variants of Webssearches <a href="http://www.webssearches.com/" rel="nofollow" title="Webssearches [new window]" target="_blank">EMG Technologies</a> and <a href="http://airzip.inspsearch.com/" rel="nofollow" title="airzip.inspsearch.com [new window]" target="_blank">airzip.inspsearch.com</a>',
+#Jan 8, 2016: No genuine inspsearch.com search engine seems to exist, but there are a couple of search engines using subdomains of inspsearch.com. It is unclear how these are related to each other.
+'zapmeta', '<a href="http://www.zapmeta.de/" rel="nofollow" title="ZapMeta [new window]" target="_blank">ZapMeta</a>',
+'localmoxie', '<a href="http://www.localmoxie.com/" rel="nofollow" title="Local Moxie [new window]" target="_blank">Local Moxie</a>',
+'search-results_mobi', '<a href="http://search-results.mobi/" rel="nofollow" title="search-results.mobi [new window]" target="_blank">search-results.mobi</a>',
+'androidsearch', '<a href="http://www.androidsearch.com/" rel="nofollow" title="androidsearch.com [new window]" target="_blank">androidsearch.com</a>',
+'isearch_nation_com', '<a href="http://isearch.nation.com/" rel="nofollow" title="Nation Search [new window]" target="_blank">Nation Search</a>',
+'search_zonealarm_com', '<a href="http://search.zonealarm.com/" rel="nofollow" title="Zone Alarm Search [new window]" target="_blank">Zone Alarm Search</a>',
+'www_buenosearch_com', '<a href="http://www.buenosearch.com/" rel="nofollow" title="BuenoSearch [new window]" target="_blank">BuenoSearch</a>',
+'search_foxtab_com',  '<a href="http://search.foxtab.com/" rel="nofollow" title="Foxtab Search [new window]" target="_blank">Foxtab Search</a>',
+'searches_qone8_com', '<a href="http://searches.qone8.com/" rel="nofollow" title="Omiga-Plus [new window]" target="_blank">Omiga-Plus</a>',
+'startpage_com', '<a href="http://startpage.com/" rel="nofollow" title="Startpage [new window]" target="_blank">Startpage</a>',
+'qwant_com', '<a href="https://www.qwant.com/" rel="nofollow" title="qwant.com [new window]" target="_blank">qwant.com</a>',
+'safehomepage_com', '<a href="http://searches.safehomepage.com/" rel="nofollow" title="safehomepage.com [new window]" target="_blank">safehomepage.com</a>',
+'vi-view_com', '<a href="http://searches.vi-view.com/" rel="nofollow" title="vi-view.com [new window]" target="_blank">vi-view.com</a>',
+'wow_utop_it', '<a href="http://wow.utop.it/" rel="nofollow" title="wow.utop.it [new window]" target="_blank">wow.utop.it</a>',
+'windowssearch_com', '<a href="http://www.windowssearch.com/search?q=AWStats" rel="nofollow" title="Example page [new window]" target="_blank">windowssearch.com</a>',
+'www_wow_com', '<a href="http://www.wow.com/" rel="nofollow" title="www.wow.com [new window]" target="_blank">WOW.com</a>',
+'globososo', 'Various variants of Globososo (Kingtale Technology): <a href="http://www.globososo.com/" rel="nofollow" title="Globososo search page [new window]" target="_blank">www</a>, <a href="http://searches.globososo.com/" rel="nofollow" title="searches subdomain of globososo.com [new window]" target="_blank">searches</a>, <a href="http://searches3.globososo.com/" rel="nofollow" title="searches3 subdomain of globososo.com [new window]" target="_blank">searches3</a>, and at inspsearch.com (<a href="http://globososo.inspsearch.com/" rel="nofollow" title="Globososo search page at globososo.inspsearch.com [new window]" target="_blank">globososo</a>, <a href="http://kingtale3.inspsearch.com/" rel="nofollow" title="Globososo search page at kingtale3.inspsearch.com [new window]" target="_blank">kingtale3</a>)',
+'swisscows_ch', '<a href="https://swisscows.ch/" rel="nofollow" title="Swisscows search page [new window]" target="_blank">Swisscows</a>',
+'preciobarato_xyz', '<a href="http://preciobarato.xyz/" rel="nofollow" title="preciobarato.xyz offline on Oct 2, 2015 [new window]" target="_blank">Yandex</a>',
+'www_dregol_com', '<a href="http://www.dregol.com/" rel="nofollow" title="Dregol Search [new window]" target="_blank">Dregol Search</a>',
+'search_socialdownloadr_com', '<a href="http://search.socialdownloadr.com/" rel="nofollow" title="Socialdownloadr search page [new window]" target="_blank">Socialdownloadr</a>',
+'int_search_myway_com', '<a href="http://int.search.myway.com/" rel="nofollow" title="MyWay search page [new window]" target="_blank">MyWay</a>',
+'de_dolphin_com', '<a href="http://de.dolphin.com/" rel="nofollow" title="Dolphin Search search page [new window]" target="_blank">Dolphin Search</a>',
+'mys_yoursearch_me', '<a href="http://mys.yoursearch.me/" rel="nofollow" title="Yoursearch.me search page [new window]" target="_blank">Yoursearch.me</a>',
+# Chello Portals
+'chelloat','<a href="http://www.chello.at/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Chello Austria</a>',
+'chellobe','<a href="http://www.chello.be/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Chello Belgium</a>',
+'chellocz','<a href="http://www.chello.cz/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Chello Czech Republic</a>',
+'chellofr','<a href="http://www.chello.fr/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Chello France</a>',
+'chellohu','<a href="http://www.chello.hu/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Chello Hungary</a>',
+'chellonl','<a href="http://www.chello.nl/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Chello Netherlands</a>',
+'chellono','<a href="http://www.chello.no/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Chello Norway</a>',
+'chellopl','<a href="http://www.chello.pl/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Chello Poland</a>',
+'chellose','<a href="http://www.chello.se/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Chello Sweden</a>',
+'chellosk','<a href="http://www.chello.sk/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Chello Slovakia</a>',
+'chellocom','<a href="http://www.chello.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Chello (Country not recognized)</a>',
+# Mirago
+'miragobe','<a href="http://www.mirago.be/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Mirago Belgium</a>',
+'miragoch','<a href="http://www.mirago.ch/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Mirago Switzerland</a>',
+'miragode','<a href="http://www.mirago.de/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Mirago Germany</a>',
+'miragodk','<a href="http://www.mirago.dk/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Mirago Denmark</a>',
+'miragoes','<a href="http://es.mirago.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Mirago Spain</a>',
+'miragofr','<a href="http://www.mirago.fr/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Mirago France</a>',
+'miragoit','<a href="http://www.mirago.it/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Mirago Italy</a>',
+'miragonl','<a href="http://www.mirago.nl/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Mirago Netherlands</a>',
+'miragono','<a href="http://no.mirago.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Mirago Norway</a>',
+'miragose','<a href="http://www.mirago.se/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Mirago Sweden</a>',
+'miragocouk','<a href="http://zone.mirago.co.uk/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Mirago UK</a>',
+'mirago','<a href="http://www.mirago.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Mirago (country unknown)</a>',
+'answerbus','<a href="http://www.answerbus.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Answerbus</a>',
+'icq','<a href="http://www.icq.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">icq</a>',
+'nusearch','<a href="http://www.nusearch.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Nusearch</a>',
+'goodsearch','<a href="http://www.goodsearch.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">GoodSearch</a>',
+'scroogle','<a href="http://www.scroogle.org/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Scroogle</a>',
+'questionanswering','<a href="http://www.questionanswering.com/" rel="nofollow" title="Questionanswering home page [new window]" target="_blank">Questionanswering</a>',
+'mywebsearch','<a href="http://search.mywebsearch.com/" rel="nofollow" title="MyWebSearch home page [new window]" target="_blank">MyWebSearch</a>',
+'comettoolbar','<a href="http://as.starware.com/dp/search" rel="nofollow" title="Comet toolbar search home page [new window]" target="_blank">Comet toolbar search</a>',
+# Social Bookmarking Services
+'delicious','<a href="http://del.icio.us/" rel="nofollow" title="del.icio.us home page [new window]" target="_blank">del.icio.us</a> (Social Bookmark)',
+'digg','<a href="http://www.digg.com/" rel="nofollow" title="Digg home page [new window]" target="_blank">Digg</a> (Social Bookmark)',
+'stumbleupon','<a href="http://www.stumbleupon.com/" rel="nofollow" title="Stumbleupon home page [new window]" target="_blank">Stumbleupon</a> (Social Bookmark)',
+'swik','<a href="http://swik.net/" rel="nofollow" title="Swik home page [new window]" target="_blank">Swik</a> (Social Bookmark)',
+'segnalo','<a href="http://segnalo.alice.it/" rel="nofollow" title="Segnalo home page [new window]" target="_blank">Segnalo</a> (Social Bookmark)',
+'ineffabile','<a href="http://www.ineffabile.it/" rel="nofollow" title="Ineffabile.it home page [new window]" target="_blank">Ineffabile.it</a> (Social Bookmark)',
+# Minor Australian search engines
+'anzwers','<a href="http://anzwers.com.au/" rel="nofollow" title="anzwers.com.au home page [new window]" target="_blank">anzwers.com.au</a>',
+# Minor brazilian search engines
+'engine','Cade', 'miner','Meta Miner',
+# Minor chinese search engines
+'baidu','<a href="http://www.baidu.com/" rel="nofollow" target="_blank">Baidu</a>',
+'iask','<a href="http://www.iask.com/" rel="nofollow" target="_blank">Iask</a>',
+'accoona','<a href="http://cn.accoona.com" rel="nofollow" target="_blank">Accoona</a>',
+'3721','<a href="http://www.3721.com/" rel="nofollow" target="_blank">3721</a>',
+'netease', '<a href="http://www.163.com/" rel="nofollow" target="_blank">NetEase</a>',
+'soso','<a href="http://www.soso.com/" rel="nofollow" target="_blank">SoSo</a>',
+'zhongsou','<a href="http://www.zhongsou.com/" rel="nofollow" target="_blank">ZhongSou</a>',
+'sogou', '<a href="http://www.sogou.com/" rel="nofollow" target="_blank">SoGou</a>',
+'vnet','<a href="http://114.vnet.cn/" rel="nofollow" target="_blank">VNet</a>',
+# Minor czech search engines
+'atlas','Atlas.cz',    'seznam','Seznam', 'quick','Quick.cz', 'centrum','Centrum.cz', 'jyxo','Jyxo.cz', 'najdi','Najdi.to', 'redbox','RedBox.cz',
+'avgsearch', '<a href="http://isearch.avg.com" rel="nofollow" target="_blank">AVG Secure Search</a>',
+# Minor danish search-engines
+'opasia','Opasia', 'danielsen','Thor (danielsen.com)', 'sol','SOL', 'jubii','Jubii', 'finddk','Find', 'edderkoppen','Edderkoppen', 'netstjernen','Netstjernen', 'orbis','Orbis', 'tyfon','Tyfon', '1klik','1Klik', 'ofir','Ofir',
+# Minor dutch search engines
+'ilse','Ilse','vindex','Vindex\.nl',
+# Minor english search engines
+'askuk','<a href="http://uk.ask.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Ask UK</a>',
+'bbc','BBC', 'freeserve','Freeserve', 'looksmartuk','Looksmart UK',
+'splut','Splut', 'spotjockey','Spotjockey', 'ukdirectory','UK Directory', 'ukindex','UKIndex', 'ukplus','UK Plus', 'searchy','searchy.co.uk',
+'fbdownloader','<a href="http://search.fbdownloader.com/" rel="nofollow" title="FBDownloader Home Page [new window]" target="_blank">FBDownloader (fbdownloader)</a>',
+'fdownloadr_com', '<a href="http://search.fdownloadr.com/" rel="nofollow" title="FBDownloader Home Page [new window]" target="_blank">FBDownloader (fdownloadr)</a>',
+'babylon','<a href="http://search.babylon.com/" rel="nofollow" title="Babylon Home Page [new window]" target="_blank">Babylon</a>',
+'allgameshome', '<a href="http://my.allgameshome.com/" rel="nofollow" title="AllGamesHome [new window]" target="_blank">AllGamesHome</a>',
+'surfcanyon_com', '<a href="http://search.surfcanyon.com/" rel="nofollow" title="SurfCanyon [new window]" target="_blank">SurfCanyon</a>',
+'uk_foxstart_com', '<a href="http://uk.foxstart.com/" rel="nofollow" title="Foxstart search page [new window]" target="_blank">Foxstart.com</a>',
+'yandex_com', '<a href="http://yandex.com/" rel="nofollow" title="Yandex search page [new window]" target="_blank">Yandex</a>',
+# Minor finnish search engines
+'haku','Ihmemaa',
+# Minor french search engines
+'aolfr','AOL (fr)', 'ctrouve','C\'est trouve', 'francite','Francite', 'lbb', 'LBB', 'libertysurf', 'Libertysurf', 'free', 'Free.fr', 'clubinternet', 'Club-internet',
+'toile', 'Toile du Quebec',
+'biglotron','<a href="http://www.biglotron.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Biglotron</a>',
+'mozbot','<a href="http://www.mozbot.fr/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Mozbot</a>',
+# Minor German search engines
+'aolde','AOL (de)',
+'o2aolde', '<a href="http://o2suche.aol.de/aol/search" rel="nofollow" title="o2 Suche Home Page [new window]" target="_blank">o2 Suche</a>',
+'fireball','Fireball', 'infoseek','Infoseek',
+'webde','<a href="http://suche.web.de" rel="nofollow" title="Web.de Home Page [new window]" target="_blank">Web.de</a>',
+'abacho','Abacho',
+'t-online','T-Online',
+'allesklar','allesklar.de', 'meinestadt','meinestadt.de',
+'metaspinner','metaspinner',
+'metacrawler_de','metacrawler.de',
+'wwweasel','<a href="http://wwweasel.de/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">WWWeasel</a>',
+'netluchs','<a href="http://www.netluchs.de/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Netluchs</a>',
+'schoenerbrausen','<a href="http://www.schoenerbrausen.de/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Schoenerbrausen/</a>',
+'gmxsuche', '<a href="http://suche.gmx.net/" rel="nofollow" title="GMX Suche Home Page [new window]" target="_blank">GMX Suche</a>',
+'gmxsuche_at', '<a href="http://suche.gmx.at/" rel="nofollow" title="GMX Suche Oesterreich Home Page [new window]" target="_blank">GMX Suche Oesterreich</a>',
+'ecosiasearch', '<a href="http://ecosia.org" rel="nofollow" title="Ecosia Search Home Page [new window]" target="_blank">Ecosia Search</a>',
+'aolsearch', '<a href="http://de.aolsearch.com/" rel="nofollow" title="AOL Search Home Page [new window]" target="_blank">AOL Search</a>',
+'aolsuche', '<a href="http://suche.aol.de/" rel="nofollow" title="AOL Suche Home Page [new window]" target="_blank">AOL Suche</a>',
+'startxxl', '<a href="http://www.startxxl.com/" rel="nofollow" title="StartXXL Home Page [new window]" target="_blank">StartXXL</a>',
+'benefind', '<a href="http://www.benefind.de/" rel="nofollow" title="benefind Home Page [new window]" target="_blank">benefind</a>',
+'amazonsearch', '<a href="http://www.amazon.de/gp/bit/apps/web/SERP/search/" rel="nofollow" title="Amazon Web Search Home Page [new window]" target="_blank">Amazon Web Search</a>',
+'wowsearch', '<a href="http://de.wow.com/" rel="nofollow" title="Wow Search Home Page [new window]" target="_blank">Wow Search</a>',
+'vlips_de', '<a href="http://www.vlips.de/semags_proxy.php" rel="nofollow" title="vlips Proxy Page (Homepage not yet ready?) [new window]" target="_blank">vlips.de</a>',
+'metager', '<a href="http://www.metager.de" rel="nofollow" title="MetaGer Home Page [new window]" target="_blank">MetaGer</a>',
+'search_1und1_de', '<a href="http://search.1und1.de/" rel="nofollow" title="1&amp;1 Suche [new window]" target="_blank">1&amp;1 Suche (subdomain "search")</a>',
+'smde', '<a href="http://www.sm.de/" rel="nofollow" title="SM.de - Die SuchMaschine [new window]" target="_blank">SM.de - Die SuchMaschine</a>',
+'sumaja',  '<a href="http://www.sumaja.de/" rel="nofollow" title="Sumaja [new window]" target="_blank">Sumaja</a>',
+'navigationshilfe', '<a href="http://navigationshilfe.t-online.de/" rel="nofollow" title="T-Online Navigationshilfe [new window]" target="_blank">T-Online Navigationshilfe</a>',
+'umfis', '<a href="http://www.umfis.de/" rel="nofollow" title="UMFIS-Online Das Umweltfirmen-Informationssystem der IHKs in Deutschland [new window]" target="_blank">UMFIS-Online Das Umweltfirmen-Informationssystem der IHKs in Deutschland</a>',
+'fastbot_de', '<a href="http://fastbot.de/" rel="nofollow" title="Fastbot.de [new window]" target="_blank">Fastbot.de (Does not provide search keyphrases, using found page instead)</a>',
+'tixuma_de', '<a href="http://www.tixuma.de/" rel="nofollow" title="Tixuma Deutschland [new window]" target="_blank">Tixuma Deutschland</a>',
+'freenet_de', '<a href="http://suche.freenet.de/" rel="nofollow" title="suche.freenet.de [new window]" target="_blank">suche.freenet.de</a>',
+'izito_de',  '<a href="http://www.izito.de/" rel="nofollow" title="iZito Deutschland [new window]" target="_blank">iZito Deutschland</a>',
+'peoplecheck_de', '<a href="http://peoplecheck.de/" rel="nofollow" title="PeopleCheck.de [new window]" target="_blank">PeopleCheck.de</a>',
+'oneseek_de', '<a href="http://www.oneseek.de" rel="nofollow" title="Metasuchmaschine OneSeek.de [new window]" target="_blank">Metasuchmaschine OneSeek.de</a>',
+'de_wiki_gov_cn', '<a href="http://de.wiki.gov.cn/" rel="nofollow" title="Wiki Sucher [new window]" target="_blank">Wiki Sucher</a>',
+'umuwa_de', '<a href="http://umuwa.de/" rel="nofollow" title="Umuwa Deutschland [new window]" target="_blank">Umuwa Deutschland</a>',
+'1und1_de', '<a href="http://suche.1und1.de/" rel="nofollow" title="1&amp;1 Suche [new window]" target="_blank">1&amp;1 Suche (subdomain "suche")</a>',
+'metasuche_ch', '<a href="http://www.metasuche.ch/" rel="nofollow" title="Metasuche search page [new window]" target="_blank">Metasuche.ch</a>',
+# Minor hungarian search engines
+'heureka','Heureka', 'origo','Origo-Vizsla', 'lapkereso','Startlapkereso', 'goliat','Goliat', 'indexhu','Index', 'wahoo','Wahoo', 'webmania','webmania.hu', 'internetto','Internetto Kereso',
+'tango_hu','<a href="http://tango.hu/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Tango</a>',
+'keresolap_hu','<a href="http://keresolap.hu/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Tango keresolap</a>',
+'startlap_hu','<a href="http://kereso.startlap.hu/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Startlab Kereso</a>',
+'polymeta_hu','<a href="http://www.polymeta.hu/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Polymeta</a>',
+# Minor Indian search engines
+'sify','<a href="http://search.sify.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Sify</a>',
+# Minor Italian search engines
+'virgilio','<a href="http://www.virgilio.it/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Virgilio</a>',
+'arianna','<a href="http://arianna.libero.it/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Arianna</a>',
+'supereva','<a href="http://search.supereva.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Supereva</a>',
+'kataweb','<a href="http://www.kataweb.it/ricerca/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Kataweb</a>',
+'aliceitmaster','<a href="http://www.alice.it/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">search.alice.it.master</a>',
+'aliceit','<a href="http://www.alice.it/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">alice.it</a>',
+'gotuneed','<a href="http://www.gotuneed.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">got u need</a>',
+'godado','Godado.it',
+'jumpy\.it','Jumpy.it',
+'shinyseek\.it','Shinyseek.it',
+'teecnoit','<a href="http://www.teecno.it/" rel="nofollow" title="Teecno home page [new window]" target="_blank">Teecno</a>',
+# Minor Israeli search engines
+'genieo','<a href="http://search.genieo.com/" rel="nofollow" title="Genieo home page [new window]" target="_blank">Genieo</a>',
+# Minor Japanese search engines
+'askjp','<a href="http://www.ask.jp/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Ask Japan</a>',
+'sagool','<a href="http://sagool.jp/" rel="nofollow" title="Sagool home page [new window]" target="_blank">Sagool</a>',
+'rakuten', '<a href="http://websearch.rakuten.co.jp" rel="nofollow" title="websearch.rakuten.co.jp [new window]" target="_blank">websearch.rakuten.co.jp</a>',
+# Minor Norwegian search engines
+'start','start.no', 'eniro','<a href="http://www.eniro.no/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Eniro</a>',
+# Minor polish search engines
+'wp','<a href="http://szukaj.wp.pl/" rel="nofollow" title="Wirtualna Polska home page [new window]" target="_blank">Wirtualna Polska</a>',
+'onetpl','<a href="http://szukaj.onet.pl/" rel="nofollow" title="Onet.pl home page [new window]" target="_blank">Onet.pl</a>',
+'dodajpl','<a href="http://www.dodaj.pl/" rel="nofollow" title="Dodaj.pl home page [new window]" target="_blank">Dodaj.pl</a>',
+'gazetapl','<a href="http://szukaj.gazeta.pl/" rel="nofollow" title="Gazeta.pl home page [new window]" target="_blank">Gazeta.pl</a>',
+'gerypl','<a href="http://szukaj.gery.pl/" rel="nofollow" title="Gery.pl home page [new window]" target="_blank">Gery.pl</a>',
+'hogapl','<a href="http://www.hoga.pl/" rel="nofollow" title="Hoga.pl home page [new window]" target="_blank">Hoga.pl</a>',
+'netsprintpl','<a href="http://www.netsprint.pl/" rel="nofollow" title="NetSprint.pl home page [new window]" target="_blank">NetSprint.pl</a>',
+'interiapl','<a href="http://www.google.interia.pl/" rel="nofollow" title="Interia.pl home page [new window]" target="_blank">Interia.pl</a>',
+'katalogonetpl','<a href="http://katalog.onet.pl/" rel="nofollow" title="Katalog.Onet.pl home page [new window]" target="_blank">Katalog.Onet.pl</a>',
+'o2pl','<a href="http://szukaj2.o2.pl/" rel="nofollow" title="o2.pl home page [new window]" target="_blank">o2.pl</a>',
+'polskapl','<a href="http://szukaj.polska.pl/" rel="nofollow" title="Polska home page [new window]" target="_blank">Polska</a>',
+'szukaczpl','<a href="http://www.szukacz.pl/" rel="nofollow" title="Szukacz home page [new window]" target="_blank">Szukacz</a>',
+'wowpl','<a href="http://szukaj.wow.pl/" rel="nofollow" title="Wow.pl home page [new window]" target="_blank">Wow.pl</a>',
+# Minor russian search engines
+'yandex', 'Yandex', 'aport', 'Aport', 'rambler', 'Rambler', 'turtle', 'Turtle', 'metabot', 'MetaBot',
+'mailru','<a href="http://go.mail.ru/" rel="nofollow" title="Mail.Ru home page [new window]" target="_blank">Mail.Ru</a>',
+# Minor Swedish search engines
+'passagen','Evreka',
+'enirose','<a href="http://www.eniro.se/" rel="nofollow" title="Eniro Sverige home page [new window]" target="_blank">Eniro Sverige</a>',
+# Minor Slovak search engines
+'zoznam','<a href="http://www.zoznam.sk/" rel="nofollow" title="Zoznam search engine home page [new window]" target="_blank">Zoznam</a>',
+# Minor Portuguese search engines
+'sapo','<a href="http://www.sapo.pt/" rel="nofollow" title="Sapo search engine home page [new window]" target="_blank">Sapo</a>',
+# Minor Swiss search engines
+'searchch', 'search.ch', 'bluewin', 'search.bluewin.ch',
+'zapmeta_ch', '<a href="http://www.zapmeta.ch/" rel="nofollow" title="ZapMeta.ch [new window]" target="_blank">ZapMeta.ch</a>',
+'etools_ch', '<a href="http://www.etools.ch/" rel="nofollow" title="eTools.ch [new window]" target="_blank">eTools.ch</a>',
+# Minor Croatian, Serbian, Macedonian, Bosnian and Herzegovinian search engines
+'pogodak','<a href="http://www.pogodak.com" rel="nofollow" title="Pogodak search engine home page [new window]" target="_blank">Pogodak.com</a>',
+# Generic search engines
+'search','Unknown search engines'
+);
+
+
+# Sanity check.
+# Uncomment this code and run "perl search_engines.pm" to check that the file entries are consistent
+#-----------------------------------------------------------------------------
+#foreach my $key (@SearchEnginesSearchIDOrder_list1) {
+#      if (! $SearchEnginesHashID{$key}) { error("Entry '$key' has been found in SearchEnginesSearchIDOrder_list1 with no value in SearchEnginesHashID");
+#      foreach my $key2 (@SearchEnginesSearchIDOrder_list2) { if ($key2 eq $key) { error("$key is in 1 and 2\n"); } }
+#      foreach my $key2 (@SearchEnginesSearchIDOrder_listgen) { if ($key2 eq $key) { error("$key is in 1 and gen\n"); } }
+#} }
+#foreach my $key (@SearchEnginesSearchIDOrder_list2) {
+#      if (! $SearchEnginesHashID{$key}) { error("Entry '$key' has been found in SearchEnginesSearchIDOrder_list2 with no value in SearchEnginesHashID");
+#      foreach my $key2 (@SearchEnginesSearchIDOrder_list1) { if ($key2 eq $key) { error("$key is in 2 and 1\n"); } }
+#      foreach my $key2 (@SearchEnginesSearchIDOrder_listgen) { if ($key2 eq $key) { error("$key is in 2 and gen\n"); } }
+#} }
+#foreach my $key (@SearchEnginesSearchIDOrder_listgen) { if (! $SearchEnginesHashID{$key}) { error("Entry '$key' has been found in SearchEnginesSearchIDOrder_listgen with no value in SearchEnginesHashID"); } }
+#foreach my $key (keys %NotSearchEnginesKeys) { if (! $SearchEnginesHashID{$key}) { error("Entry '$key' has been found in NotSearchEnginesKeys with no value in SearchEnginesHashID"); } }
+#foreach my $key (keys %SearchEnginesKnownUrl) {
+#      my $found=0;
+#      foreach my $key2 (values %SearchEnginesHashID) {
+#              if ($key eq $key2) { $found=1; last; }
+#      }
+#      if (! $found) { die "Entry '$key' has been found in SearchEnginesKnownUrl with no value in SearchEnginesHashID"; }
+#}
+#foreach my $key (keys %SearchEnginesHashLib) {
+#      my $found=0;
+#      foreach my $key2 (values %SearchEnginesHashID) {
+#              if ($key eq $key2) { $found=1; last; }
+#      }
+#      if (! $found) { die "Entry '$key' has been found in SearchEnginesHashLib with no value in SearchEnginesHashID"; }
+#}
+#print @SearchEnginesSearchIDOrder_list1." ".@SearchEnginesSearchIDOrder_list2." ".@SearchEnginesSearchIDOrder_listgen;
+
+1;	# A file loaded with require/use must end by returning a true value.
author	Laurent Destailleur <eldy@destailleur.fr>
	Fri, 29 Apr 2016 09:49:06 +0000 (11:49 +0200)
committer	Laurent Destailleur <eldy@destailleur.fr>
	Fri, 29 Apr 2016 09:49:06 +0000 (11:49 +0200)
make/fixdosfiles.sh	[new file with mode: 0755]	patch \| blob
make/fixutf8bomfiles.sh	[new file with mode: 0755]	patch \| blob
wwwroot/cgi-bin/awdownloadcsv.pl		patch \| blob \| blame \| history
wwwroot/cgi-bin/lang/awstats-cn.txt		patch \| blob \| blame \| history
wwwroot/cgi-bin/lang/awstats-lv.txt		patch \| blob \| blame \| history
wwwroot/cgi-bin/lib/robots.pm		patch \| blob \| blame \| history
wwwroot/cgi-bin/lib/search_engines.pm		patch \| blob \| blame \| history