if ($foundparam) { $_misc_h{"TotalMisc"}++; }
}
- # Analyze: favicon (countedtraffic=>1 if favicon)
- #------------------------------------------------
+ # Analyze: favicon (=> countedtraffic=1 if favicon)
+ #--------------------------------------------------
if ($pos_referer >= 0 && $field[$pos_referer] && $urlwithnoquery =~ /$regfavico/o) {
if (($field[$pos_code] != 404 || $urlwithnoquery !~ /\/.+\/favicon\.ico$/i) && ($field[$pos_agent] =~ /MSIE/)) {
# Flag the hit as favicon when the agent is MSIE, except when it is a 404 error on a favicon.ico that is not at the root
$countedtraffic=1; # favicon is a case that must not be counted anywhere else
}
- # Analyze: Worms (countedtraffic=>2 if worm)
- #-------------------------------------------
+ # Analyze: Worms (=> countedtraffic=2 if worm)
+ #---------------------------------------------
if (! $countedtraffic) {
if ($LevelForWormsDetection) {
foreach (@WormsSearchIDOrder) {
}
}
- # Analyze: Status code (countedtraffic=>3 if error)
+ # Analyze: Status code (=> countedtraffic=3 if error)
#--------------------------------------------------
if (! $countedtraffic) {
if ($LogType eq 'W' || $LogType eq 'S') { # HTTP record or Stream record
}
}
- # Analyze: Robot from robot database (countedtraffic=>4 if robot)
+ # Analyze: Robot from robot database (=> countedtraffic=4 if robot)
#----------------------------------------------------------------
if (! $countedtraffic) {
if ($pos_agent >= 0) {
}
}
- # Analyze: Robot from "hit on robots.txt" file (countedtraffic=>5 if robot)
+ # Analyze: Robot from "hit on robots.txt" file (=> countedtraffic=5 if robot)
# -------------------------------------------------------------------------
if (! $countedtraffic) {
if ($urlwithnoquery =~ /$regrobot/o) {
#if ($Debug) { debug(" Analyze referer refererprot=$refererprot refererserver=$refererserver",5); }
# Kind of origin
- if (!$TmpRefererServer{$refererserver}) { # is "=" if same site, "search egine key" if search engine, not defined otherwise
+ if (!$TmpRefererServer{$refererserver}) { # TmpRefererServer{$refererserver} is "=" if same site, "search engine key" if search engine, not defined otherwise
if ($refererserver =~ /$reglocal/o) {
# Intern (This hit came from another page of the site)
if ($Debug) { debug(" Server '$refererserver' is added to TmpRefererServer with value '='",2); }
}
# News Link ?
- if (! $found && $refererprot =~ /^news/i) {
- $found=1;
- if ($PageBool) { $_from_p[5]++; }
- $_from_h[5]++;
- }
+ #if (! $found && $refererprot =~ /^news/i) {
+ # $found=1;
+ # if ($PageBool) { $_from_p[5]++; }
+ # $_from_h[5]++;
+ #}
}
}
if ($ShowOriginStats =~ /P/i) { print "<th bgcolor=\"#$color_p\" width=\"80\">$Message[56]</th><th bgcolor=\"#$color_p\" width=\"80\">$Message[15]</th>"; }
if ($ShowOriginStats =~ /H/i) { print "<th bgcolor=\"#$color_h\" width=\"80\">$Message[57]</th><th bgcolor=\"#$color_h\" width=\"80\">$Message[15]</th>"; }
print "</tr>\n";
- #------- Referrals by direct address/bookmarks
+ #------- Referrals by direct address/bookmark/link in email/etc...
print "<tr><td class=\"aws\"><b>$Message[38]</b></td>";
if ($ShowOriginStats =~ /P/i) { print "<td>".($_from_p[0]?$_from_p[0]:" ")."</td><td>".($_from_p[0]?"$p_p[0] %":" ")."</td>"; }
if ($ShowOriginStats =~ /H/i) { print "<td>".($_from_h[0]?$_from_h[0]:" ")."</td><td>".($_from_h[0]?"$p_h[0] %":" ")."</td>"; }
print "</tr>\n";
- #------- Referrals by news group
- print "<tr><td class=\"aws\"><b>$Message[107]</b></td>";
- if ($ShowOriginStats =~ /P/i) { print "<td>".($_from_p[5]?$_from_p[5]:" ")."</td><td>".($_from_p[5]?"$p_p[5] %":" ")."</td>"; }
- if ($ShowOriginStats =~ /H/i) { print "<td>".($_from_h[5]?$_from_h[5]:" ")."</td><td>".($_from_h[5]?"$p_h[5] %":" ")."</td>"; }
- print "</tr>\n";
#------- Referrals by search engines
print "<tr".Tooltip(13)."><td class=\"aws\"><b>$Message[40]</b> - <a href=\"".($ENV{'GATEWAY_INTERFACE'} || !$StaticLinks?XMLEncode("$AWScript?${NewLinkParams}output=refererse"):"$PROG$StaticLinks.refererse.$StaticExt")."\"$NewLinkTarget>$Message[80]</a><br />\n";
if (scalar keys %_se_referrals_h) {
if ($ShowOriginStats =~ /H/i) { print "<td>".($_from_h[4]?$_from_h[4]:" ")."</td><td>".($_from_h[4]?"$p_h[4] %":" ")."</td>"; }
print "</tr>\n";
}
+ #------- Referrals by news group
+ #print "<tr><td class=\"aws\"><b>$Message[107]</b></td>";
+ #if ($ShowOriginStats =~ /P/i) { print "<td>".($_from_p[5]?$_from_p[5]:" ")."</td><td>".($_from_p[5]?"$p_p[5] %":" ")."</td>"; }
+ #if ($ShowOriginStats =~ /H/i) { print "<td>".($_from_h[5]?$_from_h[5]:" ")."</td><td>".($_from_h[5]?"$p_h[5] %":" ")."</td>"; }
+ #print "</tr>\n";
#------- Unknown origin
print "<tr><td class=\"aws\"><b>$Message[39]</b></td>";
if ($ShowOriginStats =~ /P/i) { print "<td>".($_from_p[1]?$_from_p[1]:" ")."</td><td>".($_from_p[1]?"$p_p[1] %":" ")."</td>"; }
if ($ShowOriginStats =~ /H/i) { print "<td>".($_from_h[1]?$_from_h[1]:" ")."</td><td>".($_from_h[1]?"$p_h[1] %":" ")."</td>"; }
print "</tr>\n";
&tab_end();
+ # Meaning of each index of @_from_p / @_from_h:
+ # 0: Direct
+ # 1: Unknown
+ # 2: SE (search engine)
+ # 3: External link
+ # 4: Internal link
+ # 5: Newsgroup (deprecated)
}
print "\n<a name=\"keys\"> </a>\n\n";
# added Z-Add Link Checker http://w3.z-add.co.uk/linkcheck/
# -- fix - some robots were reported with _ where _ should have been a space.
# changed Xenu Link Sleuth
-# changed microsoft\_url\_control -> microsoft_url_control
+# changed microsoft[_+ ]url[_+ ]control -> microsoft_url_control
# changed favorites_sweeper -> favorites_sweeper
# -- updates
# updated AskJeeves to Ask
# used to know in which order to search Robot IDs.
# Most frequent ones are in list1, used when LevelForRobotsDetection is 1 or more
# Minor robots are in list2, used when LevelForRobotsDetection is 2 or more
-# Note: Robots IDs are in lower case, ' ' and '+' are changed into '_' and are quoted.
+# Note: Robot IDs are lower-case regex fragments: '_', ' ' and '+' are written as '[_+ ]' and other special characters are quoted with a backslash.
#-------------------------------------------------------
@RobotsSearchIDOrder_list1 = (
# Common robots (In robot file)
'googlebot',
'google\-sitemaps',
'gulliver',
-'virus\_detector', # Must be before harvest
+'virus[_+ ]detector', # Must be before harvest
'harvest',
'htdig',
'linkwalker',
'lilina',
-'lycos_',
+'lycos[_+ ]',
'moget',
'muscatferret',
'myweb',
'nomad',
'scooter',
-'yahoo!_slurp_china', # Must come before singluar slurp or yahoo
'slurp',
'^voyager\/',
'weblayers',
'kapsi',
'katipo',
'kilroy',
-'ko_yappo_robot',
+'ko[_+ ]yappo[_+ ]robot',
'kummhttp',
'labelgrabber\.txt',
'larbin',
'snooper',
'solbot',
'speedy',
-'spider_monkey',
+'spider[_+ ]monkey',
'spiderbot',
'spiderline',
'spiderman',
'bender',
'biglotron',
'bittorrent_bot',
-'biz360_spider',
-'blogbridge_service',
+'biz360[_+ ]spider',
+'blogbridge[_+ ]service',
'bloglines',
'blogpulse',
# added OpenWebSpider http://www.openwebspider.org/
'bookmark\-manager',
'boris',
'bumblebee',
-'candlelight\_favorites\_inspector',
+'candlelight[_+ ]favorites[_+ ]inspector',
'cbn00glebot',
'cerberian_drtrs',
'cfnetwork',
'edgeio\-retriever',
'ets_v',
'exactseek',
-'extreme\_picture\_finder',
+'extreme[_+ ]picture[_+ ]finder',
'eventax',
'everbeecrawler',
'everest\-vulcan',
'hoowwwer',
'hpprint',
'htmlparser',
-'html\_link\_validator',
+'html[_+ ]link[_+ ]validator',
'httrack',
'hundesuche\.com\-bot',
'ichiro',
'infociousbot',
'infomine',
'insurancobot',
-'internet\_ninja',
+'internet[_+ ]ninja',
'internetarchive',
'internetseer',
'internetsupervision',
'irlbot',
'isearch2006',
'iupui_research_bot',
-'jrtwine\_software\_check\_favorites\_utility',
+'jrtwine[_+ ]software[_+ ]check[_+ ]favorites[_+ ]utility',
'justview',
'kalambot',
'kamano\.de_newsfeedverzeichnis',
'mediapartners\-google',
'megite',
'metaspinner',
-'microsoft_url_control',
+'microsoft[_+ ]url[_+ ]control',
'mini\-reptile',
'minirank',
'missigua_locator',
'nutch', # Must come after other nutch versions
'ocelli',
'octora_beta_bot',
-'omniexplorer\_bot',
-'onet\.pl\_sa',
+'omniexplorer[_+ ]bot',
+'onet\.pl[_+ ]sa',
'onfolio',
'opentaggerbot',
'openwebspider',
'pear_http_request_class',
'peerbot',
'perman',
-'php\_version\_tracker',
+'php[_+ ]version[_+ ]tracker',
'php_version_tracker',
'pictureofinternet',
'ping\.blo\.gs',
'sbider',
'schizozilla',
'scumbot',
-'searchguild\_dmoz\_experiment',
+'searchguild[_+ ]dmoz[_+ ]experiment',
'seekbot',
'sensis_web_crawler',
'seznambot',
'vortex',
'vse',
'w3c\-checklink',
-'w3c\_css\_validator\_jfouffa',
+'w3c[_+ ]css[_+ ]validator[_+ ]jfouffa',
'w3c_validator',
'wavefire',
'webclipping\.com',
'webfilter',
'webindexer',
'webminer',
-'website\_monitoring\_bot',
+'website[_+ ]monitoring[_+ ]bot',
'webvulncrawl',
'wells_search',
'wonderer',
'googlebot','<a href="http://www.google.com/bot.html" title="Bot home page [new window]" target="_blank">Googlebot</a>',
'google\-sitemaps', 'Google Sitemaps',
'gulliver','Northern Light Gulliver',
-'virus\_detector','<a href="http://www.securecomputing.com/" title="virus_harvester@securecomputing.com; Bot home page [new window]" target="_blank">virus_detector</a>',
+'virus[_+ ]detector','<a href="http://www.securecomputing.com/" title="virus_harvester@securecomputing.com; Bot home page [new window]" target="_blank">virus_detector</a>',
'harvest','Harvest',
'htdig','ht://Dig',
'linkwalker','LinkWalker',
'lilina','Lilina',
-'lycos_','Lycos',
+'lycos[_+ ]','Lycos',
'moget','moget',
'muscatferret','Muscat Ferret',
'myweb','Internet Shinchakubin',
'nomad','Nomad',
'scooter','Scooter',
-'yahoo!_slurp_china','<a href="http://misc.yahoo.com.cn/help.html" title="Bot home page [new window]" target="_blank">Yahoo! Slurp China</a>',
'slurp','<a href="http://help.yahoo.com/help/us/ysearch/slurp/" title="Bot home page [new window]" target="_blank">Yahoo Slurp</a>',
'^voyager\/','Voyager',
'weblayers','Weblayers',
'kapsi','image.kapsi.net',
'katipo','Katipo',
'kilroy','Kilroy',
-'ko_yappo_robot','KO_Yappo_Robot',
+'ko[_+ ]yappo[_+ ]robot','KO_Yappo_Robot',
'kummhttp','<a href="http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_g_l_301105_2\b" title="Bot documentation page [new window]" target="_blank">KummHttp</a>',
'labelgrabber\.txt','LabelGrabber',
'larbin','<a href="http://para.inria.fr/~ailleret/larbin/index-eng.html" title="Bot home page [new window]" target="_blank">larbin</a>',
'snooper','Snooper',
'solbot','Solbot',
'speedy','<a href="http://www.entireweb.com/about/search_tech/speedyspider/" title="Speedy Spider home page [new window]" target="_blank">Speedy Spider</a>',
-'spider_monkey','spider_monkey',
+'spider[_+ ]monkey','Spider monkey',
'spiderbot','SpiderBot',
'spiderline','Spiderline Crawler',
'spiderman','<a href="http://www.iscrawling.com" title="Spiderman home page [new window]" target="_blank">Spiderman</a>',
'bender','<a href="http://bender.ucr.edu/" title="Bot home page [new window]" target="_blank">bender</a> <a href="http://ivia.ucr.edu/manuals/NiFC/current/index.shtml" title="Bot home page [new window]" target="_blank">focused_crawler</a>',
'biglotron','<a href="http://www.biglotron.com/robot.html" title="Bot home page [new window]" target="_blank">Biglotron</a>',
'bittorrent_bot','<a href="http://www.bittorrent.com/" title="Bot home page [new window]" target="_blank">BitTorrent Bot</a>',
-'biz360_spider','<a href="http://www.biz360.com/" title="blogsmanager@biz360.com Bot home page [new window]" target="_blank">Biz360 spider</a>',
-'blogbridge_service','<a href="http://www.blogbridge.com/" title="Bot home page [new window]" target="_blank">BlogBridge Service</a>',
+'biz360[_+ ]spider','<a href="http://www.biz360.com/" title="blogsmanager@biz360.com Bot home page [new window]" target="_blank">Biz360 spider</a>',
+'blogbridge[_+ ]service','<a href="http://www.blogbridge.com/" title="Bot home page [new window]" target="_blank">BlogBridge Service</a>',
'bloglines','<a href="http://www.bloglines.com/" title="Bot home page [new window]" target="_blank">Bloglines</a>',
'blogpulse','<a href="http://www.intelliseek.com/" title="Bot home page [new window]" target="_blank">BlogPulse ISSpider intelliseek.com</a>',
'blogsearch','<a href="http://www.icerocket.com/" title="Bot home page [new window]" target="_blank">BlogSearch</a>',
'bookmark\-manager','<a href="http://bkm.sourceforge.net/" title="Bookmark-Manager home page [new window]" target="_blank">Bookmark-Manager</a>',
'boris', 'Boris',
'bumblebee', 'Bumblebee (relevare.com)',
-'candlelight\_favorites\_inspector','<a href="http://www.candlelight.com/home.html" title="Candlelight_Favorites_Inspector home page [new window]" target="_blank">Candlelight_Favorites_Inspector</a>',
+'candlelight[_+ ]favorites[_+ ]inspector','<a href="http://www.candlelight.com/home.html" title="Candlelight_Favorites_Inspector home page [new window]" target="_blank">Candlelight_Favorites_Inspector</a>',
'cbn00glebot','cbn00glebot',
'cerberian_drtrs','<a href="http://www.pgts.com.au/cgi-bin/psql?robot_info=25240" title="Bot home page [new window]" target="_blank">Cerberian Drtrs</a>',
'cfnetwork','<a href="http://www.cocoadev.com/index.pl?CFNetwork" title="CFNetwork home page [new window]" target="_blank">CFNetwork</a>',
'edgeio\-retriever','<a href="http://www.edgeio.com/" title="Bot home page [new window]" target="_blank">edgeio-retriever</a>',
'ets_v','<a href="http://www.freetranslation.com/help/" title="ETS home page [new window]" target="_blank">ETS</a> Enterprise Translation Server',
'exactseek','ExactSeek Crawler',
-'extreme\_picture\_finder','<a href="http://www.exisoftware.com/" title="Extreme_Picture_Finder home page [new window]" target="_blank">Extreme_Picture_Finder</a>',
+'extreme[_+ ]picture[_+ ]finder','<a href="http://www.exisoftware.com/" title="Extreme_Picture_Finder home page [new window]" target="_blank">Extreme_Picture_Finder</a>',
'eventax','<a href="http://www.eventax.de/" title="eventax home page [new window]" target="_blank">eventax</a>',
'everbeecrawler','EverbeeCrawler',
'everest\-vulcan','<a href="http://everest.vulcan.com/crawlerhelp" title="Bot home page [new window]" target="_blank">Everest-Vulcan</a>',
'hoowwwer','<a href="http://cosco.hiit.fi/search/hoowwwer/" title="HooWWWer home page [new window]" target="_blank">HooWWWer</a>',
'hpprint','HPPrint',
'htmlparser','<a href="http://htmlparser.sourceforge.net/" title="HTMLParser home page [new window]" target="_blank">HTMLParser</a>',
-'html\_link\_validator','<a href="http://www.lithopssoft.com/ " title="Html_Link_Validator home page [new window]" target="_blank">Html_Link_Validator</a>',
+'html[_+ ]link[_+ ]validator','<a href="http://www.lithopssoft.com/ " title="Html_Link_Validator home page [new window]" target="_blank">Html_Link_Validator</a>',
'httrack','<a href="http://www.httrack.com/" title="Bot home page [new window]" target="_blank">HTTrack off-line browser</a>',
'hundesuche\.com\-bot','<a href="http://www.hundesuche.com/" title="Hundesuche.com-Bot home page [new window]" target="_blank">Hundesuche.com-Bot</a>',
'ichiro','<a href="http://help.goo.ne.jp/door/crawlerE.html" title="Bot home page [new window]" target="_blank">ichiro</a>',
'infociousbot','<a href="http://corp.infocious.com/tech_crawler.php" title="InfociousBot home page [new window]" target="_blank">InfociousBot</a>',
'infomine','<a href="http://infomine.ucr.edu/useragents" title="Bot home page [new window]" target="_blank">INFOMINE VLCrawler</a>',
'insurancobot','<a href="http://www.fastspywareremoval.com/" title="InsurancoBot home page [new window]" target="_blank">InsurancoBot</a>',
-'internet\_ninja','<a href="http://www.dti.ne.jp/ " title="Internet_Ninja home page [new window]" target="_blank">Internet_Ninja </a>',
+'internet[_+ ]ninja','<a href="http://www.dti.ne.jp/ " title="Internet_Ninja home page [new window]" target="_blank">Internet_Ninja </a>',
'internetarchive','<a href="http://lucene.apache.org/nutch/bot.html " title="InternetArchive home page [new window]" target="_blank">InternetArchive</a>',
'internetseer', 'InternetSeer',
'internetsupervision','<a href="http://internetsupervision.com/" title="InternetSupervision home page [new window]" target="_blank">InternetSupervision</a>',
'irlbot','<a href="http://irl.cs.tamu.edu/crawler" title="Bot home page [new window]" target="_blank">IRLbot</a>',
'isearch2006','<a href="http://www.yahoo.com.cn/" title="isearch2006 home page [new window]" target="_blank">isearch2006</a>',
'iupui_research_bot','<a href="http://spamhuntress.com/2005/04/25/a-mail-harvester-visits/" title="IUPUI_Research_Bot home page [new window]" target="_blank">IUPUI_Research_Bot</a>',
-'jrtwine\_software\_check\_favorites\_utility','<a href="http://www.jrtwine.com/Products/CheckFavs/" title="JRTwine_Software_Check_Favorites_Utility home page [new window]" target="_blank">JRTwine_Software_Check_Favorites_Utility</a>',
+'jrtwine[_+ ]software[_+ ]check[_+ ]favorites[_+ ]utility','<a href="http://www.jrtwine.com/Products/CheckFavs/" title="JRTwine_Software_Check_Favorites_Utility home page [new window]" target="_blank">JRTwine_Software_Check_Favorites_Utility</a>',
'justview', 'JustView',
'kalambot','<a href="http://64.124.122.251/feedback.html" title="KalamBot home page [new window]" target="_blank">KalamBot</a>',
'kamano\.de_newsfeedverzeichnis','<a href="http://www.kamano.de/" title="kamano.de NewsFeedVerzeichnis home page [new window]" target="_blank">kamano.de NewsFeedVerzeichnis</a>',
'megite','<a href="http://www.megite.com/" title="Megite home page [new window]" target="_blank">Megite</a>',
'metager\-linkchecker','MetaGer LinkChecker',
'metaspinner','<a href="http://index.meta-spinner.de/" title="Metaspinner home page [new window]" target="_blank">Metaspinner</a>',
-'microsoft_url_control','<a href="http://www.webmasterworld.com/forum11/1005.htm" title="Microsoft URL Control home page [new window]" target="_blank">Microsoft URL Control</a>',
+'microsoft[_+ ]url[_+ ]control','<a href="http://www.webmasterworld.com/forum11/1005.htm" title="Microsoft URL Control home page [new window]" target="_blank">Microsoft URL Control</a>',
'minirank','<a href="http://minirank.com/" title="miniRank home page [new window]" target="_blank">miniRank</a>',
'mini\-reptile','Mini-reptile',
'missigua_locator','<a href="http://www.webmasterworld.com/forum11/2690.htm" title="Missigua_Locator home page [new window]" target="_blank">Missigua_Locator</a>',
'nutch','<a href="http://lucene.apache.org/nutch/" title="Bot home page. Used by many, including Looksmart. [new window]" target="_blank">Nutch</a>',
'ocelli','<a href="http://www.globalspec.com/Ocelli/" title="Ocelli home page [new window]" target="_blank">Ocelli</a>',
'octora_beta_bot','<a href="http://www.octora.com/" title="Bot home page [new window]" target="_blank">Octora Beta Bot</a>',
-'omniexplorer\_bot','<a href="http://www.omni-explorer.com/" title="Bot home page. [new window]" target="_blank">OmniExplorer Bot</a>',
-'onet\.pl\_sa','<a href="http://szukaj.onet.pl/" title="Onet.pl_SA home page [new window]" target="_blank">Onet.pl_SA</a>',
+'omniexplorer[_+ ]bot','<a href="http://www.omni-explorer.com/" title="Bot home page. [new window]" target="_blank">OmniExplorer Bot</a>',
+'onet\.pl[_+ ]sa','<a href="http://szukaj.onet.pl/" title="Onet.pl_SA home page [new window]" target="_blank">Onet.pl_SA</a>',
'onfolio','<a href="http://www.onfolio.com/" title="Bot home page [new window]">Onfolio</a>',
'opentaggerbot','<a href="http://www.opentagger.com/opentaggerbot.htm" title="Bot home page [new window]">OpenTaggerBot</a>',
'openwebspider','<a href="http://www.openwebspider.org/" title="OpenWebSpider home page [new window]" target="_blank">OpenWebSpider</a>',
'pear_http_request_class','<a href="http://pear.php.net/" title="PEAR HTTP Request class home page [new window]" target="_blank">PEAR HTTP Request class</a>',
'peerbot','<a href="http://www.peerbot.com/" title="PEERbot home page [new window]" target="_blank">PEERbot</a>',
'perman', 'Perman surfer',
-'php\_version\_tracker','<a href="http://www.nexen.net/phpversion/bot.php" title="PHP_version_tracker home page [new window]" target="_blank">PHP_version_tracker</a>',
+'php[_+ ]version[_+ ]tracker','<a href="http://www.nexen.net/phpversion/bot.php" title="PHP_version_tracker home page [new window]" target="_blank">PHP_version_tracker</a>',
'php_version_tracker','<a href="http://www.nexen.net/phpversion/bot.php" title="PHP version tracker home page [new window]" target="_blank">PHP version tracker</a>',
'pictureofinternet','<a href="http://malfunction.org/poi/" title="PictureOfInternet home page [new window]" target="_blank">PictureOfInternet</a>',
'ping\.blo\.gs','<a href="http://blo.gs/ping.php" title="Bot home page. [new window]" target="_blank">ping.blo.gs</a>',
'sbider','<a href="http://www.sitesell.com/sbider.html" title="Bot home page [new window]" target="_blank">SBIder</a>',
'schizozilla','<a href="http://spamhuntress.com/2005/03/18/gizmo/ " title="Schizozilla home page [new window]" target="_blank">Schizozilla</a>',
'scumbot','Scumbot',
-'searchguild\_dmoz\_experiment','<a href="http://www.searchguild.com/" title="SearchGuild_DMOZ_Experiment home page [new window]" target="_blank">SearchGuild_DMOZ_Experiment</a>',
+'searchguild[_+ ]dmoz[_+ ]experiment','<a href="http://www.searchguild.com/" title="SearchGuild_DMOZ_Experiment home page [new window]" target="_blank">SearchGuild_DMOZ_Experiment</a>',
'seekbot','<a href="http://www.seekbot.net/bot.html" title="Bot home page [new window]">Seekbot</a>',
'sensis_web_crawler','<a href="http://www.sensis.com.au/" title="Sensis Web Crawler home page [new window]" target="_blank">Sensis Web Crawler</a>',
'seznambot','<a href="http://fulltext.seznam.cz/" title="Bot home page [new window]" target="_blank">SeznamBot</a>',
'vortex','<a href="http://marty.anstey.ca/projects/robots/vortex/" title="Bot home page [new window]" target="_blank">VORTEX</a>',
'vse','<a href="http://www.vivisimo.com/" title="VSE home page [new window]" target="_blank">VSE</a>',
'w3c\-checklink','<a href="http://validator.w3.org/checklink/" title="Bot home page [new window]" target="_blank">W3C Link Checker</a>',
-'w3c\_css\_validator\_jfouffa', '<a href="http://jigsaw.w3.org/css-validator/" title="Bot home page [new window]" target="_blank">W3C jigsaw CSS Validator</a>',
+'w3c[_+ ]css[_+ ]validator[_+ ]jfouffa', '<a href="http://jigsaw.w3.org/css-validator/" title="Bot home page [new window]" target="_blank">W3C jigsaw CSS Validator</a>',
'w3c_validator','<a href="http://validator.w3.org/" title="Bot home page [new window]" target="_blank">W3C Validator</a>',
'wavefire','<a href="http://www.wavefire.com" title="info@wavefire.com; Bot home page [new window]" target="_blank">Wavefire</a>',
'webclipping\.com', 'WebClipping.com',
'webfilter','<a href="http://www.verso.com/enterprise/netspective/webfilter.asp" title="Bot home page [new window]" target="_blank">WebFilter</a>',
'webindexer','<a href="mailto://webindexerv1@yahoo.com" title="WebIndexer home page [new window]" target="_blank">WebIndexer</a>',
'webminer','<a href="http://64.124.122.252/feedback.html" title="WebMiner home page [new window]" target="_blank">WebMiner</a>',
-'website\_monitoring\_bot','<a href="http://InternetSupervision.com/UrlMonitor/3/" title="Website_Monitoring_Bot home page [new window]" target="_blank">Website_Monitoring_Bot</a>',
+'website[_+ ]monitoring[_+ ]bot','<a href="http://InternetSupervision.com/UrlMonitor/3/" title="Website_Monitoring_Bot home page [new window]" target="_blank">Website_Monitoring_Bot</a>',
'webvulncrawl', 'WebVulnCrawl',
'wells_search','<a href="http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_t_z_1484\b " title="Wells Search home page [new window]" target="_blank">Wells Search</a>',
'wonderer', 'Web Wombat Redback Spider',