From: Laurent Destailleur <eldy@destailleur.fr>
Date: Sun, 28 Jul 2024 13:42:06 +0000 (+0200)
Subject: Update test files
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=48d18c2334b78ab9bfd77e1ebe590bb808d331e7;p=thirdparty%2FAWStats.git

Update test files
---

diff --git a/robots.pm b/robots.pm
deleted file mode 100644
index 6290432a..00000000
--- a/robots.pm
+++ /dev/null
@@ -1,2786 +0,0 @@
-# AWSTATS ROBOTS DATABASE
-#-------------------------------------------------------
-# If you want to add robots to extend AWStats database detection capabilities,
-# you must add an entry in RobotsSearchIDOrder_listx and RobotsHashIDLib.
-
-# The entry in RobotsSearchIDOrder_listx is a Perl regular expression
-# (see http://perldoc.perl.org/perlreref.html). AWStats applies these
-# expressions to the user agent string in the order given by the lists. The
-# first match specifies the robot.
-#
-# Note: This regular expression must not contain any whitespace.
-# Otherwise AWStats will produce lines in the database that
-# will be misinterpreted and as a consequence the corresponding data in the
-# generated HTML reports will be wrong. If you want to match whitespace in
-# the user agent string, use other constructs like '\s', '[[:blank:]]',
-# '\p{IsSpace}', '\x20' etc.
-#
-# The corresponding entry in RobotsHashIDLib contains the regular expression
-# as key, followed by a string containing HTML-text. AWStats inserts this
-# text into reports to describe the bot. If possible the text should contain
-# a link to the bot home page. This makes it easier for sysadmins to find
-# the information necessary e.g. to adapt the robots.txt file.
-#
-# An entry in the RobotsAffiliateLib is not necessary. An entry in this list
-# contains as first part the regular expression specifying the bot. The
-# second part is a string that gives the Company or product managing the bot.
-# This information is not used yet.
-#
-# There are several sorts of bots that AWStats is not able to detect and
-# therefore a considerable amount of bot generated traffic counts
-# as user traffic:
-#
-# a) A crawler that identifies itself in the referrer string, but not in
-#    the user agent string. An example is the crawler from semalt.semalt.com.
-#
-# b) Crawlers that correctly access robots.txt but identify themselves in
-#    the user agent string only once or just a few times. Most of the
-#    time a user agent string is used that does not contain hints that
-#    a bot is involved. An example is the iCjobs spider.
-#    msnbot-UDiscovery/2.0b seems to show this behaviour too.
-#
-#
-#
-#-------------------------------------------------------
-
-# 2023-07-04 RobC 
-#              Removed Dalvik as native Android UI Browser User Agent
-#              Removed CFNetwork as native iOS and OSX Browser User Agent
-
-# 2021-05-05 RobC
-
-# Removed Baidu catchall because it was picking up baidu.sogo.uc.UCBrowser, which is a phone browser
-# Added baiduspider- catchall instead
-
-# Newly added from 2021-05-05
-# Adsbot
-# BW/
-# Bytespider
-# CheckMarkNetwork/
-# DuckDuckBot
-# # Foregenix Web Scan
-# IonCrawl
-# Linguee Bot
-# Neevabot
-# PetalBot
-# TkBot
-# vuhuvBot
-
-
-# 2018-03-13 RobC 
-#              Added 36 robots and one generic ( survey ) using v 7.7 robots file as base. 
-#              Also moved robot "Obot" into generics so that it is singled out as an individual Robot.         
-#
-# 2016-09-02 RobC 
-#              Fixed a few errors and added a few missing bots from awstats 7.5 release.
-#
-# 2016-08-28 RobC 
-#              Complete re-build of this file almost from scratch.
-#              dropped many old bots, added many new bots and reordered file.
-#              edited and added regex expressions to stop spaces causing problems.
-#              You should tune file by placing the most common robots crawling your site at top 
-#              in List1.
-#
-#
-#              N.B. Many bots need to be in the correct order, so don't change the order without checking whether
-#              the change will cause counts to be allocated to the wrong bot. This is not always simple.
-#
-#
-# 2005-08-19 Sean Carlos http://www.antezeta.com/awstats.html
-#              added dipsie (not tested with real data).
-#              added DomainsDB.net http://domainsdb.net/
-#              added ia_archiver-web.archive.org (was inadvertently grouped with Alexa traffic)
-#              added Nutch (used by looksmart (furl?))
-#              added rssImagesBot
-#              added Sqworm
-#              added t\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e
-#              added w3c css-validator
-#              added documentation link to bot home pages for above and selected major bots.
-#                    In the case of international bots, choose .com page.
-#                    Included tool tip (html "title").
-#                    To do: parameterize to match both AWStats language and tooltips settings.
-#                    To do: add html links for all bots based on current documentation in source
-#                           files referenced below.
-#              changed '\wbot[\/\-]', to '\wbot[\/\-]' (removed comma)
-#              made minor grammar corrections to notes below
-# 2005-08-24	added YahooSeeker-Testing
-#              	added w3c-checklink
-#              	updated url for ask.com
-# 2005-08-24   	added Girafabot http://www.girafa.com/
-# 2005-08-30   	added PluckFeedCrawler http://www.pluck.com/
-#		added Gaisbot/3.0 (robot05@gais.cs.ccu.edu.tw; )
-#		added geniebot (wgao@genieknows.com)
-#		added BecomeBot link http://www.become.com/site_owners.html
-#		added topicblogs http://www.topicblogs.com/
-#		added Powermarks; seen used by referrer spam
-#		added YahooSeeker
-#		added NG/2. http://www.exabot.com/
-# 2005-09-15	added link for Walhello appie
-#		added bender focused_crawler
-#		updated YahooSeeker description (blog crawler)
-# 2005-09-16	added link for http://linkchecker.sourceforge.net
-# 		added ConveraCrawler/0.9d ( http://www.authoritativeweb.com/crawl)
-#		added Blogslive  info@blogslive.com intelliseek.com
-#		added BlogPulse (ISSpider-3.0) intelliseek.com
-# 2005-09-26	added Feedfetcher-Google (http://www.google.com/feedfetcher.html)
-#		added EverbeeCrawler
-#		added Yahoo-Blogs http://help.yahoo.com/help/us/ysearch/crawling/crawling-02.html
-#		added link for Bloglines http://www.bloglines.com
-# 2005-10-19	fixed Feedfetcher-Google (http://www.google.com/feedfetcher.html)
-# 		added Blogshares Spiders (Synchronized V1.5.1)
-#		added yacy
-# 2005-11-21	added Argus www.simpy.com
-#		added BlogsSay :: RSS Search Crawler (http://www.blogssay.com/)
-#		added MJ12bot http://majestic12.co.uk/bot.php
-#		added OpenTaggerBot (http://www.opentagger.com/opentaggerbot.htm)
-#		added OutfoxBot/0.3 (For internet experiments; outfox.agent@gmail.com)
-#		added RufusBot Rufus Web Miner http://64.124.122.252.webaroo.com/feedback.html
-#		added Seekbot (http://www.seekbot.net/bot.html)
-#		added Yahoo-MMCrawler/3.x (mms-mmcrawler-support@yahoo-inc.com)
-#               added link for BaiDuSpider
-#		added link for Blogshares Spider
-#		added link for StackRambler http://www.rambler.ru/doc/faq.shtml
-#		added link for WISENutbot
-#		added link for ZyBorg/1.0 (wn-14.zyborg@looksmart.net; http://www.WISEnutbot.com).  Moved location to above wisenut to avoid classification as wisenut
-# 2005-12-15
-#		added FAST Enteprise Crawler/6 (www dot fastsearch dot com). Note spelling Enteprise not Enterprise.
-#		added findlinks http://wortschatz.uni-leipzig.de/findlinks/
-#		added IBM Almaden Research Center WebFountain(TM) http://www.almaden.ibm.com/cs/crawler [hc3]
-#		added INFOMINE/8.0 VLCrawler (http://infomine.ucr.edu/useragents)
-#		added lmspider (lmspider@scansoft.com) http://www.nuance.com/
-#		added noxtrumbot http://www.noxtrum.com/
-#		added SandCrawler (Microsoft)
-#		added SBIder http://www.sitesell.com/sbider.html
-#		added SeznamBot http://fulltext.seznam.cz/
-#		added sohu-search http://corp.sohu.com/ (looked for //robots.txt not /robots.txt)
-#		added the ruffle SemanticWeb crawler v0.5 - http://www.unreach.net
-#		added WebVulnCrawl/1.0 libwww-perl/5.803 (looked for //robots.txt not /robots.txt)
-#		added Yahoo! Japan keyoshid http://www.yahoo.co.jp/
-#		added Y!J http://help.yahoo.co.jp/help/jp/search/indexing/indexing-15.html
-#		added link for GigaBot
-#		added link for MagpieRSS
-#		added link for MSIECrawler
-# 2005-12-21
-#		added aipbot http://www.aipbot.com aipbot@aipbot.com [matthys70 users.sourceforge.net]
-#		added Everest-Vulcan Inc./0.1 (R&D project; http://everest.vulcan.com/crawlerhelp)
-#		added Fast-Search-Engine http://www.fast-search-engine.com/ [matthys70  users.sourceforge.net]
-#		added g2Crawler (nobody@airmail.net) http://crawler.instantnetworks.net/
-#		added Jakarta commons-httpclient http://jakarta.apache.org/commons/httpclient/ (hit robots.txt).  May be used as robot or browser - a site may want to remove this entry.
-#		added OmniExplorer_Bot http://www.omni-explorer.com/ [matthys70 users.sourceforge.net]
-#		added USTC-Semantic-Group ai.ustc.edu.cn/mas/en/research/index.php ?
-# 2005-12-22
-#		added EARTHCOM.info www.earthcom.info
-#		added HTTrack off-line browser 'httrack','HTTrack', http://www.httrack.com/ [Moizes Gabor]
-#		added KummHttp http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_g_l_301105_2\b [Moizes Gabor]
-# 2006-01-01
-#		added Dulance http://www.dulance.com/bot.jsp
-#		added MojeekBot http://www.mojeek.com/bot.html
-#		added nicebot http://www.egghelp.org/setup.htm ?
-#		added Snappy http://www.urltrends.com/faq.php
-#		added sohu agent
-#		added VORTEX http://marty.anstey.ca/robots/vortex/ [matthys70 users.sourceforge.net]
-#		added zspider http://feedback.redkolibri.com/
-# 2006-01-13
-#		added boitho.com-dc http://www.boitho.com/dcbot.html
-#		added IRLbot http://irl.cs.tamu.edu/crawler
-#		added virus_detector virus_harvester@securecomputing.com
-#		added Wavefire http://www.wavefire.com; info@wavefire.com
-
-#		added WebFilter Robot
-# 2006-01-24
-#		added Shim-Crawler http://www.logos.ic.i.u-tokyo.ac.jp/crawler/; crawl@logos.ic.i.u-tokyo.ac.jp
-#		added Exabot exabot.com
-#		added LetsCrawl.com http://letscrawl.com
-#		added ichiro http://help.goo.ne.jp/door/crawlerE.html
-# 2006-01-27    additional 22 robots from a list provided by Moizes Gabor
-#		added ALeadSoftbot	http://www.aleadsoft.com/bot.htm
-#		added CipinetBot	http://www.cipinet.com/bot.html
-#		added Cuasarbot	http://www.cuasar.com/
-#		added Dumbot	http://www.dumbfind.com/
-#		added Extreme_Picture_Finder	http://www.exisoftware.com/
-#		added Fooky.com/ScorpionBot/ScoutOut	http://www.fooky.com/scorpionbots
-#		added IlTrovatore-Setaccio	http://www.iltrovatore.it/aiuto/motore_di_ricerca.html	bot@iltrovatore.it
-#		added InsurancoBot	http://www.fastspywareremoval.com/
-#		added InternetArchive	http://lucene.apache.org/nutch/bot.html 	nutch-agent@lucene.apache.org
-#		added KazoomBot	http://www.kazoom.ca/bot.html	kazoombot@kazoom.ca
-#		added Kurzor	http://www.easymail.hu/	cursor@easymail.hu
-#		added NutchCVS	http://lucene.apache.org/nutch/bot.html	nutch-agent@lucene.apache.org
-#		added NutchOSU-VLIB	http://lucene.apache.org/nutch/bot.html	nutch-agent@lucene.apache.org
-#		added Orbiter	http://www.dailyorbit.com/bot.htm
-#		added PHP_version_tracker	http://www.nexen.net/phpversion/bot.php
-#		added SuperBot	http://www.sparkleware.com/superbot/
-#		added SynooBot	http://www.synoo.de/bot.html	webmaster@synoo.com
-#		added TestBot	http://www.agbrain.com/
-#		added TutorGigBot	http://www.tutorgig.info/
-#		added WebIndexer	mailto://webindexerv1@yahoo.com
-#		added WebMiner	http://64.124.122.252/feedback.html
-# 2006-02-01
-#		added heritrix https://sourceforge.net/forum/message.php?msg_id=3550202
-#		added Zeus Webster Pro https://sourceforge.net/forum/message.php?msg_id=3141164
-#               additional robots from a list provided by Moizes Gabor [ mojzi -a-t- free mail hu ]
-#		added Candlelight_Favorites_Inspector
-#		added DomainChecker
-#		added EasyDL
-#		added FavOrg
-#		added Favorites_Sweeper
-#		added Html_Link_Validator
-#		added Internet_Ninja
-#		added JRTwine_Software_Check_Favorites_Utility
-#		fixed Microsoft_URL_Control
-#		added miniRank
-#		added Missigua_Locator
-#		added NPBot
-#		added Ocelli
-#		added Onet.pl_SA
-#		added proodleBot
-#		added SearchGuild_DMOZ_Experiment
-#		added Susie
-#		added Website_Monitoring_Bot
-#		added Xenu_Link_Sleuth
-# 2006-05-15
-#		added ASPseek http://www.aspseek.org/
-#		added AdamM Bot http://home.blic.net/adamm/
-#		added archive.org_bot http://crawls.archive.org/collections/bncf/crawl.html
-#		added arianna.libero.it (Italian Portal/search engine)
-#		added Biz360 spider http://www.biz360.com
-#		added BlogBridge Service http://www.blogbridge.com/
-#		added BlogSearch http://www.icerocket.com/
-#		added libcrawl
-#		added edgeio-retriever http://www.edgeio.com
-#		added FeedFlow http://feedflow.com/about
-#		added Biblioteca Nazionale Centrale di Firenze (Italian National Archive) http://www.bncf.firenze.sbn.it/raccolta.txt
-#		added Java catchall - used by many spam bots
-#		added lanshanbot http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=%5Cbid_g_l_140406_1%5Cb
-#		added msnbot-media http://search.msn.com/msnbot.htm
-#		added MT::Telegraph::Agent
-#		added Netluchs http://www.netluchs.de/ (German SE bot)
-#		added oBot http://www.webmasterworld.com/forum11/1616.htm
-#		added Onfolio http://www.onfolio.com/  (IE Toolbar plugin) - hit rss feeds.
-#		added ping.blo.gs http://blo.gs/ping.php blog bot
-#		added Sphere Scout http://www.sphere.com/
-#		added sproose crawler http://www.sproose.com/bot.html
-#		added SyndicAPI http://syndicapi.com/bot.html
-#		added Yahoo! Mindset http://mindset.research.yahoo.com/
-#		added msrabot
-#		added Vagabondo & Vagabondo-WAP http://www.wise-guys.nl/Contact/index.php?botselected=webagents)#=uk
-#		fixed Missigua Locator detection (Missigua_Locator -> Missigua Locator)
-#		changed echo to echo! to avoid conflict with the bonecho (Firefox 2.0) browser.
-#			This requires you to reprocess historic logs if you want EchO! to be recognized for older reports.
-# 2006-05-17
-#		added Alpha Search Agent # 62.152.125.60 Eurologon Srl
-#		added Krugle http://www.krugle.com/crawler/info.html the search engine for developers
-#		added Octora Beta Bot http://www.octora.com/ # Blog and Rss Search Engine
-#		added UbiCrawler http://law.dsi.unimi.it/ubicrawler/
-#		added Yahoo! Slurp China http://misc.yahoo.com.cn/help.html
-#			You must reprocess old logs for the Yahoo! Slurp China bot to be detected in old reports
-# 2006-05-20
-#		added 1-More Scanner http://www.myzips.com/software/1-More-Scanner.phtml
-#		added Accoona-AI-Agent http://www.accoona.com/
-#		added ActiveBookmark http://www.libmaster.com/active_bookmark.php
-#		added BIGLOTRON http://www.biglotron.com/robot.html
-#		added Bookmark-Manager http://bkm.sourceforge.net/
-#		added cbn00glebot
-#		added Cerberian Drtrs http://www.pgts.com.au/cgi-bin/psql?robot_info=25240
-#		added CFNetwork http://www.cocoadev.com/index.pl?CFNetwork
-#		added CheckWeb link validator http://p.duby.free.fr/chkweb.htm
-#		added Computer and Automation Research Institute Crawler http://www.ilab.sztaki.hu/~stamas/publications/p184-benczur.html
-#		added ConveraCrawler http://www.authoritativeweb.com/crawl/
-#		added ConveraMultiMediaCrawler http://www.authoritativeweb.com/crawl/
-#		added CSE HTML Validator Lite Online http://online.htmlvalidator.com/php/onlinevallite.php
-#		added Cursor http://adcenter.hu/docs/en/bot.html
-#		added Custo http://www.netwu.com/custo/
-#		added DataFountains/DMOZ Downloader http://infomine.ucr.edu/
-#		added Deepindex http://www.deepindex.net/faq.php
-#		added DNSGroup http://www.dnsgroup.com/
-#		added DoCoMo http://www.nttdocomo.co.jp/
-#		added dumm.de-Bot http://www.dumm.de/
-#		added ETS v http://www.freetranslation.com/help/
-#		added eventax http://www.eventax.de/
-#		added FAST Enterprise Crawler * crawleradmin.t-info@telekom.de http://www.telekom.de/
-#		added FAST Enterprise Crawler http://www.fast.no/
-#		added FAST Enterprise Crawler * T-Info_BI_cluster crawleradmin.t-info@telekom.de http://www.telekom.de/
-#		added FeedValidator http://feedvalidator.org/
-#		added FilmkameraBot http://www.filmkamera.at/bot.html
-#		added Findexa Crawler http://www.findexa.no/gulesider/article26548.ece
-#		added Global Fetch http://www.wesonet.com/
-#		added GOFORITBOT http://www.goforit.com/about/
-#		added GoForIt.com http://www.goforit.com/about/
-#		added GPU p2p crawler http://gpu.sourceforge.net/search_engine.php
-#		added HooWWWer http://cosco.hiit.fi/search/hoowwwer/
-#		added HPPrint
-#		added HTMLParser http://htmlparser.sourceforge.net/
-#		added Hundesuche.com-Bot http://www.hundesuche.com/
-#		added InfoBot http://www.infobot.org/
-#		added InfociousBot http://corp.infocious.com/tech_crawler.php
-#		added InternetSupervision http://internetsupervision.com/
-#		added isearch2006 http://www.yahoo.com.cn/
-#		added IUPUI_Research_Bot http://spamhuntress.com/2005/04/25/a-mail-harvester-visits/
-#		added KalamBot http://64.124.122.251/feedback.html
-#		added kamano.de NewsFeedVerzeichnis http://www.kamano.de/
-#		added Kevin http://dznet.com/kevin/
-#		added KnowItAll http://www.cs.washington.edu/research/knowitall/
-#		added Knowledge.com http://www.knowledge.com/
-#		added Kouaa Krawler http://www.kouaa.com/
-#		added ksibot http://ego.ms.mff.cuni.cz/
-#		added Link Valet Online http://www.htmlhelp.com/tools/valet/
-#		added lwp-request http://search.cpan.org/~gaas/libwww-perl-5.69/bin/lwp-request
-#		added lwp-trivial http://search.cpan.org/src/GAAS/libwww-perl-5.805/lib/LWP/Simple.pm
-#		added MapoftheInternet.com http://MapoftheInternet.com/
-#		added Matrix S.p.A. - FAST Enterprise Crawler http://tin.virgilio.it/
-#		added Megite http://www.megite.com/
-#		added Metaspinner http://index.meta-spinner.de/
-#		added Mini-reptile
-#		added Misterbot http://www.misterbot.fr/
-#		added Miva http://www.miva.com/
-#		added Mizzu Labs http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_m_141105_2\b
-#		added MSRBOT http://research.microsoft.com/research/sv/msrbot/
-#		added MS SharePoint Portal Server - MS Search 4.0 Robot http://support.microsoft.com/default.aspx?scid=kb;en-us;284022
-#		added Mydoyouhike http://www.doyouhike.net/my
-#		added NASA Search http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_n_s_140506_2\b
-#		added NetSprint http://www.netsprint.pl/serwis/
-#		added NimbleCrawler http://www.healthline.com/
-#		added OpenWebSpider http://www.openwebspider.org/
-#		added Oracle Ultra Search http://www.oracle.com/technology/products/ultrasearch/index.html
-#		added OSSProxy http://www.marketscore.com/FAQ.Aspx
-#		added passwordmaker.org http://passwordmaker.org/
-#		added PEAR HTTP Request class http://pear.php.net/
-#		added PEERbot http://www.peerbot.com/
-#		added PHP version tracker http://www.nexen.net/phpversion/bot.php
-#		added PictureOfInternet http://malfunction.org/poi/
-#		added plinki http://www.plinki.com/
-#		added Port Huron Labs http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_n_s_1133\b
-#		added PostFavorites http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_n_s_1135\b
-#		added ProjectWF-java-test-crawler
-#		added PyQuery http://sourceforge.net/projects/pyquery/
-#		added Schizozilla http://spamhuntress.com/2005/03/18/gizmo/
-#		added Scumbot
-#		added Sensis Web Crawler http://www.sensis.com.au/
-#		added snap.com beta crawler http://www.snap.com/
-#		added Steeler http://www.tkl.iis.u-tokyo.ac.jp/~crawler/
-#		added STEROID  Download http://faqs.org.ru/progr/pascal/delphi_internet2.htm
-#		added Suchfin-Bot http://www.suchfin.de/
-#		added Sunrise http://www.sunrisexp.com/
-#		added Tagyu Agent http://www.tagyu.com/
-#		added Tcl http client package http://www.tcl.tk/man/tcl8.4/TclCmd/http.htm
-#		added TeragramCrawlerSURF http://www.teragram.com/
-#		added Test Crawler http://netp.ath.cx/
-#		added UnChaos Bot Hybrid Web Search Engine http://www.unchaos.com/
-#		added unido-bot http://www.unchina.org/unido/unido/our_projects/3_3.html
-#		added UniversalFeedParser http://feedparser.org/ (seen from md301000.inktomisearch.com)
-#		added updated http://www.updated.com/
-#		added Vermut http://vermut.aol.com
-#		added versus crawler from eda.baykan@epfl.ch http://www.epfl.ch/Eindex.html
-#		added Vespa Crawler (Yahoo Norway?) http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=%5Cbid_t_z_030406_1%5Cb
-#		added VSE http://www.vivisimo.com/
-#		added webcrawl.net http://www.webcrawl.net/
-#		added Web Downloader http://www.krasu.ru/soft/chuchelo/
-#		added Webdup http://www.webdup.com/en/index.html
-#		added Wells Search http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_t_z_1484\b
-#		added WordPress http://wordpress.org/
-#		added wume crawler http://wume.cse.lehigh.edu/~xiq204/crawler/
-#		added Xenu's Link Sleuth (with ')
-#		added xirq http://www.xirq.com/
-#		added yoogliFetchAgent http://www.yoogli.com/
-#		added Z-Add Link Checker http://w3.z-add.co.uk/linkcheck/
-#		-- fix - some robots were reported with _ where _ should have been a space.
-#		changed Xenu Link Sleuth
-#		changed microsoft[_+\s]url[_+\s]control -> microsoft_url_control
-#		changed favorites_sweeper -> favorites_sweeper
-#		-- updates
-#		updated AskJeeves to Ask
-# 2012-06-05 Albrecht Mueller
-#              added Grabber from SDSC (San Diego Supercomputer Center).
-# 2013-09-30 Albrecht Mueller
-# AWStats probably cannot detect this bot as it identifies itself in
-# the referrer field and not in the user agent string.
-#92.113.100.35 - - [29/Sep/2013:17:22:46 +0200] "GET /robots.txt HTTP/1.1" 200 516 "-" "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0" "-"
-#92.113.100.35 - - [29/Sep/2013:17:22:49 +0200] "GET /tghome.htm HTTP/1.1" 200 4445 "http://extrabot.com/help/frytygativyheku.htm" "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0" "-"
-#92.113.100.35 - - [29/Sep/2013:17:22:51 +0200] "GET / HTTP/1.1" 200 5467 "http://extrabot.com/help/frytygativyheku.htm" "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0" "-"
-
-# to do  MS Search 4.0 Robot
-
-#package AWSROB;
-
-
-# Robots list was found at http://www.robotstxt.org/wc/active/all.txt
-# Other robots can be found at http://www.jafsoft.com/searchengines/webbots.html
-# Rem: To avoid bad detection, some robot's ids were removed from this list:
-#      - Robots with ID of 3 letters only
-#      - Robots called 'webs' and 'tcl'
-# Rem: directhit changed into direct_hit (its real id)
-# Rem: calif changed into calif[^r] to avoid confusion with the Tiscalifreenet browser
-# Rem: fish changed into [^a]fish to avoid confusion with the Madsafish browser
-# Rem: roadrunner changed into road_runner
-# Rem: lycos changed to lycos_ to avoid confusion with lycos-online browser
-# Rem: voyager changed into ^voyager\/ to avoid excluding the voyager and amigavoyager browsers
-
-# RobotsSearchIDOrder
-# It contains all matching criteria to search for in log fields. This list is
-# used to know in which order to search Robot IDs.
-# Most frequent ones are in list1, used when LevelForRobotsDetection is 1 or more
-# Minor robots are in list2, used when LevelForRobotsDetection is 2 or more
-# Note: Robots IDs are in lower case, '_', ' ' and '+' are changed into '[_+\s]' and are quoted.
-#-------------------------------------------------------
-
-
-@RobotsSearchIDOrder_list1 = (
-# Common robots (In robot file)
-'bingbot/',
-'bingpreview',
-'MSIECrawler',
-'msnbot/',
-'msnbot\-media/',
-'AdIdxBot/',
-'NOT[\x20]Googlebot/',
-'Googlebot/',
-'Google[\x20]Web[\x20]Preview',
-'Googlebot\-Image/',
-'Googlebot\-Mobile/',
-'Google[\x20]Page[\x20]Speed',
-'google\-sitemaps',
-'Googlebot\-News',
-'Googlebot\-Video/',
-'AdsBot\-Google[\x20]\(',
-'AdsBot\-Google\-Mobile\-Apps',
-'Adsbot',
-'Mediapartners-Google',
-'Feedfetcher\-Google',
-'Google\-Adwords\-Instant',
-'Firefox/1\.5',
-'Yahoo![\x20]Slurp[\x20]China',
-'Yahoo![\x20]Slurp',
-'Baiduspider/',
-'Baiduspider\-image',
-'Baiduspider-',
-'YandexBot/',
-'YandexImages/',
-'YandexImageResizer',
-'YandexMetrika/',
-'YandexMobileBot/',
-'yandex',
-'electricmonk/',
-'spbot/',
-'SeznamBot/',
-'msie8',
-'AhrefsBot/',
-'007ac9[\x20]Crawler',
-'2345Explorer/',
-'360Spider',
-'A[\x20]Simple[\x20]Crawler',
-'Abrave',
-'acapbot/',
-'Accoona\-AI\-Agent/',
-'arcemedia',
-'AdnormCrawlerCatchBot/',
-'adscanner',
-'aiHitBot/',
-'aipbot/',
-'AlphaBot',
-'Apache\-HttpClient/',
-'Apexoo[\x20]Spider',
-'Applebot/',
-'archive\.org_bot',
-'Babya[\x20]Discoverer',
-'Barkrowler',
-'BDCbot/',
-'BellPagesCA/',
-'BeNosey[\x20]Mohawk[\x20]Search',
-'bhcBot',
-'bidswitchbot',
-'BigBozz/',
-'BinGet/',
-'bitlybot',
-'bl\.uk_lddc_bot/',
-'BLEXBot/',
-'bnf.fr_bot',
-'boitho\.com\-dc/',
-'BoogleBot',
-'BusinessBot:',
-'BW/',
-'Bytespider',
-'CatchBot/',
-'CB/Nutch',
-'CCBot/',
-'CheckMarkNetwork/',
-'Cliqzbot/',
-'CMS[\x20]Crawler',
-'Companybook\-Crawler',
-'ConveraCrawler/',
-'Contacts-Crawler',
-'contxbot',
-'cosmos/',
-'crawl/Nutch',
-'crawler4j',
-'CRAZYWEBCRAWLER',
-'CRMNLCrawlAgent',
-'CSE[\x20]HTML[\x20]Validator',
-'C\-T[\x20]bot',
-'CUBOT',
-'Curl/PHP',
-'cyencebot',
-'DataCrawler/',
-'daumoa',
-'daum',
-'Deepnet[\x20]Explorer',
-'DeuSu/',
-'Digincore',
-'Discordbot/',
-'Dispatch/',
-'DnyzBot',
-'DoCoMo/',
-'Domain[\x20]Re\-Animator[\x20]Bot',
-'DomainCrawler/',
-'DomainMacroCrawler/',
-'DomainSONOCrawler/',
-'DomainStatsBot/',
-'DotBot/',
-'DuckDuckBot-Https',
-'DuckDuckBot',
-'DuckDuckGo\-Favicons\-Bot/',
-'ELinks/',
-'ELinks[\x20]\(',
-'EmailMarketingRobot/',
-'EmeraldShield\.com[\x20]WebBot',
-'envolk\[ITS\]spider/',
-'eright',
-'EsperanzaBot',
-'Exabot/',
-'ExtLinksBot',
-'ExperianCrawlUK',
-'facebookexternalhit/',
-'fast_enterprise_crawler.*scrawleradmin\.t\-info@telekom\.de',
-'fast_enterprise_crawler.*t\-info_bi_cluster_crawleradmin\.t\-info@telekom\.de',
-'FAST\-WebCrawler/',
-'Feosey[\x20]Mohk[\x20]Crawler',
-'findlinks/',
-'Findxbot/',
-'FirePHP/',
-'firstdirectory\-bot',
-'flamingo',
-'FlippyBearBot/',
-'^foo$',
-'Foregenix[\x20]Web[\x20]Scan',
-'FreeWebMonitoring[\x20]SiteChecker/',
-'fujilabol',
-'FurlBot/',
-'Gaisbot/',
-'Gallent[\x20]Spider',
-'GarlikCrawler/',
-'Getintent[\x20]Crawler',
-'GetintentCrawler[\x20]getintent\.com',
-'Gigabot/',
-'gipo\-crawler/Nutch',
-'Girafabot',
-'Gluten[\x20]Free[\x20]Crawler/',
-'gocrawl',
-'Gowikibot',
-'Go\-http\-client/',
-'GrapeshotCrawler/',
-'GSiteCrawler/',
-'GurujiBot/',
-'hadiBot',
-'HaosouSpider',
-'HELLO[\x20]Crawler',
-'holmes/',
-'houzzbot',
-'HTTP_Request2/',
-'HubSpot[\x20]Webcrawler',
-'HyperCrawl/',
-'ICC\-Crawler/',
-'iconoclast',
-'IDGCrawler/Nutch',
-'IDG/UK',
-'idmarch[\x20]Automatic\.beta/',
-'InbyBot',
-'Incutio[\x20]XML',
-'IndeedBot',
-'InfluenceBot',
-'IonCrawl',
-'IRLbot/',
-'IssueCrawler',
-'istellabot/',
-'James[\x20]BOT',
-'Jigsaw/',
-'JobFeed',
-'Jooblebot',
-'KomodiaBot/',
-'Konqueror/',
-'laserlikebot',
-'Lightspeed',
-'linkapediabot',
-'metager\-linkchecker',
-'Linguee[\x20]Bot',
-'linkchecker',
-'LinkCheck',
-'linkdexbot/',
-'LinkedInBot/',
-'LinkpadBot/',
-'Links[\x20]\(',
-'LinksManager\.com_bot',
-'LWP::Simple/',
-'Mail\.RU_Bot/',
-'makecontact',
-'mappy',
-'MauiBot',
-'meanpathbot/',
-'Mechanize',
-'Mediatoolkitbot',
-'MegaIndex\.ru/',
-'merzscope',
-'Meta_Bot',
-'mfibot/',
-'microsoft.*discovery',
-'missigua_locator',
-'MixrankBot',
-'MJ12bot/',
-'MojeekBot',
-'Mojolicious',
-'MXT/Nutch',
-'My[\x20]Nutch[\x20]Spider/',
-'myse/Nutch',
-'Naaraa',
-'Neevabot',
-'NerdyBot',
-'netEstate[\x20]NE[\x20]Crawler',
-'NetResearchServer/',
-'Nimbostratus-Bot',
-'nominet',
-'NRLCorpusBuilder/Nutch',
-'nutch\-1\.4/',
-'nutch\-1\.8/',
-'NutchCVS/',
-'o\.uk[\x20]robot',
-'ocrawler;',
-'ODP[\x20]link[\x20]checker',
-'Offline[\x20]Explorer/',
-'OmniExplorer_Bot/',
-'OrangeBot/',
-'Orliac',
-'OutclicksBot',
-'PageBitesHyperBot/',
-'Pcore',
-'pdffillerbot/',
-'peopleman',
-'PetalBot',
-'PhantomJS',
-'PHP/5\.2\.8',
-'Pinterestbot',
-'PiplBot',
-'Ploetz[\x20]\+[\x20]Zeller',
-'Plukkie/',
-'Princetonbot/',
-'PrivacyAwareBot/',
-'Prlog/',
-'proximic',
-'psbot/',
-'psbot\-image',
-'python_wk_crawler',
-'Python\-urllib/',
-'QCrawl',
-'Quick-Crawler',
-'ResearchBot',
-'roboto',
-'rogerbot/',
-'RSSingBot',
-'RukiCrawler/',
-'SafeDNS[\x20]search[\x20]bot/',
-'SafeDNSBot',
-'SafeSearch[\x20]microdata[\x20]crawler',
-'safesearch',
-'SBL\-BOT',
-'scrapy',
-'Screaming[\x20]Frog[\x20]SEO[\x20]Spider/',
-'ScreenerBot[\x20]Crawler[\x20]Beta',
-'Scrubby',
-'Searchie/',
-'SecurityResearch\.bot',
-'Seekmo',
-'semanticbot',
-'SemrushBot/',
-'SemrushBot-SI',
-'seo\-audit\-check\-bot/',
-'Seobility',
-'SEOkicks\-Robot',
-'SEOlyticsCrawler/',
-'SEOstats',
-'Seosys/Nutch',
-'Seoterritory\.com[\x20]bot',
-'serendeputy',
-'Shim\-Crawler',
-'SiteExplorer/',
-'siteexplorer\.info',
-'siteimprove',
-'Slackbot\-LinkExpanding',
-'SmabblerBot/',
-'Sogou[\x20]web[\x20]spider/',
-'special_archiver/',
-'Spiderbot/',
-'SpuhexBot',
-'spyonweb',
-'ssearch_bot',
-'Streamline3Bot',
-'SurdotlyBot/',
-'SurveyBot/',
-'taiil/Nutch',
-'tbot\-nutch',
-'TeeRaidBot',
-'TelegramBot',
-'Test/Nutch',
-'Test[\x20]Spider',
-'TestCrawler',
-'The[\x20]Knowledge[\x20]AI',
-'TkBot',
-'tracemyfile',
-'trendiction',
-'TurnitinBot/',
-'TurnitinBot',
-'TweetmemeBot/',
-'UCY/Nutch',
-'uni-leipzig\.de',
-'Uptimebot/',
-'UptimeRobot/',
-'URL[\x20]Checker',
-'UXCrawlerBot',
-'Validator\.nu/',
-'vBSEO',
-'vBulletin[\x20]via[\x20]PHP',
-'vebidoobot',
-'vegi[\x20]bot',
-'Velen',
-'viz/Nutch',
-'VoilaBot',
-'VORTEX/',
-'voyager/',
-'vuhuvBot',
-'W3C_Validator/',
-'W3C\-checklink/',
-'WBSearchBot/',
-'WbSrch/',
-'WeSEE:Ads/PageBot',
-'WeSEE:Ads/PictureBot',
-'WeSEE_Bot',
-'Wget/',
-'Who\.is[\x20]Bot',
-'wonderbot/',
-'woobot/',
-'Wotbox/',
-'Xaldon[\x20]WebSpider',
-'Xenu[\x20]Link[\x20]Sleuth',
-'xenu_link_sleuth',
-'XML[\x20]Sitemaps[\x20]Generator',
-'XoviBot/',
-'yacybot',
-'Yahoo[\x20]Link[\x20]Preview',
-'yak',
-'YisouSpider',
-'yoozBot',
-'Your\-Website\-Sucks',
-'zoominfobot',
-'zspider/',
-'ZumBot/',
-# below placed at end to catch some generics
-'ng/1\.',
-'ng/2\.',
-'libwww\-perl',
-'urllib',
-'javabee',
-'projectwf\-java\-test\-crawler',
-'java',
-'loocalcrawler/nutch',
-'nutchosu\-vlib',
-'nutch',
-'perlcrawler',
-'perl',
-# old robots using firefox < version 11 not identifying themselves as a robot.
-'(firefox/)([0-9]\.|[0-1][0]\.)'
-);
-
-@RobotsSearchIDOrder_list2 = (
-# Less common robots (In robot file)
-'^Mozilla$',
-'^mozilla\/3\.0\s\(compatible$',
-'^mozilla\/4\.0$',
-'^mozilla\/4\.0\s\(compatible;\)$',
-'^mozilla\/5\.0$',
-'^mozilla\/5\.0\s\(compatible;$',
-'^mozilla\/5\.0\s\(en\-us\)$',
-'^mozilla\/5\.0\sfirefox\/3\.0\.5$',
-'^Mozilla/6\.0[\x20]\(compatible\)$',
-'^Mozilla/(.*)Beta[\x20]\(Windows\)',
-'MSIE[\x20]2',
-'MSIE[\x20]3',
-'MSIE[\x20]4',
-'MSIE[\x20]5',
-'MSIE[\x20]6',
-'MSIE\+6\.0\;',
-'Windows[\x20]95',
-'Windows[\x20]98',
-
-# these could be removed to speed up processing as they are rarely seen
-'a6\-indexer',
-'abcdatos',
-'abonti\.com',
-'acme\.spider',
-'activebookmark',
-'adamm_bot',
-'advbot',
-'affectv\.co\.uk',
-'ahoythehomepagefinder',
-'aleadsoftbot',
-'alkaline',
-'allrati',
-'alltop',
-'almaden',
-'alpha_search_agent',
-'anthill',
-'antibot',
-'aport',
-'appie',
-'applesyndication',
-'arachnophilia',
-'arale',
-'araneo',
-'architext',
-'archive\-de\.com',
-'aretha',
-'argus',
-'ariadne',
-'arianna\.libero\.it',
-'arks',
-'aspider',
-'aspseek',
-'asterias',
-'asynchttpclient',
-'atn\.txt',
-'atomz',
-'auresys',
-'awbot',
-'backlinktest\.com',
-'backrub',
-'bbot',
-'becomebot',
-'bender',
-'betabot',
-'bigbrother',
-'biglotron',
-'BingLocalSearch',
-'bittorrent_bot',
-'biz360[_+\s]spider',
-'bjaaland',
-'blackwidow',
-'blindekuh',
-'blogbridge[_+\s]service',
-'blogged_crawl',
-'bloglines',
-'bloglovin',
-'blogpulse',
-'blogsearch',
-'blogshares',
-'blogslive',
-'blogssay',
-'bloodhound',
-'bncf\.firenze\.sbn\.it/raccolta\.txt',
-'bobby',
-'bookmark\-manager',
-'borg\-bot',
-'boris',
-'brightnet',
-'bruinbot',
-'bspider',
-'bubing',
-'bumblebee',
-'butterfly',
-'buzztracker',
-'cactvschemistryspider',
-'calif[^r]',
-'candlelight[_+\s]favorites[_+\s]inspector',
-'careerbot',
-'carpathia',
-'cassandra',
-'catbot',
-'cbn00glebot',
-'cerberian_drtrs',
-'cfetch',
-'cgireader',
-'chattertrap',
-'check_http',
-'checkbot',
-'checkweb_link_validator',
-'christcrawler',
-'churl',
-'cienciaficcion',
-'cipinetbot',
-'imagecoccoc',
-'coccoc',
-'coldfusion',
-'collective',
-'combine',
-'commons\-httpclient',
-'computer_and_automation_research_institute_crawler',
-'conceptbot',
-'contentmatch',
-'converamultimediacrawler',
-'coolbot',
-'copubbot',
-'core',
-'covario',
-'cruiser',
-'cscrawler',
-'cuasarbot',
-'cursor',
-'cusco',
-'custo',
-'cyberspyder',
-'datafountains/dmoz_downloader',
-'dataprovider\.com',
-'daviesbot',
-'daylifefeedfetcher',
-'daypopbot',
-'deepindex',
-'desertrealm',
-'deweb',
-'dienstspider',
-'digger',
-'digout4u',
-'diibot',
-'dipsie\.bot',
-'direct_hit',
-'discobot',
-'dlvr\.it',
-'dnabot',
-'dnsgroup',
-'doccheckbot',
-'domainappender',
-'domainchecker',
-'domainsdb\.net',
-'download_express',
-'dragonbot',
-'dreamwidth',
-'drupal',
-'dulance',
-'dumbot',
-'dumm\.de\-bot',
-'dwcp',
-'e\-collector',
-'earthcom\.info',
-'easydl',
-'ebiness',
-'eccp',
-'echo!',
-'edgeio\-retriever',
-'elfinbot',
-'emacs',
-'emcspider',
-'enteprise',
-'ernst[:blank:]2\.0',
-'esther',
-'ets_v',
-'eventax',
-'everbeecrawler',
-'everest\-vulcan',
-'evliyacelebi',
-'exactseek',
-'extreme[_+\s]picture[_+\s]finder',
-'ezoom',
-'ezresult',
-'facebook',
-'facebot',
-'fast\-search\-engine',
-'matrix_s\.p\.a\._\-_fast_enterprise_crawler',
-'fast_enterprise_crawler',
-'fastbot',
-'fastcrawler',
-'favicon',
-'favorg',
-'favorites_sweeper',
-'fdse',
-'feedburner',
-'feedcrawl',
-'feedflow',
-'feedmyinbox',
-'feedroll\.com',
-'feedsky',
-'feedster',
-'feedvalidator',
-'feedzira',
-'felix',
-'ferret',
-'fetchbot',
-'fetchrover',
-'fever/',
-'fido',
-'filmkamerabot',
-'filterdb\.iss\.net',
-'finderlein[_+\s]research[_+\s]crawler',
-'findexa_crawler',
-'finnish',
-'fireball',
-'firmilybot',
-'flexum',
-'foaf\-search\.net',
-'fooky\.com/ScorpionBot',
-'fouineur',
-'francoroute',
-'freecrawl',
-'freenews',
-'funnelweb',
-'g2crawler',
-'gama',
-'gazz',
-'gcreep',
-'geniebot',
-'genieo',
-'geohasher',
-'getbot',
-'geturl',
-'gigablastopensource',
-'global_fetch',
-'gnodspider',
-'goforit\.com',
-'goforitbot',
-'golem',
-'gonzo',
-'gougou',
-'gpu_p2p_crawler',
-'grabber',
-'grapeshot',
-'grapnel',
-'griffon',
-'gromit',
-'grub',
-'gulliver',
-'gulperbot',
-'hambot',
-'hanrss',
-'harvest',
-'havindex',
-'henrythemiragorobot',
-'heritrix',
-'hl_ftien_spider',
-'hometown',
-'hoowwwer',
-'hpprint',
-'htdig',
-'html[_+\s]link[_+\s]validator',
-'htmlgobble',
-'htmlparser',
-'httrack',
-'hundesuche\.com\-bot',
-'hyperdecontextualizer',
-'ia_archiver\-web\.archive\.org',
-'ia_archiver',
-'iajabot',
-'iaskspider',
-'i\-bot',
-'icarus6j',
-'ichiro',
-'icjobs\.de',
-'ilse',
-'iltrovatore\-setaccio',
-'imagelock',
-'implisensebot',
-'inagist',
-'incywincy',
-'infobot',
-'infociousbot',
-'infohelfer',
-'infomine',
-'informant',
-'infoseeksidewinder',
-'infoseek',
-'infospider',
-'inspectorwww',
-'insurancobot',
-'integromedb\.org',
-'intelliagent',
-'internet[_+\s]ninja',
-'internetarchive',
-'internetseer',
-'internetsupervision',
-'ips\-agent',
-'irobot',
-'iron33',
-'isearch2006',
-'israelisearch',
-'iupui_research_bot',
-'izsearch',
-'jacobin[\x20]club',
-'jakarta',
-'jbot',
-'jcrawler',
-'jeeves',
-'jennybot',
-'jobboerse',
-'jobot',
-'jobo',
-'joebot',
-'jrtwine[_+\s]software[_+\s]check[_+\s]favorites[_+\s]utility',
-'js\-kit',
-'jubii',
-'jumpstation',
-'justview',
-'kalambot',
-'kamano\.de_newsfeedverzeichnis',
-'kapsi',
-'katipo',
-'kazoombot',
-'kevin',
-'keyoshid',
-'kilroy',
-'kinja\-imagebot',
-'kinjabot',
-'knowitall',
-'knowledge\.com',
-'ko[_+\s]yappo[_+\s]robot',
-'kouaa_krawler',
-'krugle',
-'ksibot',
-'kummhttp',
-'kurzor',
-'labelgrabber\.txt',
-'lanshanbot',
-'larbin',
-'largesmall[\x20]crawler',
-'legs',
-'letscrawl\.com',
-'libcrawl',
-'lilina',
-'link_valet_online',
-'linkbot',
-'linkdex\.com',
-'linkidator',
-'linkscan',
-'linkstats[\x20]bot',
-'linkwalker',
-'lipperhey',
-'livejournal\.com',
-'lmspider',
-'loadtimebot',
-'lockon',
-'logo_gif',
-'longurl',
-'lssrocketcrawler',
-'ltbot',
-'ltx71',
-'lwp\-request',
-'lwp\-trivial',
-'lycos[_+\s]',
-'macworm',
-'madaali\.de',
-'magpierss',
-'magpie',
-'mapoftheinternet\.com',
-'marvin',
-'mattie',
-'mediabot',
-'mediafox',
-'megaindex',
-'megite',
-'memorybot',
-'mercator',
-'meshexplorer',
-'metager2\-verification\-bot',
-'metajobbot',
-'metaspinner',
-'metauri',
-'miadev',
-'microsoft[_+\s]url[_+\s]control',
-'microsoft[\x20]bits',
-'microsoft\-webdav\-miniredir',
-'mindcrawler',
-'mindupbot',
-'mini\-reptile',
-'minirank',
-'misterbot',
-'miva',
-'mizzu_labs',
-'mnogosearch',
-'moget',
-'momspider',
-'monster',
-'motor',
-'movabletype',
-'ms[_+\s]search[_+\s]6\.0[_+\s]robot',
-'ms_search_4\.0_robot',
-'msnbot\-udiscovery',
-'msrabot',
-'msrbot',
-'mt::telegraph::agent',
-'muncher',
-'muscatferret',
-'mwdsearch',
-'mydoyouhike',
-'myweb',
-'nagios',
-'nasa_search',
-'ndspider',
-'nederland\.zoek',
-'netcarta',
-'netcraft',
-'netluchs',
-'netmechanic',
-'netnewswire',
-'netscoop',
-'netsprint',
-'netvibes',
-'newrelicpinger',
-'newscan\-online',
-'newsfox',
-'newsgatoronline',
-'nextgensearchbot',
-'nhse',
-'nicebot',
-'nimblecrawler',
-'ning',
-'nomad',
-'northstar',
-'noxtrumbot',
-'npbot',
-'nzexplorer',
-'objectssearch',
-'occam',
-'ocelli',
-'octopus',
-'octora_beta_bot',
-'onet\.pl[_+\s]sa',
-'onfolio',
-'openfind',
-'opentaggerbot',
-'openwebspider',
-'optimizer',
-'oracle_ultra_search',
-'orb_search',
-'orbiter',
-'packrat',
-'pageboy',
-'panscient',
-'parasite',
-'passwordmaker\.org',
-'patric',
-'pear_http_request_class',
-'peerbot',
-'pegasus',
-'perignator',
-'perman',
-'petersnews',
-'phantom',
-'php[_+\s]version[_+\s]tracker',
-'phpcrawl',
-'phpdig',
-'picmole',
-'pictureofinternet',
-'piltdownman',
-'pimptrain',
-'ping\.blo\.gs',
-'pingdom',
-'pioneer',
-'pita',
-'pitkow',
-'pjspider',
-'plinki',
-'pluckfeedcrawler',
-'plumtreewebaccessor',
-'pogodak',
-'pompos',
-'popdexter',
-'poppi',
-'port_huron_labs',
-'portalb',
-'postfavorites',
-'postpost',
-'postrank',
-'powermarks',
-'printfulbot',
-'proodlebot',
-'protopage',
-'publiclibraryarchive',
-'pyquery',
-'python',
-'qihoobot',
-'quipply',
-'qwantify',
-'r6\_',
-'rambler',
-'ratingburner',
-'raven',
-'rbse',
-'redalert',
-'regator',
-'relevantnoise\.com',
-'resumerobot',
-'rhcs',
-'riddler',
-'road_runner',
-'robbie',
-'robi',
-'robocrawl',
-'robofox',
-'robozilla',
-'rojo',
-'rome[\x20]client',
-'roverbot',
-'rpt\-httpclient',
-'rssgraffiti',
-'rssimagesbot',
-'ruffle',
-'rufusbot',
-'rules',
-'safeads\.xyz',
-'safetynetrobot',
-'sage\+\+',
-'sandcrawler',
-'savetheworldheritage',
-'sbider',
-'schizozilla',
-'scooter',
-'scoutjet',
-'scumbot',
-'search\-info',
-'search_au',
-'searchguild[_+\s]dmoz[_+\s]experiment',
-'searchmetricsbot',
-'searchprocess',
-'seekbot',
-'semalt',
-'senrigan',
-'sensis_web_crawler',
-'seodiver',
-'seokicks\.de',
-'seoscanners',
-'sgscout',
-'shaggy',
-'shaihulud',
-'shareaholicbot',
-'shoutcast',
-'sift',
-'simbot',
-'simplepie',
-'sistrix',
-'site\-valet',
-'sitebot',
-'sitedomain\-bot',
-'sitetech',
-'skimbot',
-'skymob',
-'slcrawler',
-'slurp',
-'slysearch',
-'smartspider',
-'smtbot',
-'snap\.com_beta_crawler',
-'snappy',
-'snooper',
-'sohu\-search',
-'sohu',
-'solbot',
-'speedy',
-'sphere_scout',
-'spider[_+\s]monkey',
-'spiderline',
-'spiderlytics',
-'spiderman',
-'spiderview',
-'spip',
-'sproose_crawler',
-'spry',
-'sqworm',
-'ssearcher',
-'steeler',
-'steroid__download',
-'stq_bot',
-'Stratagems[\x20]Kumo',
-'suchfin\-bot',
-'suke',
-'summify\.com',
-'sunrise',
-'suntek',
-'superbot',
-'superfeedr',
-'susie',
-'sven',
-'syndic8',
-'syndicapi',
-'synoobot',
-'synthesio',
-'t\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e',
-'tach_bw',
-'tagyu_agent',
-'tailrank',
-'tarantula',
-'tarspider',
-'tcl_http_client_package',
-'techbot',
-'technoratibot',
-'templeton',
-'teoma',
-'teragramcrawlersurf',
-'test_crawler',
-'testbot',
-'thumbsniper',
-'titan',
-'titin',
-'tkwww',
-'tlspider',
-'topblogsinfo',
-'topicblogs',
-'topix\.net',
-'trapit',
-'trileet',
-'turtlescanner',
-'turtle',
-'tutorgigbot',
-'tweetedtimes',
-'twiceler',
-'twisted[\x20]pagegetter',
-'twitterbot',
-'twitterfeed',
-'ubicrawler',
-'ucsd',
-'udmsearch',
-'ultraseek',
-'um\-IC',
-'um\-LN',
-'unchaos_bot_hybrid_web_search_engine',
-'unido\-bot',
-'unisterbot',
-'universalfeedparser',
-'unlost_web_crawler',
-'unwindfetchor',
-'updated',
-'urlck',
-'ustc\-semantic\-group',
-'vagabondo\-wap',
-'vagabondo',
-'valkyrie',
-'vermut',
-'versus_crawler_from_eda\.baykan@epfl\.ch',
-'verticrawl',
-'vespa_crawler',
-'victoria',
-'virus[_+\s]detector',
-'visionsearch',
-'voidbot',
-'voltron',
-'vse/',
-'vwbot',
-'w3c[_+\s]css[_+\s]validator[_+\s]jfouffa',
-'w3index',
-'w3m2',
-'wallpaper',
-'wanderer',
-'wapspider',
-'wapspIRLider',
-'watchmouse',
-'wavefire',
-'waybackarchive\.org',
-'wazzup',
-'web_downloader',
-'webbandit',
-'webbase',
-'webcatcher',
-'webclipping\.com',
-'webcollage',
-'webcompass',
-'webcopy',
-'webcrawl\.net',
-'webdup',
-'webfetcher',
-'webfilter',
-'webfoot',
-'webinator',
-'webindexer',
-'weblayers',
-'weblinker',
-'webminer',
-'webmirror',
-'webmoose',
-'webquest',
-'webreader',
-'webreaper',
-'website[_+\s]monitoring[_+\s]bot',
-'websnarf',
-'webspider',
-'webvac',
-'webvulncrawl',
-'webwalker',
-'webwalk',
-'webwatch',
-'wells_search',
-'wer\-liefert\-was',
-'wesee:search',
-'wevikabot',
-'whatuseek',
-'whowhere',
-'windows\-rss\-platform',
-'wired\-digital',
-'zyborg',
-'wisenutbot',
-'wiumi',
-'wmir',
-'wolp',
-'wombat',
-'wonderer',
-'woozweb',
-'wordpress',
-'worm',
-'wume_crawler',
-'wwwc',
-'wwweasel',
-'wz101',
-'xget',
-'xirq',
-'xydo',
-'y!j',
-'yahoo![\x20]searchmonkey',
-'yahoo!_mindset',
-'yahoo\-blogs',
-'yahoo\-mmcrawler',
-'yahoo\-newscrawler',
-'yahoo[\x20]pipes',
-'yahoo\-verticalcrawler',
-'yahoocachesystem',
-'yahooexternalcache',
-'yahoofeedseeker',
-'yahooseeker\-testing',
-'yahooseeker',
-'yahooysmcm',
-'yammer',
-'yanga',
-'yet\-another\-spider',
-'yeti',
-'yie8',
-'yodaobot',
-'yooglifetchagent',
-'youdao',
-'yourls',
-'z\-add_link_checker',
-'zealbot',
-'zemanta',
-'zend_http_client',
-'zeus',
-'zhuaxia',
-'[^a]fish',
-'[\x20]netseer[\x20]',
-'^[1-3]$',
-'^finbot',
-'^motorola$',
-'^msie',
-'^voyager/',
-'^webindex$',
-'1\-more_scanner',
-'nbot'
-);
-
-@RobotsSearchIDOrder_listgen = (
-# Generic robot
-'robot',
-'blog',
-'checker',
-'crawl',
-'discover',
-'feed',
-'fetcher',
-'hunter',
-'link',
-'scanner',
-'seek',
-'sitemap',
-'spider',
-'sucker',
-'survey',
-'validator',
-'bot[\s_+:,\.\;\/\\\-]',
-'[\s_+:,\.\;\/\\\-]bot',
-'curl',
-'php',
-'ruby/',
-# Moving oBot here so it doesn't get assigned for other *obot robots
-'oBot/',
-'no_user_agent'
-);
-
-
-# RobotsHashIDLib
-# List of robots names ('robot id','robot clear text')
-#-------------------------------------------------------
-%RobotsHashIDLib   = (
-# Common robots (In robot file)
-'bingbot/','bingbot',
-'bingpreview','BingPreview',
-'MSIECrawler','MSIECrawler',
-'msnbot/','msnbot',
-'msnbot\-media/','msnbot-media',
-'AdIdxBot/','AdIdxBot Microsoft Ad Quality control',
-'NOT[\x20]Googlebot/','NOT Googlebot',
-'Googlebot/','Googlebot',
-'Google[\x20]Web[\x20]Preview','Google Web Preview',
-'Googlebot\-Image/','Googlebot-Image',
-'Googlebot\-Mobile/','Googlebot-Mobile',
-'Google[\x20]Page[\x20]Speed','Google Page Speed',
-'google\-sitemaps','google-sitemaps',
-'Googlebot\-News','Googlebot-News',
-'Googlebot\-Video/','Googlebot-Video',
-'AdsBot\-Google[\x20]\(','AdsBot-Google',
-'AdsBot\-Google\-Mobile\-Apps','AdsBot-Google-Mobile-Apps',
-'Adsbot','Adsbot',
-'Mediapartners-Google','Mediapartners-Google',
-'Feedfetcher\-Google','Feedfetcher-Google',
-'Google\-Adwords\-Instant','Google-Adwords-Instant',
-'Firefox/1\.5','Nautic Expo using Firefox/1.5',
-'Yahoo![\x20]Slurp[\x20]China','Yahoo! Slurp China',
-'Yahoo![\x20]Slurp','Yahoo! Slurp',
-'Baiduspider/','Baiduspider',
-'Baiduspider\-image','Baiduspider-image',
-'Baiduspider-','Baiduspider ( catchall )',
-'YandexBot/','YandexBot',
-'YandexImages/','YandexImages',
-'YandexImageResizer','YandexImageResizer',
-'YandexMetrika/','YandexMetrika',
-'YandexMobileBot/','YandexMobileBot',
-'yandex','Yandex ( catchall )',
-'electricmonk/','electricmonk',
-'spbot/','spbot',
-'SeznamBot/','SeznamBot',
-'msie8','msie8 - ( Rogue Robot )',
-'AhrefsBot/','AhrefsBot',
-'007ac9[\x20]Crawler','007ac9 Crawler',
-'2345Explorer/','2345Explorer',
-'360Spider','360Spider',
-'A[\x20]Simple[\x20]Crawler','A Simple Crawler',
-'Abrave','Abrave',
-'acapbot/','acapbot',
-'Accoona\-AI\-Agent/','Accoona-AI-Agent',
-'AdnormCrawlerCatchBot/','AdnormCrawlerCatchBot',
-'adscanner','adscanner',
-'aiHitBot/','aiHitBot',
-'aipbot/','aipbot',
-'AlphaBot','AlphaBot',
-'Apache\-HttpClient/','Apache-HttpClient',
-'Apexoo[\x20]Spider','Apexoo Spider',
-'Applebot/','Applebot',
-'arcemedia','AdsBot-ArceMedia',
-'archive\.org_bot','archive.org_bot',
-'Babya[\x20]Discoverer','Babya Discoverer',
-'Barkrowler','Barkrowler',
-'BDCbot/','BDCbot',
-'BellPagesCA/','BellPagesCA',
-'BeNosey[\x20]Mohawk[\x20]Search','BeNosey Mohawk Search',
-'bhcBot','bhcBot',
-'bidswitchbot','bidswitchbot',
-'BigBozz/','BigBozz',
-'BinGet/','BinGet',
-'bitlybot','bit.ly',
-'bl\.uk_lddc_bot/','bl.uk_lddc_bot',
-'BLEXBot/','BLEXBot',
-'bnf.fr_bot','bnf.fr_bot',
-'boitho\.com\-dc/','boitho.com-dc',
-'BoogleBot','BoogleBot',
-'BusinessBot:','BusinessBot:',
-'BW/','BW',
-'Bytespider','Bytespider',
-'CatchBot/','CatchBot',
-'CB/Nutch','CB/Nutch',
-'CCBot/','CCBot',
-'CheckMarkNetwork/','CheckMarkNetwork',
-'Cliqzbot/','Cliqzbot',
-'CMS[\x20]Crawler','CMS Crawler',
-'Companybook\-Crawler','Companybook-Crawler',
-'ConveraCrawler/','ConveraCrawler',
-'Contacts-Crawler','Contacts-Crawler',
-'contxbot','contxbot',
-'cosmos/','cosmos',
-'CRMNLCrawlAgent','CRMNLCrawlAgent',
-'crawl/Nutch','crawl/Nutch',
-'crawler4j','crawler4j',
-'CRAZYWEBCRAWLER','CRAZYWEBCRAWLER',
-'CSE[\x20]HTML[\x20]Validator','CSE HTML Validator',
-'C\-T[\x20]bot','C-T bot',
-'CUBOT','CUBOT',
-'Curl/PHP','Curl/PHP',
-'cyencebot','cyencebot',
-'DataCrawler/','DataCrawler',
-'daumoa','daumoa',
-'daum','daum',
-'Deepnet[\x20]Explorer','Deepnet Explorer',
-'DeuSu/','DeuSu',
-'Digincore','Digincore',
-'Discordbot/','Discordbot',
-'Dispatch/','Dispatch',
-'DnyzBot','DnyzBot',
-'DoCoMo/','DoCoMo',
-'Domain[\x20]Re\-Animator[\x20]Bot','Domain Re-Animator Bot',
-'DomainCrawler/','DomainCrawler',
-'DomainMacroCrawler/','DomainMacroCrawler',
-'DomainSONOCrawler/','DomainSONOCrawler',
-'DomainStatsBot/','DomainStatsBot',
-'DotBot/','DotBot',
-'DuckDuckBot-Https','DuckDuckBot-Https',
-'DuckDuckBot','DuckDuckBot',
-'DuckDuckGo\-Favicons\-Bot/','DuckDuckGo-Favicons-Bot',
-'ELinks/','ELinks',
-'ELinks[\x20]\(','ELinks (',
-'EmailMarketingRobot/','EmailMarketingRobot',
-'EmeraldShield\.com[\x20]WebBot','EmeraldShield.com WebBot',
-'envolk\[ITS\]spider/','envolk ITS spider',
-'eright','eright',
-'EsperanzaBot','EsperanzaBot',
-'Exabot/','Exabot',
-'ExtLinksBot','ExtLinksBot',
-'ExperianCrawlUK','ExperianCrawlUK',
-'facebookexternalhit/','facebookexternalhit',
-'fast_enterprise_crawler.*scrawleradmin\.t\-info@telekom\.de','FAST Enterprise crawleradmin.t-info@telekom.de',
-'fast_enterprise_crawler.*t\-info_bi_cluster_crawleradmin\.t\-info@telekom\.de','FAST Enterprise T-Info_BI_cluster crawleradmin.t-info@telekom.de',
-'FAST\-WebCrawler/','FAST-WebCrawler',
-'Feosey[\x20]Mohk[\x20]Crawler','Feosey Mohk Crawler',
-'findlinks/','findlinks',
-'Findxbot/','Findxbot',
-'FirePHP/','FirePHP',
-'firstdirectory\-bot','firstdirectory-bot',
-'flamingo','Flamingo_SearchEngine',
-'FlippyBearBot/','FlippyBearBot',
-'^foo$','foo',
-'Foregenix[\x20]Web[\x20]Scan','Foregenix Web Scan',
-'FreeWebMonitoring[\x20]SiteChecker/','FreeWebMonitoring SiteChecker',
-'fujilabol','fujilabol',
-'FurlBot/','FurlBot',
-'Gaisbot/','Gaisbot',
-'Gallent[\x20]Spider','Gallent Spider',
-'GarlikCrawler/','GarlikCrawler',
-'Getintent[\x20]Crawler','GetIntent Crawler',
-'GetintentCrawler[\x20]getintent\.com','GetintentCrawler getintent.com',
-'Gigabot/','Gigabot',
-'gipo\-crawler/Nutch','gipo-crawler/Nutch',
-'Girafabot','Girafabot',
-'Gluten[\x20]Free[\x20]Crawler/','Gluten Free Crawler',
-'gocrawl','gocrawl',
-'Gowikibot','Gowikibot',
-'Go\-http\-client/','Go-http-client',
-'GrapeshotCrawler/','GrapeshotCrawler',
-'GSiteCrawler/','GSiteCrawler',
-'GurujiBot/','GurujiBot',
-'hadiBot','hadiBot',
-'HaosouSpider','HaosouSpider',
-'HELLO[\x20]Crawler','HELLO Crawler',
-'holmes/','holmes',
-'houzzbot','houzzbot',
-'HTTP_Request2/','HTTP_Request2',
-'HubSpot[\x20]Webcrawler','HubSpot Webcrawler',
-'HyperCrawl/','HyperCrawl',
-'ICC\-Crawler/','ICC-Crawler',
-'iconoclast','iconoclast',
-'IDGCrawler/Nutch','IDGCrawler/Nutch',
-'IDG/UK','IDG/UK',
-'idmarch[\x20]Automatic\.beta/','idmarch Automatic.beta',
-'InbyBot','InbyBot',
-'Incutio[\x20]XML','Incutio XML',
-'IndeedBot','IndeedBot',
-'InfluenceBot','InfluenceBot',
-'IonCrawl','IonCrawl',
-'IRLbot/','IRLbot',
-'IssueCrawler','IssueCrawler',
-'istellabot/','istellabot',
-'James[\x20]BOT','James BOT',
-'Jigsaw/','Jigsaw',
-'JobFeed','JobFeed',
-'Jooblebot','Jooblebot',
-'KomodiaBot/','KomodiaBot',
-'Konqueror/','Konqueror',
-'laserlikebot','laserlikebot',
-'Lightspeed','Lightspeed',
-'linkapediabot','linkapediabot',
-'metager\-linkchecker','metager-linkchecker',
-'Linguee[\x20]Bot','Linguee Bot',
-'linkchecker','linkchecker',
-'LinkCheck','LinkCheck',
-'linkdexbot/','linkdexbot',
-'LinkedInBot/','LinkedInBot',
-'LinkpadBot/','LinkpadBot',
-'Links[\x20]\(','Links (',
-'LinksManager\.com_bot','LinksManager.com_bot',
-'LWP::Simple/','LWP::Simple',
-'Mail\.RU_Bot/','Mail.RU Bot',
-'makecontact','makecontact',
-'mappy','Mappy Crawler',
-'MauiBot','MauiBot',
-'meanpathbot/','meanpathbot',
-'Mechanize','Mechanize',
-'Mediatoolkitbot','Mediatoolkitbot',
-'MegaIndex\.ru/','MegaIndex.ru',
-'merzscope','merzscope',
-'Meta_Bot','Meta_Bot',
-'mfibot/','mfibot',
-'microsoft.*discovery','Microsoft Office Protocol Discovery',
-'missigua_locator','missigua_locator',
-'MixrankBot','MixrankBot',
-'MJ12bot/','MJ12bot',
-'MojeekBot','MojeekBot',
-'Mojolicious','Mojolicious',
-'MXT/Nutch','MXT/Nutch',
-'My[\x20]Nutch[\x20]Spider/','My Nutch Spider',
-'myse/Nutch','myse/Nutch',
-'Naaraa','Naaraa',
-'Neevabot','Neevabot',
-'NerdyBot','NerdyBot',
-'netEstate[\x20]NE[\x20]Crawler','netEstate NE Crawler',
-'NetResearchServer/','NetResearchServer',
-'Nimbostratus-Bot','Nimbostratus-Bot',
-'nominet','nominet',
-'NRLCorpusBuilder/Nutch','NRLCorpusBuilder/Nutch',
-'nutch\-1\.4/','nutch-1.4',
-'nutch\-1\.8/','nutch-1.8',
-'NutchCVS/','NutchCVS',
-'o\.uk[\x20]robot','o uk.robot',
-'ocrawler;','ocrawler;',
-'ODP[\x20]link[\x20]checker','ODP link checker',
-'Offline[\x20]Explorer/','Offline Explorer',
-'OmniExplorer_Bot/','OmniExplorer_Bot',
-'OrangeBot/','OrangeBot',
-'Orliac','Orliac',
-'OutclicksBot','OutclicksBot',
-'PageBitesHyperBot/','PageBitesHyperBot',
-'Pcore','Pcore',
-'pdffillerbot/','pdffillerbot',
-'peopleman','peopleman',
-'PetalBot','PetalBot',
-'PhantomJS','PhantomJS',
-'PHP/5\.2\.8','PHP/5.2.8',
-'Pinterestbot','Pinterestbot',
-'PiplBot','PiplBot',
-'Ploetz[\x20]\+[\x20]Zeller','Ploetz + Zeller',
-'Plukkie/','Plukkie',
-'Princetonbot/','Princetonbot',
-'PrivacyAwareBot/','PrivacyAwareBot',
-'Prlog/','Prlog',
-'proximic','proximic',
-'psbot/','psbot',
-'psbot\-image','psbot-image',
-'python_wk_crawler','python_wk_crawler',
-'Python\-urllib/','Python-urllib',
-'QCrawl','QCrawl',
-'Quick-Crawler','Quick-Crawler',
-'ResearchBot','ResearchBot',
-'roboto','roboto',
-'rogerbot/','rogerbot',
-'RSSingBot','RSSingBot',
-'RukiCrawler/','RukiCrawler',
-'SafeDNS[\x20]search[\x20]bot/','SafeDNS search bot',
-'SafeDNSBot','SafeDNSBot',
-'SafeSearch[\x20]microdata[\x20]crawler','SafeSearch microdata crawler',
-'safesearch','safesearch ( catchall )',
-'SBL\-BOT','SBL-BOT',
-'scrapy','scrapy',
-'Screaming[\x20]Frog[\x20]SEO[\x20]Spider/','Screaming Frog SEO Spider',
-'ScreenerBot[\x20]Crawler[\x20]Beta','ScreenerBot Crawler Beta',
-'Scrubby','Scrubby',
-'Searchie/','Searchie',
-'SecurityResearch\.bot','Security Research Bot',
-'Seekmo','Seekmo',
-'semanticbot','semanticbot',
-'SemrushBot/','SemrushBot',
-'SemrushBot-SI','SemrushBot-SI',
-'seo\-audit\-check\-bot/','seo-audit-check-bot',
-'Seobility','Seobility',
-'SEOkicks\-Robot','SEOkicks-Robot',
-'SEOlyticsCrawler/','SEOlyticsCrawler',
-'SEOstats','SEOstats',
-'Seosys/Nutch','Seosys/Nutch',
-'Seoterritory\.com[\x20]bot','Seoterritory.com.bot',
-'serendeputy','serendeputy',
-'Shim\-Crawler','Shim-Crawler',
-'SiteExplorer/','SiteExplorer',
-'siteexplorer\.info','siteexplorer.info',
-'siteimprove','siteimprove',
-'Slackbot\-LinkExpanding','Slackbot-LinkExpanding',
-'SmabblerBot/','SmabblerBot',
-'Sogou[\x20]web[\x20]spider/','Sogou web spider',
-'special_archiver/','special_archiver',
-'Spiderbot/','Spiderbot',
-'SpuhexBot','SpuhexBot',
-'spyonweb','spyonweb',
-'ssearch_bot','ssearch_bot',
-'Streamline3Bot','Streamline3Bot',
-'SurdotlyBot/','SurdotlyBot',
-'SurveyBot/','SurveyBot',
-'taiil/Nutch','taiil/Nutch',
-'tbot\-nutch','tbot-nutch',
-'TeeRaidBot','TeeRaidBot',
-'TelegramBot','TelegramBot',
-'Test/Nutch','Test/Nutch',
-'Test[\x20]Spider','Test Spider',
-'TestCrawler','TestCrawler',
-'The[\x20]Knowledge[\x20]AI', 'The Knowledge AI',
-'TkBot','TkBot',
-'tracemyfile','tracemyfile',
-'trendiction','trendiction',
-'TurnitinBot/','TurnitinBot',
-'TurnitinBot','TurnitinBot',
-'TweetmemeBot/','TweetmemeBot',
-'UCY/Nutch','UCY/Nutch',
-'uni-leipzig\.de','uni-leipzig.de',
-'Uptimebot/','Uptimebot',
-'UptimeRobot/','UptimeRobot',
-'URL[\x20]Checker','URL Checker',
-'UXCrawlerBot','UXCrawlerBot',
-'Validator\.nu/','Validator.nu',
-'vBSEO','vBSEO',
-'vBulletin[\x20]via[\x20]PHP','vBulletin via PHP',
-'vebidoobot','vebidoobot',
-'vegi[\x20]bot','vegi bot',
-'Velen','Velen',
-'viz/Nutch','viz/Nutch',
-'VoilaBot','VoilaBot',
-'VORTEX/','VORTEX',
-'voyager/','voyager',
-'vuhuvBot','vuhuvBot',
-'W3C_Validator/','W3C_Validator',
-'W3C\-checklink/','W3C-checklink',
-'WBSearchBot/','WBSearchBot',
-'WbSrch/','WbSrch/',
-'WeSEE:Ads/PageBot','WeSEE:Ads/PageBot',
-'WeSEE:Ads/PictureBot','WeSEE:Ads/PictureBot',
-'WeSEE_Bot','WeSEE_Bot',
-'Wget/','Wget',
-'Who\.is[\x20]Bot','Who.is.Bot',
-'wonderbot/','wonderbot',
-'woobot/','woobot',
-'Wotbox/','Wotbox',
-'Xaldon[\x20]WebSpider','Xaldon WebSpider',
-'Xenu[\x20]Link[\x20]Sleuth','Xenu Link Sleuth',
-'xenu_link_sleuth','xenu_link_sleuth',
-'XML[\x20]Sitemaps[\x20]Generator','XML Sitemaps Generator',
-'XoviBot/','XoviBot',
-'yacybot','yacybot',
-'Yahoo[\x20]Link[\x20]Preview','Yahoo Link Preview',
-'yak','yak-linkfluence',
-'YisouSpider','YisouSpider',
-'yoozBot','yoozBot',
-'Your\-Website\-Sucks','Your-Website-Sucks',
-'zoominfobot','zoominfobot',
-'zspider/','zspider',
-'ZumBot/','ZumBot',
-'ng/1\.','ng/1.',
-'ng/2\.','ng/2.',
-'libwww\-perl','libwww-perl',
-'urllib','urllib',
-'javabee','javabee',
-'projectwf\-java\-test\-crawler','projectwf-java-test-crawler',
-'java','Java ( catchall )',
-'loocalcrawler/nutch','loocalcrawler/nutch',
-'nutchosu\-vlib','nutchosu-vlib',
-'nutch','nutch ( catchall )',
-'perlcrawler','perlcrawler',
-'perl','perl',
-'(firefox/)([0-9]\.|[0-1][0]\.)','Firefox version 10 and lower - various robots',
-
-# Less common robots (In robot file)
-'^Mozilla$','Mozilla ( Rogue Robot )',
-'^mozilla\/3\.0\s\(compatible$', 'mozilla/3.0 (compatible - ( Rogue Robot )',
-'^mozilla\/4\.0$', 'mozilla/4.0 - ( Rogue Robot )',
-'^mozilla\/4\.0\s\(compatible;\)$', 'mozilla/4.0 (compatible;) - ( Rogue Robot )',
-'^mozilla\/5\.0$', 'mozilla/5.0 - ( Rogue Robot )',
-'^mozilla\/5\.0\s\(compatible;$', 'mozilla/5.0 (compatible; - ( Rogue Robot )',
-'^mozilla\/5\.0\s\(en\-us\)$', 'mozilla/5.0 (en-us) - ( Rogue Robot )',
-'^mozilla\/5\.0\sfirefox\/3\.0\.5$', 'mozilla/5.0 firefox/3.0.5 - ( Rogue Robot )',
-'^Mozilla/6\.0[\x20]\(compatible\)$','Mozilla/6.0 (compatible) - ( Rogue Robot )',
-'^Mozilla/(.*)Beta[\x20]\(Windows\)','Mozilla Beta (Windows) - ( Rogue Robot )',
-'MSIE[\x20]2','MSIE 2 - ( Rogue Robot )',
-'MSIE[\x20]3','MSIE 3 - ( Rogue Robot )',
-'MSIE[\x20]4','MSIE 4 - ( Rogue Robot )',
-'MSIE[\x20]5','MSIE 5 - ( Rogue Robot )',
-'MSIE[\x20]6','MSIE 6 - ( Rogue Robot )',
-'MSIE\+6\.0\;','MSIE+6.0; - ( Rogue Robot)',
-'Windows[\x20]95','Windows 95 - ( Rogue Robot )',
-'Windows[\x20]98','Windows 99 - ( Rogue Robot )',
-
-# these could be removed to speed up processing as they are rarely seen
-'a6\-indexer','a6-indexer',
-'abcdatos','abcdatos',
-'abonti\.com','abonti.com',
-'acme\.spider','acme.spider',
-'activebookmark','activebookmark',
-'adamm_bot','adamm_bot',
-'advbot','advbot',
-'affectv\.co\.uk','affectv.co.uk',
-'ahoythehomepagefinder','ahoythehomepagefinder',
-'aleadsoftbot','aleadsoftbot',
-'alkaline','alkaline',
-'allrati','allrati',
-'alltop','alltop',
-'almaden','almaden',
-'alpha_search_agent','alpha_search_agent',
-'anthill','anthill',
-'antibot','antibot',
-'aport','aport',
-'appie','appie',
-'applesyndication','applesyndication',
-'arachnophilia','arachnophilia',
-'arale','arale',
-'araneo','araneo',
-'architext','architext',
-'archive\-de\.com','archive-de.com',
-'aretha','aretha',
-'argus','argus',
-'ariadne','ariadne',
-'arianna\.libero\.it','arianna.libero.it',
-'arks','arks',
-'aspider','aspider',
-'aspseek','aspseek',
-'asterias','asterias',
-'asynchttpclient','asynchttpclient',
-'atn\.txt','atn.txt',
-'atomz','atomz',
-'auresys','auresys',
-'awbot','awbot',
-'backlinktest\.com','backlinktest.com',
-'backrub','backrub',
-'bbot','bbot',
-'becomebot','becomebot',
-'bender','bender',
-'betabot','betabot',
-'bigbrother','bigbrother',
-'biglotron','biglotron',
-'BingLocalSearch','BingLocalSearch',
-'bittorrent_bot','bittorrent_bot',
-'biz360[_+\s]spider','biz360 spider',
-'bjaaland','bjaaland',
-'blackwidow','blackwidow',
-'blindekuh','blindekuh',
-'blogbridge[_+\s]service','blogbridge service',
-'blogged_crawl','blogged_crawl',
-'bloglines','bloglines',
-'bloglovin','bloglovin',
-'blogpulse','blogpulse',
-'blogsearch','blogsearch',
-'blogshares','blogshares',
-'blogslive','blogslive',
-'blogssay','blogssay',
-'bloodhound','bloodhound',
-'bncf\.firenze\.sbn\.it/raccolta\.txt','bncf\.firenze\.sbn.it/raccolta.txt',
-'bobby','bobby',
-'bookmark\-manager','bookmark-manager',
-'borg\-bot','borg-bot',
-'boris','boris',
-'brightnet','brightnet',
-'bruinbot','bruinbot',
-'bspider','bspider',
-'bubing','bubing',
-'bumblebee','bumblebee',
-'butterfly','butterfly',
-'buzztracker','buzztracker',
-'cactvschemistryspider','cactvschemistryspider',
-'calif[^r]','calif[^r]',
-'candlelight[_+\s]favorites[_+\s]inspector','candlelight favorites inspector',
-'careerbot','careerbot',
-'carpathia','carpathia',
-'cassandra','cassandra',
-'catbot','catbot',
-'cbn00glebot','cbn00glebot',
-'cerberian_drtrs','cerberian_drtrs',
-'cfetch','cfetch',
-'cgireader','cgireader',
-'chattertrap','chattertrap',
-'check_http','check_http',
-'checkbot','checkbot',
-'checkweb_link_validator','checkweb_link_validator',
-'christcrawler','christcrawler',
-'churl','churl',
-'cienciaficcion','cienciaficcion',
-'cipinetbot','cipinetbot',
-'imagecoccoc','imagecoccoc',
-'coccoc','coccoc',
-'coldfusion','coldfusion',
-'collective','collective',
-'combine','combine',
-'commons\-httpclient','commons-httpclient',
-'computer_and_automation_research_institute_crawler','computer_and_automation_research_institute_crawler',
-'conceptbot','conceptbot',
-'contentmatch','contentmatch',
-'converamultimediacrawler','converamultimediacrawler',
-'coolbot','coolbot',
-'copubbot','copubbot',
-'core','core',
-'covario','covario',
-'cruiser','cruiser',
-'cscrawler','cscrawler',
-'cuasarbot','cuasarbot',
-'cursor','cursor',
-'cusco','cusco',
-'custo','custo',
-'cyberspyder','cyberspyder',
-'datafountains/dmoz_downloader','datafountains/dmoz_downloader',
-'dataprovider\.com','dataprovider.com',
-'daviesbot','daviesbot',
-'daylifefeedfetcher','daylifefeedfetcher',
-'daypopbot','daypopbot',
-'deepindex','deepindex',
-'desertrealm','desertrealm',
-'deweb','deweb',
-'dienstspider','dienstspider',
-'digger','digger',
-'digout4u','digout4u',
-'diibot','diibot',
-'dipsie\.bot','dipsie.bot',
-'direct_hit','direct_hit',
-'discobot','discobot',
-'dlvr\.it','dlvr.it',
-'dnabot','dnabot',
-'dnsgroup','dnsgroup',
-'doccheckbot','doccheckbot',
-'domainappender','domainappender',
-'domainchecker','domainchecker',
-'domainsdb\.net','domainsdb.net',
-'download_express','download_express',
-'dragonbot','dragonbot',
-'dreamwidth','dreamwidth',
-'drupal','drupal',
-'dulance','dulance',
-'dumbot','dumbot',
-'dumm\.de\-bot','dumm.de-bot',
-'dwcp','dwcp',
-'e\-collector','e-collector',
-'earthcom\.info','earthcom.info',
-'easydl','easydl',
-'ebiness','ebiness',
-'eccp','eccp',
-'echo!','echo!',
-'edgeio\-retriever','edgeio-retriever',
-'elfinbot','elfinbot',
-'emacs','emacs',
-'emcspider','emcspider',
-'enteprise','enteprise',
-'ernst[:blank:]2\.0','ernst[:blank:]2.0',
-'esther','esther',
-'ets_v','ets_v',
-'eventax','eventax',
-'everbeecrawler','everbeecrawler',
-'everest\-vulcan','everest-vulcan',
-'evliyacelebi','evliyacelebi',
-'exactseek','exactseek',
-'extreme[_+\s]picture[_+\s]finder','extreme picture finder',
-'ezoom','ezoom',
-'ezresult','ezresult',
-'facebook','facebook',
-'facebot','facebot',
-'fast\-search\-engine','fast-search-engine',
-'matrix_s\.p\.a\._\-_fast_enterprise_crawler','matrix_s.p.a._-_fast_enterprise_crawler',
-'fast_enterprise_crawler','fast_enterprise_crawler',
-'fastbot','fastbot',
-'fastcrawler','fastcrawler',
-'favicon','favicon',
-'favorg','favorg',
-'favorites_sweeper','favorites_sweeper',
-'fdse','fdse',
-'feedburner','feedburner',
-'feedcrawl','feedcrawl',
-'feedflow','feedflow',
-'feedmyinbox','feedmyinbox',
-'feedroll\.com','feedroll.com',
-'feedsky','feedsky',
-'feedster','feedster',
-'feedvalidator','feedvalidator',
-'feedzira','feedzira',
-'felix','felix',
-'ferret','ferret',
-'fetchbot','fetchbot',
-'fetchrover','fetchrover',
-'fever/','fever',
-'fido','fido',
-'filmkamerabot','filmkamerabot',
-'filterdb\.iss\.net','filterdb.iss.net',
-'finderlein[_+\s]research[_+\s]crawler','finderlein research crawler',
-'findexa_crawler','findexa_crawler',
-'finnish','finnish',
-'fireball','fireball',
-'firmilybot','firmilybot',
-'flexum','flexum',
-'foaf\-search\.net','foaf-search.net',
-'fooky\.com/ScorpionBot','fooky.com/ScorpionBot',
-'fouineur','fouineur',
-'francoroute','francoroute',
-'freecrawl','freecrawl',
-'freenews','freenews',
-'funnelweb','funnelweb',
-'g2crawler','g2crawler',
-'gama','gama',
-'gazz','gazz',
-'gcreep','gcreep',
-'geniebot','geniebot',
-'genieo','genieo',
-'geohasher','geohasher',
-'getbot','getbot',
-'geturl','geturl',
-'gigablastopensource','gigablastopensource',
-'global_fetch','global_fetch',
-'gnodspider','gnodspider',
-'goforit\.com','goforit.com',
-'goforitbot','goforitbot',
-'golem','golem',
-'gonzo','gonzo',
-'gougou','gougou',
-'gpu_p2p_crawler','gpu_p2p_crawler',
-'grabber','grabber',
-'grapeshot','grapeshot',
-'grapnel','grapnel',
-'griffon','griffon',
-'gromit','gromit',
-'grub','grub',
-'gulliver','gulliver',
-'gulperbot','gulperbot',
-'hambot','hambot',
-'hanrss','hanrss',
-'harvest','harvest',
-'havindex','havindex',
-'henrythemiragorobot','henrythemiragorobot',
-'heritrix','heritrix',
-'hl_ftien_spider','hl_ftien_spider',
-'hometown','hometown',
-'hoowwwer','hoowwwer',
-'hpprint','hpprint',
-'htdig','htdig',
-'html[_+\s]link[_+\s]validator','html link validator',
-'htmlgobble','htmlgobble',
-'htmlparser','htmlparser',
-'httrack','httrack',
-'hundesuche\.com\-bot','hundesuche.com-bot',
-'hyperdecontextualizer','hyperdecontextualizer',
-'ia_archiver\-web\.archive\.org','ia_archiver-web.archive.org',
-'ia_archiver','ia_archiver',
-'iajabot','iajabot',
-'iaskspider','iaskspider',
-'i\-bot','i-bot',
-'icarus6j','icarus6j',
-'ichiro','ichiro',
-'icjobs\.de','icjobs.de',
-'ilse','ilse',
-'iltrovatore\-setaccio','iltrovatore-setaccio',
-'imagelock','imagelock',
-'implisensebot','implisensebot',
-'inagist','inagist',
-'incywincy','incywincy',
-'infobot','infobot',
-'infociousbot','infociousbot',
-'infohelfer','infohelfer',
-'infomine','infomine',
-'informant','informant',
-'infoseeksidewinder','infoseeksidewinder',
-'infoseek','infoseek',
-'infospider','infospider',
-'inspectorwww','inspectorwww',
-'insurancobot','insurancobot',
-'integromedb\.org','integromedb.org',
-'intelliagent','intelliagent',
-'internet[_+\s]ninja','internet ninja',
-'internetarchive','internetarchive',
-'internetseer','internetseer',
-'internetsupervision','internetsupervision',
-'ips\-agent','ips-agent',
-'irobot','irobot',
-'iron33','iron33',
-'isearch2006','isearch2006',
-'israelisearch','israelisearch',
-'iupui_research_bot','iupui_research_bot',
-'izsearch','izsearch',
-'jacobin[\x20]club','jacobin club',
-'jakarta','jakarta',
-'jbot','jbot',
-'jcrawler','jcrawler',
-'jeeves','jeeves',
-'jennybot','jennybot',
-'jobboerse','jobboerse',
-'jobot','jobot',
-'jobo','jobo',
-'joebot','joebot',
-'jrtwine[_+\s]software[_+\s]check[_+\s]favorites[_+\s]utility','jrtwine software check favorites utility',
-'js\-kit','js-kit',
-'jubii','jubii',
-'jumpstation','jumpstation',
-'justview','justview',
-'kalambot','kalambot',
-'kamano\.de_newsfeedverzeichnis','kamano.de_newsfeedverzeichnis',
-'kapsi','kapsi',
-'katipo','katipo',
-'kazoombot','kazoombot',
-'kevin','kevin',
-'keyoshid','keyoshid',
-'kilroy','kilroy',
-'kinja\-imagebot','kinja-imagebot',
-'kinjabot','kinjabot',
-'knowitall','knowitall',
-'knowledge\.com','knowledge.com',
-'ko[_+\s]yappo[_+\s]robot','ko yappo robot',
-'kouaa_krawler','kouaa_krawler',
-'krugle','krugle',
-'ksibot','ksibot',
-'kummhttp','kummhttp',
-'kurzor','kurzor',
-'labelgrabber\.txt','labelgrabber.txt',
-'lanshanbot','lanshanbot',
-'larbin','larbin',
-'largesmall[\x20]crawler','largesmall crawler',
-'legs','legs',
-'letscrawl\.com','letscrawl.com',
-'libcrawl','libcrawl',
-'lilina','lilina',
-'link_valet_online','link_valet_online',
-'linkbot','linkbot',
-'linkdex\.com','linkdex.com',
-'linkidator','linkidator',
-'linkscan','linkscan',
-'linkstats[\x20]bot','linkstats bot',
-'linkwalker','linkwalker',
-'lipperhey','lipperhey',
-'livejournal\.com','livejournal.com',
-'lmspider','lmspider',
-'loadtimebot','loadtimebot',
-'lockon','lockon',
-'logo_gif','logo_gif',
-'longurl','longurl',
-'lssrocketcrawler','lssrocketcrawler',
-'ltbot','ltbot',
-'ltx71','ltx71',
-'lwp\-request','lwp-request',
-'lwp\-trivial','lwp-trivial',
-'lycos[_+\s]','lycos ',
-'macworm','macworm',
-'madaali\.de','madaali.de',
-'magpierss','magpierss',
-'magpie','magpie',
-'mapoftheinternet\.com','mapoftheinternet.com',
-'marvin','marvin',
-'mattie','mattie',
-'mediabot','mediabot',
-'mediafox','mediafox',
-'megaindex','megaindex',
-'megite','megite',
-'memorybot','memorybot',
-'mercator','mercator',
-'meshexplorer','meshexplorer',
-'metager2\-verification\-bot','metager2-verification-bot',
-'metajobbot','metajobbot',
-'metaspinner','metaspinner',
-'metauri','metauri',
-'miadev','miadev',
-'microsoft[_+\s]url[_+\s]control','microsoft url control',
-'microsoft[\x20]bits','microsoft bits',
-'microsoft\-webdav\-miniredir','microsoft-webdav-miniredir',
-'mindcrawler','mindcrawler',
-'mindupbot','mindupbot',
-'mini\-reptile','mini-reptile',
-'minirank','minirank',
-'misterbot','misterbot',
-'miva','miva',
-'mizzu_labs','mizzu_labs',
-'mnogosearch','mnogosearch',
-'moget','moget',
-'momspider','momspider',
-'monster','monster',
-'motor','motor',
-'movabletype','movabletype',
-'ms[_+\s]search[_+\s]6\.0[_+\s]robot','ms search 6.0 robot',
-'ms_search_4\.0_robot','ms_search_4.0_robot',
-'msnbot\-udiscovery','msnbot-udiscovery',
-'msrabot','msrabot',
-'msrbot','msrbot',
-'mt::telegraph::agent','mt::telegraph::agent',
-'muncher','muncher',
-'muscatferret','muscatferret',
-'mwdsearch','mwdsearch',
-'mydoyouhike','mydoyouhike',
-'myweb','myweb',
-'nagios','nagios',
-'nasa_search','nasa_search',
-'ndspider','ndspider',
-'nederland\.zoek','nederland.zoek',
-'netcarta','netcarta',
-'netcraft','netcraft',
-'netluchs','netluchs',
-'netmechanic','netmechanic',
-'netnewswire','netnewswire',
-'netscoop','netscoop',
-'netsprint','netsprint',
-'netvibes','netvibes',
-'newrelicpinger','newrelicpinger',
-'newscan\-online','newscan-online',
-'newsfox','newsfox',
-'newsgatoronline','newsgatoronline',
-'nextgensearchbot','nextgensearchbot',
-'nhse','nhse',
-'nicebot','nicebot',
-'nimblecrawler','nimblecrawler',
-'ning','ning',
-'nomad','nomad',
-'northstar','northstar',
-'noxtrumbot','noxtrumbot',
-'npbot','npbot',
-'nzexplorer','nzexplorer',
-'objectssearch','objectssearch',
-'occam','occam',
-'ocelli','ocelli',
-'octopus','octopus',
-'octora_beta_bot','octora_beta_bot',
-'onet\.pl[_+\s]sa','onet.pl sa',
-'onfolio','onfolio',
-'openfind','openfind',
-'opentaggerbot','opentaggerbot',
-'openwebspider','openwebspider',
-'optimizer','optimizer',
-'oracle_ultra_search','oracle_ultra_search',
-'orb_search','orb_search',
-'orbiter','orbiter',
-'packrat','packrat',
-'pageboy','pageboy',
-'panscient','panscient',
-'parasite','parasite',
-'passwordmaker\.org','passwordmaker.org',
-'patric','patric',
-'pear_http_request_class','pear_http_request_class',
-'peerbot','peerbot',
-'pegasus','pegasus',
-'perignator','perignator',
-'perman','perman',
-'petersnews','petersnews',
-'phantom','phantom',
-'php[_+\s]version[_+\s]tracker','php version tracker',
-'phpcrawl','phpcrawl',
-'phpdig','phpdig',
-'picmole','picmole',
-'pictureofinternet','pictureofinternet',
-'piltdownman','piltdownman',
-'pimptrain','pimptrain',
-'ping\.blo\.gs','ping.blo.gs',
-'pingdom','pingdom',
-'pioneer','pioneer',
-'pita','pita',
-'pitkow','pitkow',
-'pjspider','pjspider',
-'plinki','plinki',
-'pluckfeedcrawler','pluckfeedcrawler',
-'plumtreewebaccessor','plumtreewebaccessor',
-'pogodak','pogodak',
-'pompos','pompos',
-'popdexter','popdexter',
-'poppi','poppi',
-'port_huron_labs','port_huron_labs',
-'portalb','portalb',
-'postfavorites','postfavorites',
-'postpost','postpost',
-'postrank','postrank',
-'powermarks','powermarks',
-'printfulbot','printfulbot',
-'proodlebot','proodlebot',
-'protopage','protopage',
-'publiclibraryarchive','publiclibraryarchive',
-'pyquery','pyquery',
-'python','python',
-'qihoobot','qihoobot',
-'quipply','quipply',
-'qwantify','qwantify',
-'r6\_','r6\_',
-'rambler','rambler',
-'ratingburner','ratingburner',
-'raven','raven',
-'rbse','rbse',
-'redalert','redalert',
-'regator','regator',
-'relevantnoise\.com','relevantnoise.com',
-'resumerobot','resumerobot',
-'rhcs','rhcs',
-'riddler','riddler',
-'road_runner','road_runner',
-'robbie','robbie',
-'robi','robi',
-'robocrawl','robocrawl',
-'robofox','robofox',
-'robozilla','robozilla',
-'rojo','rojo',
-'rome[\x20]client','rome client',
-'roverbot','roverbot',
-'rpt\-httpclient','rpt-httpclient',
-'rssgraffiti','rssgraffiti',
-'rssimagesbot','rssimagesbot',
-'ruffle','ruffle',
-'rufusbot','rufusbot',
-'rules','rules',
-'safeads\.xyz','safeads.xyz',
-'safetynetrobot','safetynetrobot',
-'sage\+\+','sage++',
-'sandcrawler','sandcrawler',
-'savetheworldheritage','savetheworldheritage',
-'sbider','sbider',
-'schizozilla','schizozilla',
-'scooter','scooter',
-'scoutjet','scoutjet',
-'scumbot','scumbot',
-'search\-info','search-info',
-'search_au','search_au',
-'searchguild[_+\s]dmoz[_+\s]experiment','searchguild dmoz experiment',
-'searchmetricsbot','searchmetricsbot',
-'searchprocess','searchprocess',
-'seekbot','seekbot',
-'semalt','semalt',
-'senrigan','senrigan',
-'sensis_web_crawler','sensis_web_crawler',
-'seodiver','seodiver',
-'seokicks\.de','seokicks.de',
-'seoscanners','seoscanners',
-'sgscout','sgscout',
-'shaggy','shaggy',
-'shaihulud','shaihulud',
-'shareaholicbot','shareaholicbot',
-'shoutcast','shoutcast',
-'sift','sift',
-'simbot','simbot',
-'simplepie','simplepie',
-'sistrix','sistrix',
-'site\-valet','site-valet',
-'sitebot','sitebot',
-'sitedomain\-bot','sitedomain-bot',
-'sitetech','sitetech',
-'skimbot','skimbot',
-'skymob','skymob',
-'slcrawler','slcrawler',
-'slurp','slurp',
-'slysearch','slysearch',
-'smartspider','smartspider',
-'smtbot','smtbot',
-'snap\.com_beta_crawler','snap.com_beta_crawler',
-'snappy','snappy',
-'snooper','snooper',
-'sohu\-search','sohu-search',
-'sohu','sohu ( catchall )',
-'solbot','solbot',
-'speedy','speedy',
-'sphere_scout','sphere_scout',
-'spider[_+\s]monkey','spider monkey',
-'spiderline','spiderline',
-'spiderlytics','spiderlytics',
-'spiderman','spiderman',
-'spiderview','spiderview',
-'spip','spip',
-'sproose_crawler','sproose_crawler',
-'spry','spry',
-'sqworm','sqworm',
-'ssearcher','ssearcher',
-'steeler','steeler',
-'steroid__download','steroid__download',
-'stq_bot','stq_bot',
-'Stratagems[\x20]Kumo','Stratagems Kumo',
-'suchfin\-bot','suchfin-bot',
-'suke','suke',
-'summify\.com','summify.com',
-'sunrise','sunrise',
-'suntek','suntek',
-'superbot','superbot',
-'superfeedr','superfeedr',
-'susie','susie',
-'sven','sven',
-'syndic8','syndic8',
-'syndicapi','syndicapi',
-'synoobot','synoobot',
-'synthesio','synthesio',
-'t\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e','t-h-u-n-d-e-r-s-t-o-n-e',
-'tach_bw','tach_bw',
-'tagyu_agent','tagyu_agent',
-'tailrank','tailrank',
-'tarantula','tarantula',
-'tarspider','tarspider',
-'tcl_http_client_package','tcl_http_client_package',
-'techbot','techbot',
-'technoratibot','technoratibot',
-'templeton','templeton',
-'teoma','teoma',
-'teragramcrawlersurf','teragramcrawlersurf',
-'test_crawler','test_crawler',
-'testbot','testbot',
-'thumbsniper','thumbsniper',
-'titan','titan',
-'titin','titin',
-'tkwww','tkwww',
-'tlspider','tlspider',
-'topblogsinfo','topblogsinfo',
-'topicblogs','topicblogs',
-'topix\.net','topix.net',
-'trapit','trapit',
-'trileet','trileet',
-'turtlescanner','turtlescanner',
-'turtle','turtle',
-'tutorgigbot','tutorgigbot',
-'tweetedtimes','tweetedtimes',
-'twiceler','twiceler',
-'twisted[\x20]pagegetter','twisted pagegetter',
-'twitterbot','twitterbot',
-'twitterfeed','twitterfeed',
-'ubicrawler','ubicrawler',
-'ucsd','ucsd',
-'udmsearch','udmsearch',
-'ultraseek','ultraseek',
-'um\-IC','ubermetrics-technologies.com',
-'um\-LN','ubermetrics-technologies.com',
-'unchaos_bot_hybrid_web_search_engine','unchaos_bot_hybrid_web_search_engine',
-'unido\-bot','unido-bot',
-'unisterbot','unisterbot',
-'universalfeedparser','universalfeedparser',
-'unlost_web_crawler','unlost_web_crawler',
-'unwindfetchor','unwindfetchor',
-'updated','updated',
-'urlck','urlck',
-'ustc\-semantic\-group','ustc-semantic-group',
-'vagabondo\-wap','vagabondo-wap',
-'vagabondo','vagabondo',
-'valkyrie','valkyrie',
-'vermut','vermut',
-'versus_crawler_from_eda\.baykan@epfl\.ch','versus_crawler_from_eda.baykan@epfl.ch',
-'verticrawl','verticrawl',
-'vespa_crawler','vespa_crawler',
-'victoria','victoria',
-'virus[_+\s]detector','virus_detector',
-'visionsearch','visionsearch',
-'voidbot','voidbot',
-'voltron','voltron',
-'vse/','vse',
-'vwbot','vwbot',
-'w3c[_+\s]css[_+\s]validator[_+\s]jfouffa','w3c_css_validator_jfouffa',
-'w3index','w3index',
-'w3m2','w3m2',
-'wallpaper','wallpaper',
-'wanderer','wanderer',
-'wapspider','wapspider',
-'wapspIRLider','wapspIRLider',
-'watchmouse','watchmouse',
-'wavefire','wavefire',
-'waybackarchive\.org','waybackarchive.org',
-'wazzup','wazzup',
-'web_downloader','web_downloader',
-'webbandit','webbandit',
-'webbase','webbase',
-'webcatcher','webcatcher',
-'webclipping\.com','webclipping.com',
-'webcollage','webcollage',
-'webcompass','webcompass',
-'webcopy','webcopy',
-'webcrawl\.net','webcrawl.net',
-'webdup','webdup',
-'webfetcher','webfetcher',
-'webfilter','webfilter',
-'webfoot','webfoot',
-'webinator','webinator',
-'webindexer','webindexer',
-'weblayers','weblayers',
-'weblinker','weblinker',
-'webminer','webminer',
-'webmirror','webmirror',
-'webmoose','webmoose',
-'webquest','webquest',
-'webreader','webreader',
-'webreaper','webreaper',
-'website[_+\s]monitoring[_+\s]bot','website monitoring bot',
-'websnarf','websnarf',
-'webspider','webspider',
-'webvac','webvac',
-'webvulncrawl','webvulncrawl',
-'webwalker','webwalker',
-'webwalk','webwalk',
-'webwatch','webwatch',
-'wells_search','wells_search',
-'wer\-liefert\-was','wer-liefert-was',
-'wesee:search','wesee:search',
-'wevikabot','wevikabot',
-'whatuseek','whatuseek',
-'whowhere','whowhere',
-'windows\-rss\-platform','windows-rss-platform',
-'wired\-digital','wired-digital',
-'zyborg','zyborg',
-'wisenutbot','wisenutbot',
-'wiumi','wiumi',
-'wmir','wmir',
-'wolp','wolp',
-'wombat','wombat',
-'wonderer','wonderer',
-'woozweb','woozweb',
-'wordpress','wordpress',
-'worm','worm',
-'wume_crawler','wume_crawler',
-'wwwc','wwwc',
-'wwweasel','wwweasel',
-'wz101','wz101',
-'xget','xget',
-'xirq','xirq',
-'xydo','xydo',
-'y!j','y!j',
-'yahoo![\x20]searchmonkey','yahoo! searchmonkey',
-'yahoo!_mindset','yahoo!_mindset',
-'yahoo\-blogs','yahoo-blogs',
-'yahoo\-mmcrawler','yahoo-mmcrawler',
-'yahoo\-newscrawler','yahoo-newscrawler',
-'yahoo[\x20]pipes','yahoo pipes',
-'yahoo\-verticalcrawler','yahoo-verticalcrawler',
-'yahoocachesystem','yahoocachesystem',
-'yahooexternalcache','yahooexternalcache',
-'yahoofeedseeker','yahoofeedseeker',
-'yahooseeker\-testing','yahooseeker-testing',
-'yahooseeker','yahooseeker',
-'yahooysmcm','yahooysmcm',
-'yammer','yammer',
-'yanga','yanga',
-'yet\-another\-spider','yet-another-spider',
-'yeti','yeti',
-'yie8','yie8',
-'yodaobot','yodaobot',
-'yooglifetchagent','yooglifetchagent',
-'youdao','youdao',
-'yourls','yourls',
-'z\-add_link_checker','z-add_link_checker',
-'zealbot','zealbot',
-'zemanta','zemanta',
-'zend_http_client','zend_http_client',
-'zeus','zeus',
-'zhuaxia','zhuaxia',
-'[^a]fish','[^a]fish',
-'[\x20]netseer[\x20]',' netseer ',
-'^[1-3]$','^[1-3]$',
-'^finbot','^finbot',
-'^motorola$','^motorola$',
-'^msie','^msie',
-'^voyager/','^voyager',
-'^webindex$','webindex',
-'1\-more_scanner','1-more_scanner',
-# below placed at end to catch some generics
-'nbot','nbot',
-
-# Generic robot
-'robot','robot',
-'blog','blog',
-'checker','checker',
-'crawl','crawl',
-'discover','discover',
-'feed','feed',
-'fetcher','fetcher',
-'hunter','hunter',
-'link','link',
-'scanner','scanner',
-'seek','seek',
-'sitemap','sitemap',
-'spider','spider',
-'sucker','sucker',
-'survey','survey',
-'validator','validator',
-'bot[\s_+:,\.\;\/\\\-]','Unknown robot identified by bot\*',
-'[\s_+:,\.\;\/\\\-]bot','Unknown robot identified by \*bot',
-'curl','Curl',
-'php','A PHP script',
-'ruby/','Ruby script',
-'no_user_agent','empty user agent string',
-# Moving oBot towards the end so it does not pick up other *obot robots
-'oBot/','oBot',
-# Unknown robots identified by hit on robots.txt
-'unknown','Unknown robot (identified by hit on robots.txt)'
-);
-
-
-# RobotsAffiliateLib
-# This list try to tell by which Search Engine a robot is used
-#-------------------------------------------------------------
-%RobotsAffiliateLib = (
-);
-
-1;
diff --git a/test/awstats/conf/awstats.testnginx.conf b/test/awstats/conf/awstats.testnginx.conf
new file mode 100644
index 00000000..ffb43cd6
--- /dev/null
+++ b/test/awstats/conf/awstats.testnginx.conf
@@ -0,0 +1,806 @@
+# AWStats configure file
+#------------------------------------------------------------------------
+# Copy this file into awstats.www.myserver.mydomain.conf or awstats.conf
+# and edit this new file to setup AWStats.
+# If you don't understand what is a parameter, keep default value.
+#------------------------------------------------------------------------
+
+
+# Main setup section (Required to make AWStats work)
+#------------------------------------------------------------------------
+
+# "LogFile" contains the web server log file to analyze.
+# Possible values: A full path, or a relative path from awstats.pl directory.
+# Example: "/var/log/apache/access.log"
+# Example: "../logs/mycombinedlog.log"
+# You can also use tags in this filename if you need a dynamic file name
+# depending on date or time (Replacement is made by AWStats at the beginning
+# of its execution). These are the available tags:
+#   %YYYY-n  is replaced with 4 digits year we were n hours ago
+#   %YY-n    is replaced with 2 digits year we were n hours ago
+#   %MM-n    is replaced with 2 digits month we were n hours ago
+#   %MO-n    is replaced with 3 letters month we were n hours ago
+#   %DD-n    is replaced with day we were n hours ago
+#   %HH-n    is replaced with hour we were n hours ago
+#   %NS-n    is replaced with number of seconds at 00:00 since 1970
+#   %WM-n    is replaced with the week number in month (1-5)
+#   %Wm-n    is replaced with the week number in month (0-4)
+#   %WY-n    is replaced with the week number in year (01-52)
+#   %Wy-n    is replaced with the week number in year (00-51)
+#   %DW-n    is replaced with the day number in week (1-7, 1=sunday)
+#                              use n=24 if you need (1-7, 1=monday)
+#   %Dw-n    is replaced with the day number in week (0-6, 0=sunday)
+#                              use n=24 if you need (0-6, 0=monday)
+#   Use 0 for n if you need current year, month, day, hour...
+# Example: "/var/log/access_log.%YYYY-0%MM-0%DD-0.log"
+# Example: "C:/WINNT/system32/LogFiles/W3SVC1/ex%YY-24%MM-24%DD-24.log"
+# You can also use a pipe if log file come from a pipe.
+# Example: "gzip -d </var/log/apache/access.log.gz |"
+#
+LogFile="/home/ldestailleur/git/awstats/test/log/testnginx.log"
+
+
+# Enter the log file type you want to analyze.
+# Possible values:
+#  W - For a web log file
+#  M - For a mail log file
+#  F - For a ftp log file
+# Example: W
+# Default: W
+#
+LogType=W
+
+
+# Enter here your log format (Must match your web server config. See setup
+# instructions in documentation to know how to configure your web server to have
+# the required log format).
+# Possible values: 1,2,3,4,5 or "your_own_personalized_log_format"
+# 1 - Apache or Lotus Notes/Domino native combined log format (NCSA combined/XLF/ELF log format)
+# 2 - IIS log format (W3C log format)
+# 3 - Webstar native log format
+# 4 - Apache or Squid native common log format (NCSA common/CLF log format)
+#     With LogFormat=4, some features (browsers, os, keywords...) can't work.
+# "your_own_personalized_log_format" = If your log has a personalized format,
+#   you must use the following syntax keys to define the log format string:
+#   %host             Host client name or IP address
+#   %logname          Authenticated login/user used on protected pages
+#   %time1            Date and time with format: [dd/mmm/yyyy:hh:mm:ss +0000]
+#   %time1b           Date and time with format: [dd/mmm/yyyy:hh:mm:ss]
+#   %time2            Date and time with format: yyyy-mm-dd hh:mm:ss
+#   %methodurl        Method and URL with format: "GET /index.html HTTP/x.x"
+#   %methodurlnoprot  Method and URL with format: "GET /index.html"
+#   %method           Method with format: GET
+#   %url              URL only with format: /index.html
+#   %query            Query string (used by URLWithQuery option)
+#   %code             Return code status (with format for web log: 999)
+#   %bytesd           Size of document in bytes
+#   %refererquot      Referer page with format: "http://from.com/from.htm"
+#   %referer          Referer page with format: http://from.com/from.htm
+#   %uaquot           User agent with format: "Mozilla/4.0 (compatible, ...)"
+#   %ua               User agent with format: Mozilla/4.0_(compatible...)
+#   %gzipin           mod_gzip compression input bytes: In:XXX
+#   %gzipout          mod_gzip compression output bytes & ratio: Out:YYY:ZZpct.
+#   %gzipratio        mod_gzip compression ratio: ZZpct.
+#   %deflateratio     mod_deflate compression ratio with format: (ZZ)
+#   %email            EMail sender (for mail log)
+#   %email_r          EMail receiver (for mail log)
+#   %syslog           Syslog-specific time and host stamp with format: Mon dd hh:mm:ss hostname
+#   %virtualname      Web server virtual hostname. Use this tag when same log
+#                     file contains data of several virtual web servers. The
+#                     SiteDomain will be used to filter the one you want.
+#   If your log format has some fields not included in this list, use
+#   %other            Means another field not used
+#
+# Examples for Apache combined logs (following two examples are equivalent):
+# LogFormat = 1
+# LogFormat = "%host %other %logname %time1 %methodurl %code %bytesd %refererquot %uaquot"
+#
+# Examples for IIS (following two examples are equivalent):
+# LogFormat = 2
+# LogFormat = "%time2 %host %logname %method %url %code %bytesd %other %ua %referer"
+#
+LogFormat="%host %other %logname %time1 %methodurl %code %bytesd %refererquot %uaquot"
+#LogFormat=1
+
+
+# Set this to a directory where you want AWStats to save its working files.
+# Need write permissions by web server user (user "nobody" with Unix OS).
+# Example: "/tmp"
+# Example: "../data"
+# Example: "C:/awstats_working_dir"
+# Default: "."          (means same directory as awstats.pl)
+#
+DirData="/home/ldestailleur/git/awstats/test/awstats/result" 	
+
+# Relative or absolute web URL of your awstats.pl directory.
+# Useful only when AWStats is used from command line.
+# Default: "/cgi-bin"   (means awstats.pl is in "/wwwroot/cgi-bin")
+#
+DirCgi="/cgi-bin"
+
+# Relative or absolute web URL of all icons subdirectories.
+# Default: "/icon" (means you must copy icons directories in "/wwwroot/icon")
+#
+DirIcons="/home/ldestailleur/git/awstats/wwwroot/icon"
+
+# If host names are already resolved in your logfile, setting this to 0 will
+# increase performance.
+# Possible values: 1 or 0
+# Default: 1
+# 
+DNSLookup=1
+
+
+SiteDomain="__AWSTATS_CURRENT_CONFIG__"
+
+
+# Enter here all other possible domain names, addresses or virtual host
+# aliases someone can use to access your site. Try to keep only the minimum
+# number of possible names/addresses to have the best performances.
+# You can repeat the "SiteDomain" value in this list.
+# This parameter is used to analyze referer field in log file and to help
+# AWStats to know if a referer URL is a local URL of same site or an URL of
+# another site.
+# Note: Use space between each value.
+# Note: You can use regular expression values writing value with REGEX[value].
+# Example: "www.myserver.com localhost 127.0.0.1 REGEX[\.mydomain\.(net|org)$]"
+#
+HostAliases="localhost 127.0.0.1 REGEX[^.*\.myserver\.com$]"
+
+
+
+#------------------------------------------------------------------------
+# Optional setup section (Not required but increases AWStats features)
+#------------------------------------------------------------------------
+
+# When this parameter is set to 1, AWStats adds a button on report page to
+# allow to "update" statistics from a web browser. Warning, when "update" is
+# made from a browser, AWStats is run as a CGI by the web server user defined
+# in your web server (user "nobody" by default with Apache, "IUSR_XXX" with
+# IIS), so the "DirData" directory and all already existing history files
+# awstatsMMYYYY[.xxx].txt must be writable by this user. Change permissions if
+# necessary to "Read/Write" (and "Modify" for Windows NTFS file systems).
+# Warning: Update process can be long so you might experience "time out"
+# browser errors if you don't launch AWStats frequently enough.
+# When set to 0, update is only made when AWStats is run from the command
+# line interface (or a task scheduler).
+# Possible values: 0 or 1
+# Default: 0
+#
+AllowToUpdateStatsFromBrowser=1
+
+
+# Set to 1 to let AWStats purge the log after processing it. That way, the next
+# time you launch AWStats, the log file will be smaller and processing faster.
+# IMPORTANT !!!
+# AWStats is able to find new lines in your log file to process only new ones,
+# so you can launch it as often as you want, even with this parameter set to 0,
+# but if you work with this value, you MUST have something that cleans your
+# logfile from time to time if your web server doesn't do it.
+# Possible values: 1 or 0
+# Default: 0 	(but if you can, set this to 1 to increase speed)
+#
+#
+PurgeLogFile=0
+
+
+# When PurgeLogFile is setup to 1, AWStats will clean your log file after
+# processing it. You can however keep an archive file (saved in "DirData") of
+# all processed log records by setting this to 1 (For example if you want to
+# use another log analyzer).
+# This parameter is not used if PurgeLogFile=0
+# Possible values: 1 or 0
+# Default: 0
+#
+ArchiveLogRecords=0
+
+
+# MiscTrackerUrl can be used to make AWStats able to detect some miscellaneous
+# things that cannot be tracked any other way, like:
+# - Screen size
+# - Color depth
+# - Java enabled
+# - Macromedia Director plugin
+# - Macromedia Shockwave plugin
+# - Realplayer G2 plugin
+# - QuickTime plugin
+# - Mediaplayer plugin
+# - Acrobat PDF plugin
+# To enable all these features, you must add the following HTML code at the end
+# of your index page (before </BODY>) :
+# <script language=javascript src="/js/awstats_misc_tracker.js"></script>
+# If code is not added in index page, all these detection capabilities will be
+# disabled. You must also check that ShowScreenSizeStats and ShowMiscStats
+# parameters are set to 1 to make results appear in report page.
+# If you change this parameter, you must also change the
+# awstatsmisctrackerurl variable into the awstats_misc_tracker.js file.
+# Change : Effective for new updates only.
+# Possible value: Name of javascript tracker file added in HTML code
+# Default: "/js/awstats_misc_tracker.js"
+#
+MiscTrackerUrl="/js/awstats_misc_tracker.js"
+
+
+# Add here a list of kinds of URL (file extensions) that must be counted as
+# "Hit only" and not as a "Hit" and "Page/Download". You can set here all
+# image extensions, as they are downloaded hits that must be counted but they
+# are not viewed pages. URLs with such extensions are not included in the TOP
+# Pages/URL report.
+# Note: If you want to exclude particular URLs from stats (No Pages and no
+# Hits reported), you must use SkipFiles parameter.
+# Change : Effective for new updates only
+# Example: "css js class gif jpg jpeg png bmp ico zip arj gz z wav mp3 wma mpg"
+# Example: ""
+# Default: "css js class gif jpg jpeg png bmp ico"
+#
+NotPageList="css js class gif jpg jpeg png bmp ico"
+
+
+# Default index page name for your web server.
+# Change : Effective for new updates only
+# Example: "index.php index.html default.html"
+# Default: "index.html"
+#
+DefaultFile="index.php index.html"
+
+
+
+
+#------------------------------------------------------------------------
+# Optional setup section (Not required but increases AWStats features)
+#------------------------------------------------------------------------
+
+# Set your primary language.
+# Possible value:
+#  Albanian=al, Bosnian=ba, Bulgarian=bg, Catalan=ca,
+#  Chinese (Taiwan)=tw, Chinese (Simplified)=cn, Czech=cz, Danish=dk,
+#  Dutch=nl, English=en, Estonian=et, Euskara=eu, Finnish=fi,
+#  French=fr, Galician=gl, German=de, Greek=gr, Hebrew=he, Hungarian=hu,
+#  Icelandic=is, Indonesian=id, Italian=it, Japanese=jp, Korean=kr,
+#  Latvian=lv, Norwegian (Nynorsk)=nn, Norwegian (Bokmal)=nb, Polish=pl,
+#  Portuguese=pt, Portuguese (Brazilian)=br, Romanian=ro, Russian=ru,
+#  Serbian=sr, Slovak=sk,  Spanish=es, Swedish=se, Turkish=tr, Ukrainian=ua,
+#  Welsh=wlk.
+#  First available language accepted by browser=auto
+# Default: "auto"
+#
+Lang="auto"
+
+
+# Do not include access from clients that match following criteria.
+# If your log file contains IP addresses in host field, you must enter here
+# matching IP addresses criteria.
+# If DNS lookup is already done in your log file, you must enter here hostname
+# criteria, else enter ip address criteria.
+# The opposite parameter of "SkipHosts" is "OnlyHosts".
+# Note: Use space between each value. This parameter is not case sensitive.
+# Note: You can use regular expression values writing value with REGEX[value].
+# Change : Effective for new updates only
+# Example: "127.0.0.1 REGEX[^192\.168\.] REGEX[^10\.0\.0\.]"
+# Example: "localhost REGEX[^.*\.localdomain$]"
+# Default: ""
+#
+SkipHosts=""
+
+
+# Do not include access from clients with a user agent that match following
+# criteria. If you want to exclude a robot, you should update the robots.pm
+# file instead of this parameter.
+# Note: Use space between each value. This parameter is not case sensitive.
+# Note: You can use regular expression values writing value with REGEX[value].
+# Change : Effective for new updates only
+# Example: "konqueror REGEX[ua_test_v\d\.\d]"
+# Default: ""
+#
+SkipUserAgents=""
+
+
+# Use SkipFiles to ignore access to URLs that match one of following entries.
+# You can enter a list of not important URLs (like framed menus, hidden pages,
+# etc...) to exclude them from statistics. You must enter here exact relative
+# URL as found in log file, or a matching REGEX value.
+# For example, to ignore /badpage.html, just add "/badpage.html". To ignore
+# all pages in a particular directory, add "REGEX[^\/directorytoexclude]".
+# The opposite parameter of "SkipFiles" is "OnlyFiles".
+# Note: Use space between each value. This parameter is not case sensitive.
+# Note: You can use regular expression values writing value with REGEX[value].
+# Change : Effective for new updates only
+# Example: "/badpage.html REGEX[^\/excludedirectory]"
+# Default: ""
+#
+SkipFiles=""
+
+
+# Some web servers on some operating systems (IIS-Windows) consider that
+# logins with the same value but different case are the same login. To tell
+# AWStats to also consider them as one, set this parameter to 1.
+# Possible values: 0 or 1
+# Default: 0
+# 
+AuthenticatedUsersNotCaseSensitive=1
+
+
+# Keep or remove the anchor string you can find in some URLs.
+# Possible values: 0 or 1
+# Default: 0
+#
+URLWithAnchor=0
+
+
+# In URL links, "?" char is used to add parameter's list in URLs. Syntax is:
+# /mypage.html?param1=value1&param2=value2
+# However, some servers/sites also use other chars to isolate dynamic part of
+# their URLs. You can complete this list with all such characters.
+# Change : Effective for new updates only
+# Example: "?;,"
+# Default: "?;"
+#
+URLQuerySeparators="?;"
+
+
+# Keep or remove the query string to the URL in the statistics for individual
+# pages. This is primarily used to differentiate between the URLs of dynamic
+# pages. If set to 1, mypage.html?id=x and mypage.html?id=y are counted as two
+# different pages.
+# Warning, when set to 1, memory required to run AWStats is dramatically
+# increased if you have a lot of changing URLs (for example URLs with a random
+# id inside). Such web sites should not set this option to 1 or use seriously
+# the next parameter URLWithQueryWithOnlyFollowingParameters (or eventually 
+# URLWithQueryWithoutFollowingParameters).
+# Change : Effective for new updates only
+# Possible values:
+# 0 - URLs are cleaned from the query string (ie: "/mypage.html")
+# 1 - Full URL with query string is used     (ie: "/mypage.html?p=x&q=y")
+# Default: 0
+# 
+URLWithQuery=1
+
+
+# When URLWithQuery is on, you will get the full URL with all parameters in
+# URL reports. But among those parameters, sometimes you don't need a
+# particular parameter because it does not identify the page or because it's
+# a random ID changing for each access even if URL points to same page. In
+# such cases, it is highly recommended to ask AWStats to keep only parameters
+# you need (if you know them) before counting, manipulating and storing it.
+# Enter here list of wanted parameters. For example, with "param", one hit on
+# /mypage.cgi?param=abc&id=Yo4UomP9d and /mypage.cgi?param=abc&id=Mu8fdxl3r
+# will be reported as 2 hits on /mypage.cgi?param=abc
+# This parameter is not used when URLWithQuery is 0 and can't be used with
+# URLWithQueryWithoutFollowingParameters.
+# Change : Effective for new updates only
+# Example: "param"
+# Default: ""
+# 
+URLWithQueryWithOnlyFollowingParameters=""
+
+
+# When URLWithQuery is on, you will get the full URL with all parameters in
+# URL reports. But among those parameters, sometimes you don't need a
+# particular parameter because it does not identify the page or because it's
+# a random ID changing for each access even if URL points to same page. In
+# such cases, it is highly recommended to ask AWStats to remove such parameters
+# from the URL before counting, manipulating and storing it. Enter here list
+# of all non wanted parameters. For example if you enter "id", one hit on
+# /mypage.cgi?p=abc&id=Yo4UomP9d and /mypage.cgi?p=abc&id=Mu8fdxl3r
+# will be reported as 2 hits on /mypage.cgi?p=abc
+# This parameter is not used when URLWithQuery is 0 and can't be used with
+# URLWithQueryWithOnlyFollowingParameters.
+# Change : Effective for new updates only
+# Example: "PHPSESSID jsessionid"
+# Default: ""
+# 
+URLWithQueryWithoutFollowingParameters="productId jsessionid"
+
+
+# Keep or remove the query string to the referrer URL in the statistics for
+# external referrer pages. This is used to differentiate between the URLs of
+# dynamic referrer pages. If set to 1, mypage.html?id=x and mypage.html?id=y
+# are counted as two different referrer pages.
+# Change : Effective for new updates only
+# Possible values:
+# 0 - Referrer URLs are cleaned from the query string (ie: "/mypage.html")
+# 1 - Full URL with query string is used      (ie: "/mypage.html?p=x&q=y")
+# Default: 0
+# 
+URLReferrerWithQuery=0
+
+
+# AWStats can detect setup problems or show you important information for
+# better use. Keep this to 1, except if AWStats says you can change it.
+# Possible values: 1 or 0
+# Default: 1
+WarningMessages=1
+
+
+
+#-----------------------------------------------------------------------------
+# OPTIONAL ACCURACY SETUP SECTION (Not required but increase AWStats features)
+#-----------------------------------------------------------------------------
+
+# Following values allow you to define accuracy of AWStats entities (robots,
+# browsers, os, referers, file types) detection.
+# It is recommended that very important web sites or ISPs that provide AWStats
+# to their customers set this parameter to 1 (or 0), instead of 2.
+# Possible values:
+#  0 = No detection,
+#  1 = Medium/Standard detection
+#  2 = Full detection
+# Change : Effective for new updates only
+# Default: 2 (0 for LevelForWormsDetection)
+#
+LevelForBrowsersDetection=2				# 0 disables Browsers detection.
+LevelForOSDetection=2					# 0 disables OS detection.
+LevelForRefererAnalyze=2				# 0 disables Origin detection.
+LevelForRobotsDetection=2				# 0 disables Robots detection.
+LevelForSearchEnginesDetection=2		# 0 disables Search engines detection.
+LevelForKeywordsDetection=2				# 0 disables Keyphrases/Keywords detection.
+LevelForFileTypesDetection=1			# 0 disables File types detection.
+LevelForWormsDetection=2				# 0 disables Worms detection.
+
+
+
+#-----------------------------------------------------------------------------
+# OPTIONAL APPEARANCE SETUP SECTION (Not required but increase AWStats features)
+#-----------------------------------------------------------------------------
+
+# When you use AWStats as a CGI, you can have the reports shown in HTML frames.
+# Frames are only available for report viewed dynamically. When you build
+# pages from command line, this option is not used and no frames are built.
+# Possible values: 0 or 1
+# Default: 1
+#
+UseFramesWhenCGI=1
+
+
+# Each URL shown in stats pages is a link you can click.
+# Possible values: 1 or 0
+# Default: 1
+#
+ShowLinksOnUrl=1
+
+
+# List of visible flags that link to other language translations.
+# See Lang parameter for list of allowed flag/language codes.
+# If you don't want any flag link, set ShowFlagLinks to "".
+# This parameter is used only if ShowMenu parameter is set to 1.
+# Possible values: "" or "language_codes_separated_by_space"
+# Example: "en es fr nl es"
+# Default: ""
+#
+ShowFlagLinks="fr"
+
+
+# Search engines keywords reported are full search string or separate keywords
+# Possible values:
+# 0 - Search keywords reported are full search string (ie: "town maps")
+# 1 - Search keywords reported are separated words (ie: "town" and "maps")
+# Default: 0
+#
+SplitSearchString=0
+
+
+# You can put here HTML code that will be added at the end of AWStats reports.
+# Useful for adding an advertising banner.
+# Default: ""
+#
+HTMLEndSection=""
+
+
+# Value of maximum bar width/height for horizontal/vertical graphics bar
+# Default: 260/220
+#
+BarWidth   = 260
+BarHeight  = 220
+
+ 
+# This value can be used to choose maximum number of lines shown for each 
+# particular reporting.
+#
+# Stats by domains
+MaxNbOfDomain = 25
+# Stats by hosts
+MaxNbOfHostsShown = 25
+MinHitHost    = 1
+# Stats by authenticated users
+MaxNbOfLoginShown = 5
+MinHitLogin   = 1
+# Stats by robots
+MaxNbOfRobotShown = 25
+MinHitRobot   = 1
+# Stats by pages
+MaxNbOfPageShown = 25
+MinHitFile    = 1
+# Stats by referers
+MaxNbOfRefererShown = 25
+MinHitRefer   = 1
+# Stats for keywords
+MaxNbOfKeywordsShown = 25
+MinHitKeyword  = 1
+
+
+ShowHeader=1				# Show AWStats head title and icon
+ShowMenu=1					# Show menu header with links on detailed reports
+ShowMonthDayStats=1
+ShowDaysOfWeekStats=1
+ShowHoursStats=1
+ShowDomainsStats=1
+ShowHostsStats=1
+ShowAuthenticatedUsers=1
+ShowRobotsStats=1
+ShowPagesStats=1
+ShowCompressionStats=0		# Show report of compression stats when using mod_gzip
+ShowFileTypesStats=1
+ShowFileSizesStats=0		# Not yet available
+ShowBrowsersStats=1
+ShowOSStats=1
+ShowOriginStats=1
+ShowKeyphrasesStats=1
+ShowKeywordsStats=1
+ShowHTTPErrorsStats=1
+ShowWormsStats=1
+# Show misc chart
+# Default: a (See also MiscTrackerUrl parameter), Possible codes: ajdfrqwp
+ShowMiscStats=ajdfrqwp
+ShowScreenSizeStats=1
+
+
+# In the Origin chart, you have stats on where your hits came from. You can
+# include hits on pages that come from pages of the same site in this chart.
+# Possible values: 0 or 1
+# Default: 0
+#
+IncludeInternalLinksInOriginSection=1
+
+
+
+#-----------------------------------------------------------------------------
+# PLUGINS
+#-----------------------------------------------------------------------------
+
+# Add here all plugin files you want to load.
+# Plugin files must be .pm files stored in 'plugins' directory.
+# Uncomment LoadPlugin lines to enable a plugin after checking that perl
+# modules required by the plugin are installed.
+
+# PLUGIN: Tooltips
+# REQUIRED MODULES: None
+# PARAMETERS: None
+# DESCRIPTION: Add tooltips pop-up help boxes to HTML report pages.  
+# NOTE: This will increase HTML report pages size, thus server load and bandwidth.
+#
+#LoadPlugin="tooltips"
+
+# PLUGIN: DecodeUTFKeys
+# REQUIRED MODULES: Encode and URI::Escape
+# PARAMETERS: None
+# DESCRIPTION: Allow AWStats to show correctly (in language charset) 
+# keywords/keyphrases strings even if they were UTF8 coded by the 
+# referer search engine.
+#
+#LoadPlugin="decodeutfkeys"
+
+# PLUGIN: IPv6
+# PARAMETERS: None
+# REQUIRED MODULES: Net::IP and Net::DNS
+# DESCRIPTION: This plugin gives AWStats capability to make reverse DNS
+# lookup on IPv6 addresses.
+#
+#LoadPlugin="ipv6"
+
+# PLUGIN: HashFiles
+# REQUIRED MODULES: Storable
+# PARAMETERS: None
+# DESCRIPTION: AWStats DNS cache files are read/saved as native hash files. 
+# This increases DNS cache files loading speed, above all for very large web sites.
+#
+#LoadPlugin="hashfiles"
+
+
+# PLUGIN: UserInfo
+# REQUIRED MODULES: None
+# PARAMETERS: None
+# DESCRIPTION: Add a text (Firstname, Lastname, Office Department, ...) in
+# authenticated user reports for each login value.
+# A text file called userinfo.myconfig.txt, with two fields (first is login,
+# second is text to show, separated by a tab char) must be created in DirData
+# directory.
+#
+#LoadPlugin="userinfo"
+
+# PLUGIN: HostInfo
+# REQUIRED MODULES: Net::XWhois
+# PARAMETERS: None
+# DESCRIPTION: Add a column into host chart with a link to open a popup window that shows
+# info on host (like whois records).
+#
+#LoadPlugin="hostinfo"
+
+# PLUGIN: ClusterInfo
+# REQUIRED MODULES: None
+# PARAMETERS: None
+# DESCRIPTION: Add a text (for example a full hostname) in cluster reports for each cluster
+# number. A text file called clusterinfo.myconfig.txt, with two fields (first is
+# cluster number, second is text to show, separated by a tab char) must be
+# created in DirData directory.
+# Note this plugin is useless if ShowClusterStats is set to 0 or if you don't
+# use a personalized log format that contains %cluster tag.
+#
+#LoadPlugin="clusterinfo"
+
+# PLUGIN: UrlAliases
+# REQUIRED MODULES: None
+# PARAMETERS: None
+# DESCRIPTION: Add a text (Page title, description...) in URL reports before URL value.
+# A text file called urlalias.myconfig.txt, with two fields (first is URL,
+# second is text to show, separated by a tab char) must be created into
+# DirData directory.
+#
+#LoadPlugin="urlalias"
+
+# PLUGIN: TimeHiRes
+# REQUIRED MODULES: Time::HiRes (if Perl < 5.8)
+# PARAMETERS: None
+# DESCRIPTION: Time reported by -showsteps option is in millisecond. For debug purpose.
+#
+#LoadPlugin="timehires"		
+
+# PLUGIN: TimeZone
+# REQUIRED MODULES: Time::Local
+# PARAMETERS: [timezone offset]
+# DESCRIPTION: Allow AWStats to adjust time stamps for a different timezone
+# This plugin reduces AWStats speed by 10% !
+# LoadPlugin="timezone"
+# LoadPlugin="timezone +2"
+# LoadPlugin="timezone CET"
+#
+#LoadPlugin="timezone +2"
+
+# PLUGIN: Rawlog
+# REQUIRED MODULES: None
+# PARAMETERS: None
+# DESCRIPTION: This plugin adds a form in AWStats main page to allow users to see raw
+# content of current log files. A filter is also available.
+#
+#LoadPlugin="rawlog"
+
+# PLUGIN: GraphApplet
+# REQUIRED MODULES: None
+# PARAMETERS: [CSS classes to override]
+# DESCRIPTION: Supported charts are built by a 3D graphic applet.
+#
+#LoadPlugin="graphapplet /awstatsclasses"				# EXPERIMENTAL FEATURE
+
+# PLUGIN: GraphGoogleChartAPI
+# REQUIRED MODULES: None
+# PARAMETERS: None
+# DESCRIPTION: Replaces the standard charts with free Google API generated images 
+# in HTML reports. If country data is available and more than one country has hits, 
+# a map will be generated using Google Visualizations.
+# Note: The machine where reports are displayed must have Internet access for the 
+# charts to be generated. The only data sent to Google includes the statistic numbers, 
+# legend names and country names.
+# Warning: This plugin is not compatible with option BuildReportFormat=xhtml. 
+#
+#LoadPlugin="graphgooglechartapi"
+
+# PLUGIN: GeoIPfree
+# REQUIRED MODULES: Geo::IPfree version 0.2+ (from Graciliano M.P.)
+# PARAMETERS: None
+# DESCRIPTION: Country chart is built from an Internet IP-Country database.
+# This plugin is useless for intranet only log files.
+# Note: You must choose between using this plugin (need Perl Geo::IPfree
+# module, database is free but not up to date) or the GeoIP plugin (need
+# Perl Geo::IP module from Maxmind, database is also free and up to date).
+# Note: Activestate provide a corrupted version of Geo::IPfree 0.2 Perl
+# module, so install it from elsewhere (from www.cpan.org for example).
+# This plugin reduces AWStats speed by up to 10% !
+#
+#LoadPlugin="geoipfree"
+
+# MAXMIND GEO IP MODULES: Please see documentation for notes on all Maxmind modules
+
+# PLUGIN: GeoIP
+# REQUIRED MODULES: Geo::IP or Geo::IP::PurePerl (from Maxmind)
+# PARAMETERS: [GEOIP_STANDARD | GEOIP_MEMORY_CACHE] [/pathto/geoip.dat] </pathto/override.txt>
+# DESCRIPTION: Builds a country chart and adds an entry to the hosts 
+# table with country name
+# Replace spaces in the path of geoip data file with string "%20".
+#
+LoadPlugin="geoip GEOIP_STANDARD /home/ldestailleur/git/awstats/test/maxmind/GeoIP.dat"
+
+# PLUGIN: GeoIP_City_Maxmind
+# REQUIRED MODULES: Geo::IP or Geo::IP::PurePerl (from Maxmind)
+# PARAMETERS: [GEOIP_STANDARD | GEOIP_MEMORY_CACHE] [/pathto/GeoIPCity.dat] </pathto/override.txt>
+# DESCRIPTION: This plugin adds a column under the hosts field and tracks the pageviews
+# and hits by city including regions.
+# Replace spaces in the path of geoip data file with string "%20".
+#
+LoadPlugin="geoip_city_maxmind GEOIP_STANDARD /home/ldestailleur/git/awstats/test/maxmind/GeoIPCity-532.dat"
+
+# PLUGIN: GeoIP_ASN_Maxmind
+# REQUIRED MODULES: Geo::IP or Geo::IP::PurePerl (from Maxmind)
+# PARAMETERS: [GEOIP_STANDARD | GEOIP_MEMORY_CACHE] [/pathto/GeoIPASN.dat[+/pathto/override.txt][+http://linktoASlookup]]
+# DESCRIPTION: This plugin adds a chart of AS numbers where the host IP address is registered. 
+# This plugin can display some ISP information if included in the database. You can also provide 
+# a link that will be used to lookup additional registration data. Put the link at the end of 
+# the parameter string and the report page will include the link with the full AS number at the end.
+# Replace spaces in the path of geoip data file with string "%20".
+#
+#LoadPlugin="geoip_asn_maxmind GEOIP_STANDARD /home/ldestailleur/git/awstats/test/maxmind/GeoIP.dat+http://enc.com.au/itools/aut-num.php?autnum="
+
+# PLUGIN: GeoIP_Region_Maxmind
+# REQUIRED MODULES: Geo::IP or Geo::IP::PurePerl (from Maxmind)
+# PARAMETERS: [GEOIP_STANDARD | GEOIP_MEMORY_CACHE] [/pathto/GeoIPRegion.dat]
+# DESCRIPTION: This plugin adds a chart of hits by regions. Only regions for US and
+# Canada can be detected.
+# Replace spaces in the path of geoip data file with string "%20".
+#
+LoadPlugin="geoip_region_maxmind GEOIP_STANDARD /home/ldestailleur/git/awstats/test/maxmind/GeoIPRegion-515.dat"
+
+# PLUGIN: GeoIP_ISP_Maxmind
+# REQUIRED MODULES: Geo::IP or Geo::IP::PurePerl (from Maxmind)
+# PARAMETERS: [GEOIP_STANDARD | GEOIP_MEMORY_CACHE] [/pathto/GeoIPISP.dat]
+# DESCRIPTION: This plugin adds a chart of hits by ISP.
+# Replace spaces in the path of geoip data file with string "%20".
+#
+#LoadPlugin="geoip_isp_maxmind GEOIP_STANDARD /home/ldestailleur/git/awstats/test/maxmind/GeoIPISP-122.dat"
+
+# PLUGIN: GeoIP_Org_Maxmind
+# REQUIRED MODULES: Geo::IP or Geo::IP::PurePerl (from Maxmind)
+# PARAMETERS: [GEOIP_STANDARD | GEOIP_MEMORY_CACHE] [/pathto/GeoIPOrg.dat]
+# DESCRIPTION: This plugin adds a chart of hits by Organization name
+# Replace spaces in the path of geoip data file with string "%20".
+#
+LoadPlugin="geoip_org_maxmind GEOIP_STANDARD /home/ldestailleur/git/awstats/test/maxmind/GeoIPOrg-111.dat"
+
+
+
+#-----------------------------------------------------------------------------
+# EXTRA SECTION
+#-----------------------------------------------------------------------------
+
+# WARNING: Extra sections are an experimental feature, not stable yet !!!
+
+# You can define your own charts, you choose here what are rows and columns
+# keys. This feature is particularly useful for marketing purpose, tracking
+# products orders for example.
+# For this, edit all parameters of Extra section. Each set of parameter is a
+# different chart. For several charts, duplicate section changing the number.
+# Note that each Extra section reduces AWStats speed by 10%.
+#
+# WARNING: A wrong setup of Extra section can result in too large arrays
+# that will consume all your memory, making AWStats unusable after several
+# updates, so be sure to set it up correctly.
+# In most cases, you don't need this feature.
+#
+# ExtraSectionNameX is title of your personalized chart.
+# ExtraSectionConditionX are conditions on URL and/or QUERY_STRING and/or
+#   REFERER you can use to count or not the hit. Use "|" for "OR".
+# ExtraSectionFirstColumnTitleX is the first column title of the chart.
+# ExtraSectionFirstColumnValuesX is a Regex string to tell AWStats how to 
+#   extract the value used for first column. Each different value found will
+#   be a different row. Be sure that list of different values is "limited" to
+#   avoid "not enough memory" problems !
+# ExtraSectionStatTypesX are things you want to count. You can use standard
+#   code letters (P for pages,H for hits,B for bandwidth,L for last access).
+# MaxNbOfExtraX is maximum number of rows shown in chart.
+# MinHitExtraX is minimum number of hits required to be shown in chart.
+#
+
+# Example to report the 20 products the most ordered by "order.cgi" script
+ExtraSectionName1="Product orders"
+ExtraSectionCondition1="URL,\/cgi\-bin\/order\.cgi|URL,\/cgi-bin2\/order\.cgi"
+ExtraSectionFirstColumnTitle1="Product ID"
+ExtraSectionFirstColumnValues1="QUERY_STRING,productId=([^&]+)"
+ExtraSectionStatTypes1=PL
+MaxNbOfExtra1=20
+MinHitExtra1=1
+
+
+ExtraSectionName2="Redirect"
+ExtraSectionCondition2="URL,\/cgi\-bin\/awredir\.pl"
+ExtraSectionFirstColumnTitle2="Url"
+ExtraSectionFirstColumnValues2="QUERY_STRING,url=([^&]+)"
+ExtraSectionStatTypes2=HL
+MaxNbOfExtra2=20
+MinHitExtra2=1
diff --git a/test/test.pl b/test/test.pl
index 9655d1a1..5251bd48 100755
--- a/test/test.pl
+++ b/test/test.pl
@@ -13,38 +13,42 @@ $PERL="perl";
 
 @TESTLIST=(
 "testglobal",
-"testlogins",
-"testworms",
-"testipv6",
-"testdnsdone",
-"testextra",
-"testgeoip",
-"testgeoip_region_maxmind",
-"testgeoip_city_maxmind",
+"testsmall",
+"testnginx",
+"testtime5",
+#"testlogins",
+#"testworms",
+#"testipv6",
+#"testdnsdone",
+#"testextra",
+#"testgeoip",
+#"testgeoip_region_maxmind",
+#"testgeoip_city_maxmind",
 "testgeoip_isp_maxmind",
-"testgeoip_org_maxmind",
-"testrobot",
-"benchmark",
-"testmoddeflate","testmodgzip","testmodgzip2","testmodgzip3",
-"testurlwithquery",
-"testwindowsmediaserver","testwindowsmediaserver9","testrealmediaserver","testdarwinserver",
-"testsquidextended",
-"testisa1",
-"testisa2",
-"testlotus",
-"testlotus65",
-"testwebstar",
-"testzope",
-"testcluster",
-"testoracle9ias",
-"testproftp","testproftp2","testvsftpd",
-"testskipfiles",
-"testvirtualhosts",
-"testsendmail",
-"testpostfix",
-"testpostfix1",
-"testpostfix4",
-"testexchange");
+#"testgeoip_org_maxmind",
+#"testrobot",
+#"benchmark",
+#"testmoddeflate","testmodgzip","testmodgzip2","testmodgzip3",
+#"testurlwithquery",
+#"testwindowsmediaserver","testwindowsmediaserver9","testrealmediaserver","testdarwinserver",
+#"testsquidextended",
+#"testisa1",
+#"testisa2",
+#"testlotus",
+#"testlotus65",
+#"testwebstar",
+#"testzope",
+#"testcluster",
+#"testoracle9ias",
+#"testproftp","testproftp2","testvsftpd",
+#"testskipfiles",
+#"testvirtualhosts",
+#"testsendmail",
+#"testpostfix",
+#"testpostfix1",
+#"testpostfix4",
+#"testexchange"
+);
 
 #@TESTLIST=("testglobal","testsmall","testtime5");
 #@TESTLIST=("testlogins");
@@ -88,7 +92,7 @@ while(1==1)
 {
 	
 	print "Choose test to execute...\n";
-	sprintf("$02i %s",0,"All");
+	sprintf("$2i %s", 0, "All");
 	my $i=1;
 	foreach my $key (@TESTLIST) {
 	    print sprintf("%02i) %s\n",$i,$key);
@@ -106,7 +110,7 @@ while(1==1)
 	else { push @chosen, $TESTLIST[$bidon-1]; }
 	
 	# Option output
-	print "Choose output option (browserdetail, osdetail, ...)\n";
+	print "Choose output option ('', 'browserdetail', 'osdetail', ...)\n";
 	$bidon='';
 	print "Your choice : ";
 	$bidon=<STDIN>;