Update test files

author Laurent Destailleur <eldy@destailleur.fr>

Sun, 28 Jul 2024 13:42:06 +0000 (15:42 +0200)

committer Laurent Destailleur <eldy@destailleur.fr>

Sun, 28 Jul 2024 13:42:06 +0000 (15:42 +0200)
author Laurent Destailleur <eldy@destailleur.fr>
Sun, 28 Jul 2024 13:42:06 +0000 (15:42 +0200)
committer Laurent Destailleur <eldy@destailleur.fr>
Sun, 28 Jul 2024 13:42:06 +0000 (15:42 +0200)
diff --git a/robots.pm b/robots.pm

deleted file mode 100644 (file)

index 6290432..0000000
--- a/robots.pm
+++ /dev/null
@@ -1,2786 +0,0 @@
-# AWSTATS ROBOTS DATABASE\r
-#-------------------------------------------------------\r
-# If you want to add robots to extend AWStats database detection capabilities,\r
-# you must add an entry in RobotsSearchIDOrder_listx and RobotsHashIDLib.\r
-\r
-# The entry in RobotsSearchIDOrder_listx is a Perl regular expression\r
-# (see http://perldoc.perl.org/perlreref.html). AWStats applies these\r
-# expressions to the user agent string in the order given by the lists. The\r
-# first match specifies the robot.\r
-#\r
-# Note: This regular expression must not contain any whitespace.\r
-# Otherwise AWStats will produce lines in the database that\r
-# will be misinterpreted and as a consequence the corresponding data in the\r
-# generated HTML reports will be wrong. If you want to match whitespace in\r
-# the user agent string, use other constructs like '\s', '[:blank:]',\r
-# '\p{IsSpace}', '\x20' etc.\r
-#\r
-# The corresponding entry in RobotsHashIDLib contains the regular expression\r
-# as key, followed by a string containing HTML-text. AWStats inserts this\r
-# text into reports to describe the bot. If possible the text should contain\r
-# a link to the bot home page. This makes it easier for sysadmins to find\r
-# the information necessary e.g. to adapt the robots.txt file.\r
-#\r
-# An entry in the RobotsAffiliateLib is not necessary. An entry in this list\r
-# contains as first part the regular expression specifying the bot. The\r
-# second part is a string that gives the Company or product managing the bot.\r
-# This information is not used yet.\r
-#\r
-# There are several sorts of bots that AWStats is not able to detect and\r
-# therefore a considerable amount of bot generated traffic counts\r
-# as user traffic:\r
-#\r
-# a) A crawler that identifies itself in the referrer string, but not in\r
-#    the user agent string. An example is the crawler from semalt.semalt.com.\r
-#\r
-# b) Crawlers that correctly access robots.txt but identify themselves\r
-#    in the user agent string only once or just a few times. Most of the\r
-#    time a user agent string is used that does not contain hints that\r
-#    a bot is involved. An example is the iCjobs spider.\r
-#    msnbot-UDiscovery/2.0b seems to show this behaviour too.\r
-#\r
-#\r
-#\r
-#-------------------------------------------------------\r
-\r
-# 2023-07-04 RobC \r
-#              Removed Dalvik as native Android UI Browser User Agent\r
-#              Removed CFNetwork as native iOS and OSX Browser User Agent\r
-\r
-# 2021-05-05 RobC\r
-\r
-# Removed Baidu catchall because it's picking up baidu.sogo.uc.UCBrowser, which is a phone browser\r
-# Added baiduspider- catchall instead\r
-\r
-# Newly added from 2021-05-05\r
-# Adsbot\r
-# BW/\r
-# Bytespider\r
-# CheckMarkNetwork/\r
-# DuckDuckBot\r
-# # Foregenix Web Scan\r
-# IonCrawl\r
-# Linguee Bot\r
-# Neevabot\r
-# PetalBot\r
-# TkBot\r
-# vuhuvBot\r
-\r
-\r
-# 2018-03-13 RobC \r
-#              Added 36 robots and one generic ( survey ) using v 7.7 robots file as base. \r
-#              Also moved robot "Obot" into generics so that it is singled out as an individual Robot.         \r
-#\r
-# 2016-09-02 RobC \r
-#              Fixed a few errors and added a few missing bots from awstats 7.5 release.\r
-#\r
-# 2016-08-28 RobC \r
-#              Complete re-build of this file almost from scratch.\r
-#              dropped many old bots, added many new bots and reordered file.\r
-#              edited and added regex expressions to stop spaces causing problems.\r
-#              You should tune file by placing the most common robots crawling your site at top \r
-#              in List1.\r
-#\r
-#\r
-#              N.B. many bots need to be in correct order so don't change order without checking if\r
-#              change will cause counts to be allocated to wrong bot. Not always simple.\r
-#\r
-#\r
-# 2005-08-19 Sean Carlos http://www.antezeta.com/awstats.html\r
-#              added dipsie (not tested with real data).\r
-#              added DomainsDB.net http://domainsdb.net/\r
-#              added ia_archiver-web.archive.org (was inadvertently grouped with Alexa traffic)\r
-#              added Nutch (used by looksmart (furl?))\r
-#              added rssImagesBot\r
-#              added Sqworm\r
-#              added t\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e\r
-#              added w3c css-validator\r
-#              added documentation link to bot home pages for above and selected major bots.\r
-#                    In the case of international bots, choose .com page.\r
-#                    Included tool tip (html "title").\r
-#                    To do: parameterize to match both AWStats language and tooltips settings.\r
-#                    To do: add html links for all bots based on current documentation in source\r
-#                           files referenced below.\r
-#              changed '\wbot[\/\-]', to '\wbot[\/\-]' (removed comma)\r
-#              made minor grammar corrections to notes below\r
-# 2005-08-24   added YahooSeeker-Testing\r
-#                      added w3c-checklink\r
-#                      updated url for ask.com\r
-# 2005-08-24           added Girafabot http://www.girafa.com/\r
-# 2005-08-30           added PluckFeedCrawler http://www.pluck.com/\r
-#              added Gaisbot/3.0 (robot05@gais.cs.ccu.edu.tw; )\r
-#              added geniebot (wgao@genieknows.com)\r
-#              added BecomeBot link http://www.become.com/site_owners.html\r
-#              added topicblogs http://www.topicblogs.com/\r
-#              added Powermarks; seen used by referrer spam\r
-#              added YahooSeeker\r
-#              added NG/2. http://www.exabot.com/\r
-# 2005-09-15   added link for Walhello appie\r
-#              added bender focused_crawler\r
-#              updated YahooSeeker description (blog crawler)\r
-# 2005-09-16   added link for http://linkchecker.sourceforge.net\r
-#              added ConveraCrawler/0.9d ( http://www.authoritativeweb.com/crawl)\r
-#              added Blogslive  info@blogslive.com intelliseek.com\r
-#              added BlogPulse (ISSpider-3.0) intelliseek.com\r
-# 2005-09-26   added Feedfetcher-Google (http://www.google.com/feedfetcher.html)\r
-#              added EverbeeCrawler\r
-#              added Yahoo-Blogs http://help.yahoo.com/help/us/ysearch/crawling/crawling-02.html\r
-#              added link for Bloglines http://www.bloglines.com\r
-# 2005-10-19   fixed Feedfetcher-Google (http://www.google.com/feedfetcher.html)\r
-#              added Blogshares Spiders (Synchronized V1.5.1)\r
-#              added yacy\r
-# 2005-11-21   added Argus www.simpy.com\r
-#              added BlogsSay :: RSS Search Crawler (http://www.blogssay.com/)\r
-#              added MJ12bot http://majestic12.co.uk/bot.php\r
-#              added OpenTaggerBot (http://www.opentagger.com/opentaggerbot.htm)\r
-#              added OutfoxBot/0.3 (For internet experiments; outfox.agent@gmail.com)\r
-#              added RufusBot Rufus Web Miner http://64.124.122.252.webaroo.com/feedback.html\r
-#              added Seekbot (http://www.seekbot.net/bot.html)\r
-#              added Yahoo-MMCrawler/3.x (mms-mmcrawler-support@yahoo-inc.com)\r
-#               added link for BaiDuSpider\r
-#              added link for Blogshares Spider\r
-#              added link for StackRambler http://www.rambler.ru/doc/faq.shtml\r
-#              added link for WISENutbot\r
-#              added link for ZyBorg/1.0 (wn-14.zyborg@looksmart.net; http://www.WISEnutbot.com.  Moved location to above wisenut to avoid classification as wisenut\r
-# 2005-12-15\r
-#              added FAST Enteprise Crawler/6 (www dot fastsearch dot com). Note spelling Enteprise not Enterprise.\r
-#              added findlinks http://wortschatz.uni-leipzig.de/findlinks/\r
-#              added IBM Almaden Research Center WebFountain™ http://www.almaden.ibm.com/cs/crawler [hc3]\r
-#              added INFOMINE/8.0 VLCrawler (http://infomine.ucr.edu/useragents)\r
-#              added lmspider (lmspider@scansoft.com) http://www.nuance.com/\r
-#              added noxtrumbot http://www.noxtrum.com/\r
-#              added SandCrawler (Microsoft)\r
-#              added SBIder http://www.sitesell.com/sbider.html\r
-#              added SeznamBot http://fulltext.seznam.cz/\r
-#              added sohu-search http://corp.sohu.com/ (looked for //robots.txt not /robots.txt)\r
-#              added the ruffle SemanticWeb crawler v0.5 - http://www.unreach.net\r
-#              added WebVulnCrawl/1.0 libwww-perl/5.803 (looked for //robots.txt not /robots.txt)\r
-#              added Yahoo! Japan keyoshid http://www.yahoo.co.jp/\r
-#              added Y!J http://help.yahoo.co.jp/help/jp/search/indexing/indexing-15.html\r
-#              added link for GigaBot\r
-#              added link for MagpieRSS\r
-#              added link for MSIECrawler\r
-# 2005-12-21\r
-#              added aipbot http://www.aipbot.com aipbot@aipbot.com [matthys70 users.sourceforge.net]\r
-#              added Everest-Vulcan Inc./0.1 (R&D project; http://everest.vulcan.com/crawlerhelp)\r
-#              added Fast-Search-Engine http://www.fast-search-engine.com/ [matthys70  users.sourceforge.net]\r
-#              added g2Crawler (nobody@airmail.net) http://crawler.instantnetworks.net/\r
-#              added Jakarta commons-httpclient http://jakarta.apache.org/commons/httpclient/ (hit robots.txt).  May be used as robot or browser - a site may want to remove this entry.\r
-#              added OmniExplorer_Bot http://www.omni-explorer.com/ [matthys70 users.sourceforge.net]\r
-#              added USTC-Semantic-Group ai.ustc.edu.cn/mas/en/research/index.php ?\r
-# 2005-12-22\r
-#              added EARTHCOM.info www.earthcom.info\r
-#              added HTTrack off-line browser 'httrack','HTTrack', http://www.httrack.com/ [Moizes Gabor]\r
-#              added KummHttp http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_g_l_301105_2\b [Moizes Gabor]\r
-# 2006-01-01\r
-#              added Dulance http://www.dulance.com/bot.jsp\r
-#              added MojeekBot http://www.mojeek.com/bot.html\r
-#              added nicebot http://www.egghelp.org/setup.htm ?\r
-#              added Snappy http://www.urltrends.com/faq.php\r
-#              added sohu agent\r
-#              added VORTEX http://marty.anstey.ca/robots/vortex/ [matthys70 users.sourceforge.net]\r
-#              added zspider http://feedback.redkolibri.com/\r
-# 2006-01-13\r
-#              added boitho.com-dc http://www.boitho.com/dcbot.html\r
-#              added IRLbot http://irl.cs.tamu.edu/crawler\r
-#              added virus_detector virus_harvester@securecomputing.com\r
-#              added Wavefire http://www.wavefire.com; info@wavefire.com\r
-\r
-#              added WebFilter Robot\r
-# 2006-01-24\r
-#              added Shim-Crawler http://www.logos.ic.i.u-tokyo.ac.jp/crawler/; crawl@logos.ic.i.u-tokyo.ac.jp\r
-#              added Exabot exabot.com\r
-#              added LetsCrawl.com http://letscrawl.com\r
-#              added ichiro http://help.goo.ne.jp/door/crawlerE.html\r
-# 2006-01-27    additional 22 robots from a list provided by Moizes Gabor\r
-#              added ALeadSoftbot      http://www.aleadsoft.com/bot.htm\r
-#              added CipinetBot        http://www.cipinet.com/bot.html\r
-#              added Cuasarbot http://www.cuasar.com/\r
-#              added Dumbot    http://www.dumbfind.com/\r
-#              added Extreme_Picture_Finder    http://www.exisoftware.com/\r
-#              added Fooky.com/ScorpionBot/ScoutOut    http://www.fooky.com/scorpionbots\r
-#              added IlTrovatore-Setaccio      http://www.iltrovatore.it/aiuto/motore_di_ricerca.html  bot@iltrovatore.it\r
-#              added InsurancoBot      http://www.fastspywareremoval.com/\r
-#              added InternetArchive   http://lucene.apache.org/nutch/bot.html         nutch-agent@lucene.apache.org\r
-#              added KazoomBot http://www.kazoom.ca/bot.html   kazoombot@kazoom.ca\r
-#              added Kurzor    http://www.easymail.hu/ cursor@easymail.hu\r
-#              added NutchCVS  http://lucene.apache.org/nutch/bot.html nutch-agent@lucene.apache.org\r
-#              added NutchOSU-VLIB     http://lucene.apache.org/nutch/bot.html nutch-agent@lucene.apache.org\r
-#              added Orbiter   http://www.dailyorbit.com/bot.htm\r
-#              added PHP_version_tracker       http://www.nexen.net/phpversion/bot.php\r
-#              added SuperBot  http://www.sparkleware.com/superbot/\r
-#              added SynooBot  http://www.synoo.de/bot.html    webmaster@synoo.com\r
-#              added TestBot   http://www.agbrain.com/\r
-#              added TutorGigBot       http://www.tutorgig.info/\r
-#              added WebIndexer        mailto://webindexerv1@yahoo.com\r
-#              added WebMiner  http://64.124.122.252/feedback.html\r
-# 2006-02-01\r
-#              added heritrix https://sourceforge.net/forum/message.php?msg_id=3550202\r
-#              added Zeus Webster Pro https://sourceforge.net/forum/message.php?msg_id=3141164\r
-#               additional robots from a list provided by Moizes Gabor [ mojzi -a-t- free mail hu ]\r
-#              added Candlelight_Favorites_Inspector\r
-#              added DomainChecker\r
-#              added EasyDL\r
-#              added FavOrg\r
-#              added Favorites_Sweeper\r
-#              added Html_Link_Validator\r
-#              added Internet_Ninja\r
-#              added JRTwine_Software_Check_Favorites_Utility\r
-#              fixed Microsoft_URL_Control\r
-#              added miniRank\r
-#              added Missigua_Locator\r
-#              added NPBot\r
-#              added Ocelli\r
-#              added Onet.pl_SA\r
-#              added proodleBot\r
-#              added SearchGuild_DMOZ_Experiment\r
-#              added Susie\r
-#              added Website_Monitoring_Bot\r
-#              added Xenu_Link_Sleuth\r
-# 2006-05-15\r
-#              added ASPseek http://www.aspseek.org/\r
-#              added AdamM Bot http://home.blic.net/adamm/\r
-#              added archive.org_bot http://crawls.archive.org/collections/bncf/crawl.html\r
-#              added arianna.libero.it (Italian Portal/search engine)\r
-#              added Biz360 spider http://www.biz360.com\r
-#              added BlogBridge Service http://www.blogbridge.com/\r
-#              added BlogSearch http://www.icerocket.com/\r
-#              added libcrawl\r
-#              added edgeio-relanshanbottriever http://www.edgeio.com\r
-#              added FeedFlow http://feedflow.com/about\r
-#              added Biblioteca Nazionale Centrale di Firenze (Italian National Archive) http://www.bncf.firenze.sbn.it/raccolta.txt\r
-#              added Java catchall - used by many spam bots\r
-#              added lanshanbot http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=%5Cbid_g_l_140406_1%5Cb\r
-#              added msnbot-media http://search.msn.com/msnbot.htm\r
-#              added MT::Telegraph::Agent\r
-#              added Netluchs http://www.netluchs.de/ (German SE bot)\r
-#              added oBot http://www.webmasterworld.com/forum11/1616.htm\r
-#              added Onfolio http://www.onfolio.com/  (IE Toolbar plugin) - hit rss feeds.\r
-#              added ping.blo.gs http://blo.gs/ping.php blog bot\r
-#              added Sphere Scout http://www.sphere.com/\r
-#              added sproose crawler http://www.sproose.com/bot.html\r
-#              added SyndicAPI http://syndicapi.com/bot.html\r
-#              added Yahoo! Mindset http://mindset.research.yahoo.com/\r
-#              added msrabot\r
-#              added Vagabondo & Vagabondo-WAP http://www.wise-guys.nl/Contact/index.php?botselected=webagents)#=uk\r
-#              fixed Missigua Locator detection (Missigua_Locator -> Missigua Locator)\r
-#              changed echo to echo! to avoid conflict with the bonecho (Firefox 2.0) browser.\r
-#                      This requires you to reprocess historic logs if you want EchO! to be recognized for older reports.\r
-# 2006-05-17\r
-#              added Alpha Search Agent # 62.152.125.60 Eurologon Srl\r
-#              added Krugle http://www.krugle.com/crawler/info.html the search engine for developers\r
-#              added Octora Beta Bot http://www.octora.com/ # Blog and Rss Search Engine\r
-#              added UbiCrawler http://law.dsi.unimi.it/ubicrawler/\r
-#              added Yahoo! Slurp China http://misc.yahoo.com.cn/help.html\r
-#                      You must reprocess old logs for the Yahoo! Slurp China bot to be detected in old reports\r
-# 2006-05-20\r
-#              added 1-More Scanner http://www.myzips.com/software/1-More-Scanner.phtml\r
-#              added Accoona-AI-Agent http://www.accoona.com/\r
-#              added ActiveBookmark http://www.libmaster.com/active_bookmark.php\r
-#              added BIGLOTRON http://www.biglotron.com/robot.html\r
-#              added Bookmark-Manager http://bkm.sourceforge.net/\r
-#              added cbn00glebot\r
-#              added Cerberian Drtrs http://www.pgts.com.au/cgi-bin/psql?robot_info=25240\r
-#              added CFNetwork http://www.cocoadev.com/index.pl?CFNetwork\r
-#              added CheckWeb link validator http://p.duby.free.fr/chkweb.htm\r
-#              added Computer and Automation Research Institute Crawler http://www.ilab.sztaki.hu/~stamas/publications/p184-benczur.html\r
-#              added ConveraCrawler http://www.authoritativeweb.com/crawl/\r
-#              added ConveraMultiMediaCrawler http://www.authoritativeweb.com/crawl/\r
-#              added CSE HTML Validator Lite Online http://online.htmlvalidator.com/php/onlinevallite.php\r
-#              added Cursor http://adcenter.hu/docs/en/bot.html\r
-#              added Custo http://www.netwu.com/custo/\r
-#              added DataFountains/DMOZ Downloader http://infomine.ucr.edu/\r
-#              added Deepindex http://www.deepindex.net/faq.php\r
-#              added DNSGroup http://www.dnsgroup.com/\r
-#              added DoCoMo http://www.nttdocomo.co.jp/\r
-#              added dumm.de-Bot http://www.dumm.de/\r
-#              added ETS v http://www.freetranslation.com/help/\r
-#              added eventax http://www.eventax.de/\r
-#              added FAST Enterprise Crawler * crawleradmin.t-info@telekom.de http://www.telekom.de/\r
-#              added FAST Enterprise Crawler http://www.fast.no/\r
-#              added FAST Enterprise Crawler * T-Info_BI_cluster crawleradmin.t-info@telekom.de http://www.telekom.de/\r
-#              added FeedValidator http://feedvalidator.org/\r
-#              added FilmkameraBot http://www.filmkamera.at/bot.html\r
-#              added Findexa Crawler http://www.findexa.no/gulesider/article26548.ece\r
-#              added Global Fetch http://www.wesonet.com/\r
-#              added GOFORITBOT http://www.goforit.com/about/\r
-#              added GoForIt.com http://www.goforit.com/about/\r
-#              added GPU p2p crawler http://gpu.sourceforge.net/search_engine.php\r
-#              added HooWWWer http://cosco.hiit.fi/search/hoowwwer/\r
-#              added HPPrint\r
-#              added HTMLParser http://htmlparser.sourceforge.net/\r
-#              added Hundesuche.com-Bot http://www.hundesuche.com/\r
-#              added InfoBot http://www.infobot.org/\r
-#              added InfociousBot http://corp.infocious.com/tech_crawler.php\r
-#              added InternetSupervision http://internetsupervision.com/\r
-#              added isearch2006 http://www.yahoo.com.cn/\r
-#              added IUPUI_Research_Bot http://spamhuntress.com/2005/04/25/a-mail-harvester-visits/\r
-#              added KalamBot http://64.124.122.251/feedback.html\r
-#              added kamano.de NewsFeedVerzeichnis http://www.kamano.de/\r
-#              added Kevin http://dznet.com/kevin/\r
-#              added KnowItAll http://www.cs.washington.edu/research/knowitall/\r
-#              added Knowledge.com http://www.knowledge.com/\r
-#              added Kouaa Krawler http://www.kouaa.com/\r
-#              added ksibot http://ego.ms.mff.cuni.cz/\r
-#              added Link Valet Online http://www.htmlhelp.com/tools/valet/\r
-#              added lwp-request http://search.cpan.org/~gaas/libwww-perl-5.69/bin/lwp-request\r
-#              added lwp-trivial http://search.cpan.org/src/GAAS/libwww-perl-5.805/lib/LWP/Simple.pm\r
-#              added MapoftheInternet.com http://MapoftheInternet.com/\r
-#              added Matrix S.p.A. - FAST Enterprise Crawler http://tin.virgilio.it/\r
-#              added Megite http://www.megite.com/\r
-#              added Metaspinner http://index.meta-spinner.de/\r
-#              added Mini-reptile\r
-#              added Misterbot http://www.misterbot.fr/\r
-#              added Miva http://www.miva.com/\r
-#              added Mizzu Labs http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_m_141105_2\b\r
-#              added MSRBOT http://research.microsoft.com/research/sv/msrbot/\r
-#              added MS SharePoint Portal Server - MS Search 4.0 Robot http://support.microsoft.com/default.aspx?scid=kb;en-us;284022\r
-#              added Mydoyouhike http://www.doyouhike.net/my\r
-#              added NASA Search http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_n_s_140506_2\b\r
-#              added NetSprint http://www.netsprint.pl/serwis/\r
-#              added NimbleCrawler http://www.healthline.com/\r
-#              added OpenWebSpider http://www.openwebspider.org/\r
-#              added Oracle Ultra Search http://www.oracle.com/technology/products/ultrasearch/index.html\r
-#              added OSSProxy http://www.marketscore.com/FAQ.Aspx\r
-#              added passwordmaker.org http://passwordmaker.org/\r
-#              added PEAR HTTP Request class http://pear.php.net/\r
-#              added PEERbot http://www.peerbot.com/\r
-#              added PHP version tracker http://www.nexen.net/phpversion/bot.php\r
-#              added PictureOfInternet http://malfunction.org/poi/\r
-#              added plinki http://www.plinki.com/\r
-#              added Port Huron Labs http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_n_s_1133\b\r
-#              added PostFavorites http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_n_s_1135\b\r
-#              added ProjectWF-java-test-crawler\r
-#              added PyQuery http://sourceforge.net/projects/pyquery/\r
-#              added Schizozilla http://spamhuntress.com/2005/03/18/gizmo/\r
-#              added Scumbot\r
-#              added Sensis Web Crawler http://www.sensis.com.au/\r
-#              added snap.com beta crawler http://www.snap.com/\r
-#              added Steeler http://www.tkl.iis.u-tokyo.ac.jp/~crawler/\r
-#              added STEROID  Download http://faqs.org.ru/progr/pascal/delphi_internet2.htm\r
-#              added Suchfin-Bot http://www.suchfin.de/\r
-#              added Sunrise http://www.sunrisexp.com/\r
-#              added Tagyu Agent http://www.tagyu.com/\r
-#              added Tcl http client package http://www.tcl.tk/man/tcl8.4/TclCmd/http.htm\r
-#              added TeragramCrawlerSURF http://www.teragram.com/\r
-#              added Test Crawler http://netp.ath.cx/\r
-#              added UnChaos Bot Hybrid Web Search Engine http://www.unchaos.com/\r
-#              added unido-bot http://www.unchina.org/unido/unido/our_projects/3_3.html\r
-#              added UniversalFeedParser http://feedparser.org/ (seen from md301000.inktomisearch.com)\r
-#              added updated http://www.updated.com/\r
-#              added Vermut http://vermut.aol.com\r
-#              added versus crawler from eda.baykan@epfl.ch http://www.epfl.ch/Eindex.html\r
-#              added Vespa Crawler (Yahoo Norway?) http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=%5Cbid_t_z_030406_1%5Cb\r
-#              added VSE http://www.vivisimo.com/\r
-#              added webcrawl.net http://www.webcrawl.net/\r
-#              added Web Downloader http://www.krasu.ru/soft/chuchelo/\r
-#              added Webdup http://www.webdup.com/en/index.html\r
-#              added Wells Search http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_t_z_1484\b\r
-#              added WordPress http://wordpress.org/\r
-#              added wume crawler http://wume.cse.lehigh.edu/~xiq204/crawler/\r
-#              added Xenu's Link Sleuth (with ')\r
-#              added xirq http://www.xirq.com/\r
-#              added yoogliFetchAgent http://www.yoogli.com/\r
-#              added Z-Add Link Checker http://w3.z-add.co.uk/linkcheck/\r
-#              -- fix - some robots were reported with _ where _ should have been a space.\r
-#              changed Xenu Link Sleuth\r
-#              changed microsoft[_+\s]url[_+\s]control -> microsoft_url_control\r
-#              changed favorites_sweeper -> favorites_sweeper\r
-#              -- updates\r
-#              updated AskJeeves to Ask\r
-# 2012-06-05 Albrecht Mueller\r
-#              added Grabber from SDSC (San Diego Supercomputer Center).\r
-# 2013-09-30 Albrecht Mueller\r
-# AWStats probably cannot detect this bot as it identifies itself in\r
-# the referrer field and not in the user agent string.\r
-#92.113.100.35 - - [29/Sep/2013:17:22:46 +0200] "GET /robots.txt HTTP/1.1" 200 516 "-" "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0" "-"\r
-#92.113.100.35 - - [29/Sep/2013:17:22:49 +0200] "GET /tghome.htm HTTP/1.1" 200 4445 "http://extrabot.com/help/frytygativyheku.htm" "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0" "-"\r
-#92.113.100.35 - - [29/Sep/2013:17:22:51 +0200] "GET / HTTP/1.1" 200 5467 "http://extrabot.com/help/frytygativyheku.htm" "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0" "-"\r
-\r
-# to do  MS Search 4.0 Robot\r
-\r
-#package AWSROB;\r
-\r
-\r
-# Robots list was found at http://www.robotstxt.org/wc/active/all.txt\r
-# Other robots can be found at http://www.jafsoft.com/searchengines/webbots.html\r
-# Rem: To avoid bad detection, some robot's ids were removed from this list:\r
-#      - Robots with ID of 3 letters only\r
-#      - Robots called 'webs' and 'tcl'\r
-# Rem: directhit changed into direct_hit (its real id)\r
-# Rem: calif changed into calif[^r] to avoid confusion with the Tiscalifreenet browser\r
-# Rem: fish changed into [^a]fish to avoid confusion with the Madsafish browser\r
-# Rem: roadrunner changed into road_runner\r
-# Rem: lycos changed to lycos_ to avoid confusion with lycos-online browser\r
-# Rem: voyager changed into ^voyager\/ to avoid excluding the voyager and amigavoyager browsers\r
-\r
-# RobotsSearchIDOrder\r
-# It contains all matching criteria to search for in log fields. This list is\r
-# used to know in which order to search Robot IDs.\r
-# Most frequent ones are in list1, used when LevelForRobotsDetection is 1 or more\r
-# Minor robots are in list2, used when LevelForRobotsDetection is 2 or more\r
-# Note: Robots IDs are in lower case, '_', ' ' and '+' are changed into '[_+\s]' and are quoted.\r
-#-------------------------------------------------------\r
-\r
-\r
-@RobotsSearchIDOrder_list1 = (\r
-# Common robots (In robot file)\r
-'bingbot/',\r
-'bingpreview',\r
-'MSIECrawler',\r
-'msnbot/',\r
-'msnbot\-media/',\r
-'AdIdxBot/',\r
-'NOT[\x20]Googlebot/',\r
-'Googlebot/',\r
-'Google[\x20]Web[\x20]Preview',\r
-'Googlebot\-Image/',\r
-'Googlebot\-Mobile/',\r
-'Google[\x20]Page[\x20]Speed',\r
-'google\-sitemaps',\r
-'Googlebot\-News',\r
-'Googlebot\-Video/',\r
-'AdsBot\-Google[\x20]\(',\r
-'AdsBot\-Google\-Mobile\-Apps',\r
-'Adsbot',\r
-'Mediapartners-Google',\r
-'Feedfetcher\-Google',\r
-'Google\-Adwords\-Instant',\r
-'Firefox/1\.5',\r
-'Yahoo![\x20]Slurp[\x20]China',\r
-'Yahoo![\x20]Slurp',\r
-'Baiduspider/',\r
-'Baiduspider\-image',\r
-'Baiduspider-',\r
-'YandexBot/',\r
-'YandexImages/',\r
-'YandexImageResizer',\r
-'YandexMetrika/',\r
-'YandexMobileBot/',\r
-'yandex',\r
-'electricmonk/',\r
-'spbot/',\r
-'SeznamBot/',\r
-'msie8',\r
-'AhrefsBot/',\r
-'007ac9[\x20]Crawler',\r
-'2345Explorer/',\r
-'360Spider',\r
-'A[\x20]Simple[\x20]Crawler',\r
-'Abrave',\r
-'acapbot/',\r
-'Accoona\-AI\-Agent/',\r
-'arcemedia',\r
-'AdnormCrawlerCatchBot/',\r
-'adscanner',\r
-'aiHitBot/',\r
-'aipbot/',\r
-'AlphaBot',\r
-'Apache\-HttpClient/',\r
-'Apexoo[\x20]Spider',\r
-'Applebot/',\r
-'archive\.org_bot',\r
-'Babya[\x20]Discoverer',\r
-'Barkrowler',\r
-'BDCbot/',\r
-'BellPagesCA/',\r
-'BeNosey[\x20]Mohawk[\x20]Search',\r
-'bhcBot',\r
-'bidswitchbot',\r
-'BigBozz/',\r
-'BinGet/',\r
-'bitlybot',\r
-'bl\.uk_lddc_bot/',\r
-'BLEXBot/',\r
-'bnf.fr_bot',\r
-'boitho\.com\-dc/',\r
-'BoogleBot',\r
-'BusinessBot:',\r
-'BW/',\r
-'Bytespider',\r
-'CatchBot/',\r
-'CB/Nutch',\r
-'CCBot/',\r
-'CheckMarkNetwork/',\r
-'Cliqzbot/',\r
-'CMS[\x20]Crawler',\r
-'Companybook\-Crawler',\r
-'ConveraCrawler/',\r
-'Contacts-Crawler',\r
-'contxbot',\r
-'cosmos/',\r
-'crawl/Nutch',\r
-'crawler4j',\r
-'CRAZYWEBCRAWLER',\r
-'CRMNLCrawlAgent',\r
-'CSE[\x20]HTML[\x20]Validator',\r
-'C\-T[\x20]bot',\r
-'CUBOT',\r
-'Curl/PHP',\r
-'cyencebot',\r
-'DataCrawler/',\r
-'daumoa',\r
-'daum',\r
-'Deepnet[\x20]Explorer',\r
-'DeuSu/',\r
-'Digincore',\r
-'Discordbot/',\r
-'Dispatch/',\r
-'DnyzBot',\r
-'DoCoMo/',\r
-'Domain[\x20]Re\-Animator[\x20]Bot',\r
-'DomainCrawler/',\r
-'DomainMacroCrawler/',\r
-'DomainSONOCrawler/',\r
-'DomainStatsBot/',\r
-'DotBot/',\r
-'DuckDuckBot-Https',\r
-'DuckDuckBot',\r
-'DuckDuckGo\-Favicons\-Bot/',\r
-'ELinks/',\r
-'ELinks[\x20]\(',\r
-'EmailMarketingRobot/',\r
-'EmeraldShield\.com[\x20]WebBot',\r
-'envolk\[ITS\]spider/',\r
-'eright',\r
-'EsperanzaBot',\r
-'Exabot/',\r
-'ExtLinksBot',\r
-'ExperianCrawlUK',\r
-'facebookexternalhit/',\r
-'fast_enterprise_crawler.*scrawleradmin\.t\-info@telekom\.de',\r
-'fast_enterprise_crawler.*t\-info_bi_cluster_crawleradmin\.t\-info@telekom\.de',\r
-'FAST\-WebCrawler/',\r
-'Feosey[\x20]Mohk[\x20]Crawler',\r
-'findlinks/',\r
-'Findxbot/',\r
-'FirePHP/',\r
-'firstdirectory\-bot',\r
-'flamingo',\r
-'FlippyBearBot/',\r
-'^foo$',\r
-'Foregenix[\x20]Web[\x20]Scan',\r
-'FreeWebMonitoring[\x20]SiteChecker/',\r
-'fujilabol',\r
-'FurlBot/',\r
-'Gaisbot/',\r
-'Gallent[\x20]Spider',\r
-'GarlikCrawler/',\r
-'Getintent[\x20]Crawler',\r
-'GetintentCrawler[\x20]getintent\.com',\r
-'Gigabot/',\r
-'gipo\-crawler/Nutch',\r
-'Girafabot',\r
-'Gluten[\x20]Free[\x20]Crawler/',\r
-'gocrawl',\r
-'Gowikibot',\r
-'Go\-http\-client/',\r
-'GrapeshotCrawler/',\r
-'GSiteCrawler/',\r
-'GurujiBot/',\r
-'hadiBot',\r
-'HaosouSpider',\r
-'HELLO[\x20]Crawler',\r
-'holmes/',\r
-'houzzbot',\r
-'HTTP_Request2/',\r
-'HubSpot[\x20]Webcrawler',\r
-'HyperCrawl/',\r
-'ICC\-Crawler/',\r
-'iconoclast',\r
-'IDGCrawler/Nutch',\r
-'IDG/UK',\r
-'idmarch[\x20]Automatic\.beta/',\r
-'InbyBot',\r
-'Incutio[\x20]XML',\r
-'IndeedBot',\r
-'InfluenceBot',\r
-'IonCrawl',\r
-'IRLbot/',\r
-'IssueCrawler',\r
-'istellabot/',\r
-'James[\x20]BOT',\r
-'Jigsaw/',\r
-'JobFeed',\r
-'Jooblebot',\r
-'KomodiaBot/',\r
-'Konqueror/',\r
-'laserlikebot',\r
-'Lightspeed',\r
-'linkapediabot',\r
-'metager\-linkchecker',\r
-'Linguee[\x20]Bot',\r
-'linkchecker',\r
-'LinkCheck',\r
-'linkdexbot/',\r
-'LinkedInBot/',\r
-'LinkpadBot/',\r
-'Links[\x20]\(',\r
-'LinksManager\.com_bot',\r
-'LWP::Simple/',\r
-'Mail\.RU_Bot/',\r
-'makecontact',\r
-'mappy',\r
-'MauiBot',\r
-'meanpathbot/',\r
-'Mechanize',\r
-'Mediatoolkitbot',\r
-'MegaIndex\.ru/',\r
-'merzscope',\r
-'Meta_Bot',\r
-'mfibot/',\r
-'microsoft.*discovery',\r
-'missigua_locator',\r
-'MixrankBot',\r
-'MJ12bot/',\r
-'MojeekBot',\r
-'Mojolicious',\r
-'MXT/Nutch',\r
-'My[\x20]Nutch[\x20]Spider/',\r
-'myse/Nutch',\r
-'Naaraa',\r
-'Neevabot',\r
-'NerdyBot',\r
-'netEstate[\x20]NE[\x20]Crawler',\r
-'NetResearchServer/',\r
-'Nimbostratus-Bot',\r
-'nominet',\r
-'NRLCorpusBuilder/Nutch',\r
-'nutch\-1\.4/',\r
-'nutch\-1\.8/',\r
-'NutchCVS/',\r
-'o\.uk[\x20]robot',\r
-'ocrawler;',\r
-'ODP[\x20]link[\x20]checker',\r
-'Offline[\x20]Explorer/',\r
-'OmniExplorer_Bot/',\r
-'OrangeBot/',\r
-'Orliac',\r
-'OutclicksBot',\r
-'PageBitesHyperBot/',\r
-'Pcore',\r
-'pdffillerbot/',\r
-'peopleman',\r
-'PetalBot',\r
-'PhantomJS',\r
-'PHP/5\.2\.8',\r
-'Pinterestbot',\r
-'PiplBot',\r
-'Ploetz[\x20]\+[\x20]Zeller',\r
-'Plukkie/',\r
-'Princetonbot/',\r
-'PrivacyAwareBot/',\r
-'Prlog/',\r
-'proximic',\r
-'psbot/',\r
-'psbot\-image',\r
-'python_wk_crawler',\r
-'Python\-urllib/',\r
-'QCrawl',\r
-'Quick-Crawler',\r
-'ResearchBot',\r
-'roboto',\r
-'rogerbot/',\r
-'RSSingBot',\r
-'RukiCrawler/',\r
-'SafeDNS[\x20]search[\x20]bot/',\r
-'SafeDNSBot',\r
-'SafeSearch[\x20]microdata[\x20]crawler',\r
-'safesearch',\r
-'SBL\-BOT',\r
-'scrapy',\r
-'Screaming[\x20]Frog[\x20]SEO[\x20]Spider/',\r
-'ScreenerBot[\x20]Crawler[\x20]Beta',\r
-'Scrubby',\r
-'Searchie/',\r
-'SecurityResearch\.bot',\r
-'Seekmo',\r
-'semanticbot',\r
-'SemrushBot/',\r
-'SemrushBot-SI',\r
-'seo\-audit\-check\-bot/',\r
-'Seobility',\r
-'SEOkicks\-Robot',\r
-'SEOlyticsCrawler/',\r
-'SEOstats',\r
-'Seosys/Nutch',\r
-'Seoterritory\.com[\x20]bot',\r
-'serendeputy',\r
-'Shim\-Crawler',\r
-'SiteExplorer/',\r
-'siteexplorer\.info',\r
-'siteimprove',\r
-'Slackbot\-LinkExpanding',\r
-'SmabblerBot/',\r
-'Sogou[\x20]web[\x20]spider/',\r
-'special_archiver/',\r
-'Spiderbot/',\r
-'SpuhexBot',\r
-'spyonweb',\r
-'ssearch_bot',\r
-'Streamline3Bot',\r
-'SurdotlyBot/',\r
-'SurveyBot/',\r
-'taiil/Nutch',\r
-'tbot\-nutch',\r
-'TeeRaidBot',\r
-'TelegramBot',\r
-'Test/Nutch',\r
-'Test[\x20]Spider',\r
-'TestCrawler',\r
-'The[\x20]Knowledge[\x20]AI',\r
-'TkBot',\r
-'tracemyfile',\r
-'trendiction',\r
-'TurnitinBot/',\r
-'TurnitinBot',\r
-'TweetmemeBot/',\r
-'UCY/Nutch',\r
-'uni-leipzig\.de',\r
-'Uptimebot/',\r
-'UptimeRobot/',\r
-'URL[\x20]Checker',\r
-'UXCrawlerBot',\r
-'Validator\.nu/',\r
-'vBSEO',\r
-'vBulletin[\x20]via[\x20]PHP',\r
-'vebidoobot',\r
-'vegi[\x20]bot',\r
-'Velen',\r
-'viz/Nutch',\r
-'VoilaBot',\r
-'VORTEX/',\r
-'voyager/',\r
-'vuhuvBot',\r
-'W3C_Validator/',\r
-'W3C\-checklink/',\r
-'WBSearchBot/',\r
-'WbSrch/',\r
-'WeSEE:Ads/PageBot',\r
-'WeSEE:Ads/PictureBot',\r
-'WeSEE_Bot',\r
-'Wget/',\r
-'Who\.is[\x20]Bot',\r
-'wonderbot/',\r
-'woobot/',\r
-'Wotbox/',\r
-'Xaldon[\x20]WebSpider',\r
-'Xenu[\x20]Link[\x20]Sleuth',\r
-'xenu_link_sleuth',\r
-'XML[\x20]Sitemaps[\x20]Generator',\r
-'XoviBot/',\r
-'yacybot',\r
-'Yahoo[\x20]Link[\x20]Preview',\r
-'yak',\r
-'YisouSpider',\r
-'yoozBot',\r
-'Your\-Website\-Sucks',\r
-'zoominfobot',\r
-'zspider/',\r
-'ZumBot/',\r
-# below placed at end to catch some generics\r
-'ng/1\.',\r
-'ng/2\.',\r
-'libwww\-perl',\r
-'urllib',\r
-'javabee',\r
-'projectwf\-java\-test\-crawler',\r
-'java',\r
-'loocalcrawler/nutch',\r
-'nutchosu\-vlib',\r
-'nutch',\r
-'perlcrawler',\r
-'perl',\r
-# old robots using firefox < version 11 not identifying themselves as a robot.\r
-'(firefox/)([0-9]\.|[0-1][0]\.)'\r
-);\r
-\r
-@RobotsSearchIDOrder_list2 = (\r
-# Less common robots (In robot file)\r
-'^Mozilla$',\r
-'^mozilla\/3\.0\s\(compatible$',\r
-'^mozilla\/4\.0$',\r
-'^mozilla\/4\.0\s\(compatible;\)$',\r
-'^mozilla\/5\.0$',\r
-'^mozilla\/5\.0\s\(compatible;$',\r
-'^mozilla\/5\.0\s\(en\-us\)$',\r
-'^mozilla\/5\.0\sfirefox\/3\.0\.5$',\r
-'^Mozilla/6\.0[\x20]\(compatible\)$',\r
-'^Mozilla/(.*)Beta[\x20]\(Windows\)',\r
-'MSIE[\x20]2',\r
-'MSIE[\x20]3',\r
-'MSIE[\x20]4',\r
-'MSIE[\x20]5',\r
-'MSIE[\x20]6',\r
-'MSIE\+6\.0\;',\r
-'Windows[\x20]95',\r
-'Windows[\x20]98',\r
-\r
-# these could be removed to speed up processing as they are rarely seen\r
-'a6\-indexer',\r
-'abcdatos',\r
-'abonti\.com',\r
-'acme\.spider',\r
-'activebookmark',\r
-'adamm_bot',\r
-'advbot',\r
-'affectv\.co\.uk',\r
-'ahoythehomepagefinder',\r
-'aleadsoftbot',\r
-'alkaline',\r
-'allrati',\r
-'alltop',\r
-'almaden',\r
-'alpha_search_agent',\r
-'anthill',\r
-'antibot',\r
-'aport',\r
-'appie',\r
-'applesyndication',\r
-'arachnophilia',\r
-'arale',\r
-'araneo',\r
-'architext',\r
-'archive\-de\.com',\r
-'aretha',\r
-'argus',\r
-'ariadne',\r
-'arianna\.libero\.it',\r
-'arks',\r
-'aspider',\r
-'aspseek',\r
-'asterias',\r
-'asynchttpclient',\r
-'atn\.txt',\r
-'atomz',\r
-'auresys',\r
-'awbot',\r
-'backlinktest\.com',\r
-'backrub',\r
-'bbot',\r
-'becomebot',\r
-'bender',\r
-'betabot',\r
-'bigbrother',\r
-'biglotron',\r
-'BingLocalSearch',\r
-'bittorrent_bot',\r
-'biz360[_+\s]spider',\r
-'bjaaland',\r
-'blackwidow',\r
-'blindekuh',\r
-'blogbridge[_+\s]service',\r
-'blogged_crawl',\r
-'bloglines',\r
-'bloglovin',\r
-'blogpulse',\r
-'blogsearch',\r
-'blogshares',\r
-'blogslive',\r
-'blogssay',\r
-'bloodhound',\r
-'bncf\.firenze\.sbn\.it/raccolta\.txt',\r
-'bobby',\r
-'bookmark\-manager',\r
-'borg\-bot',\r
-'boris',\r
-'brightnet',\r
-'bruinbot',\r
-'bspider',\r
-'bubing',\r
-'bumblebee',\r
-'butterfly',\r
-'buzztracker',\r
-'cactvschemistryspider',\r
-'calif[^r]',\r
-'candlelight[_+\s]favorites[_+\s]inspector',\r
-'careerbot',\r
-'carpathia',\r
-'cassandra',\r
-'catbot',\r
-'cbn00glebot',\r
-'cerberian_drtrs',\r
-'cfetch',\r
-'cgireader',\r
-'chattertrap',\r
-'check_http',\r
-'checkbot',\r
-'checkweb_link_validator',\r
-'christcrawler',\r
-'churl',\r
-'cienciaficcion',\r
-'cipinetbot',\r
-'imagecoccoc',\r
-'coccoc',\r
-'coldfusion',\r
-'collective',\r
-'combine',\r
-'commons\-httpclient',\r
-'computer_and_automation_research_institute_crawler',\r
-'conceptbot',\r
-'contentmatch',\r
-'converamultimediacrawler',\r
-'coolbot',\r
-'copubbot',\r
-'core',\r
-'covario',\r
-'cruiser',\r
-'cscrawler',\r
-'cuasarbot',\r
-'cursor',\r
-'cusco',\r
-'custo',\r
-'cyberspyder',\r
-'datafountains/dmoz_downloader',\r
-'dataprovider\.com',\r
-'daviesbot',\r
-'daylifefeedfetcher',\r
-'daypopbot',\r
-'deepindex',\r
-'desertrealm',\r
-'deweb',\r
-'dienstspider',\r
-'digger',\r
-'digout4u',\r
-'diibot',\r
-'dipsie\.bot',\r
-'direct_hit',\r
-'discobot',\r
-'dlvr\.it',\r
-'dnabot',\r
-'dnsgroup',\r
-'doccheckbot',\r
-'domainappender',\r
-'domainchecker',\r
-'domainsdb\.net',\r
-'download_express',\r
-'dragonbot',\r
-'dreamwidth',\r
-'drupal',\r
-'dulance',\r
-'dumbot',\r
-'dumm\.de\-bot',\r
-'dwcp',\r
-'e\-collector',\r
-'earthcom\.info',\r
-'easydl',\r
-'ebiness',\r
-'eccp',\r
-'echo!',\r
-'edgeio\-retriever',\r
-'elfinbot',\r
-'emacs',\r
-'emcspider',\r
-'enteprise',\r
-'ernst[:blank:]2\.0',\r
-'esther',\r
-'ets_v',\r
-'eventax',\r
-'everbeecrawler',\r
-'everest\-vulcan',\r
-'evliyacelebi',\r
-'exactseek',\r
-'extreme[_+\s]picture[_+\s]finder',\r
-'ezoom',\r
-'ezresult',\r
-'facebook',\r
-'facebot',\r
-'fast\-search\-engine',\r
-'matrix_s\.p\.a\._\-_fast_enterprise_crawler',\r
-'fast_enterprise_crawler',\r
-'fastbot',\r
-'fastcrawler',\r
-'favicon',\r
-'favorg',\r
-'favorites_sweeper',\r
-'fdse',\r
-'feedburner',\r
-'feedcrawl',\r
-'feedflow',\r
-'feedmyinbox',\r
-'feedroll\.com',\r
-'feedsky',\r
-'feedster',\r
-'feedvalidator',\r
-'feedzira',\r
-'felix',\r
-'ferret',\r
-'fetchbot',\r
-'fetchrover',\r
-'fever/',\r
-'fido',\r
-'filmkamerabot',\r
-'filterdb\.iss\.net',\r
-'finderlein[_+\s]research[_+\s]crawler',\r
-'findexa_crawler',\r
-'finnish',\r
-'fireball',\r
-'firmilybot',\r
-'flexum',\r
-'foaf\-search\.net',\r
-'fooky\.com/ScorpionBot',\r
-'fouineur',\r
-'francoroute',\r
-'freecrawl',\r
-'freenews',\r
-'funnelweb',\r
-'g2crawler',\r
-'gama',\r
-'gazz',\r
-'gcreep',\r
-'geniebot',\r
-'genieo',\r
-'geohasher',\r
-'getbot',\r
-'geturl',\r
-'gigablastopensource',\r
-'global_fetch',\r
-'gnodspider',\r
-'goforit\.com',\r
-'goforitbot',\r
-'golem',\r
-'gonzo',\r
-'gougou',\r
-'gpu_p2p_crawler',\r
-'grabber',\r
-'grapeshot',\r
-'grapnel',\r
-'griffon',\r
-'gromit',\r
-'grub',\r
-'gulliver',\r
-'gulperbot',\r
-'hambot',\r
-'hanrss',\r
-'harvest',\r
-'havindex',\r
-'henrythemiragorobot',\r
-'heritrix',\r
-'hl_ftien_spider',\r
-'hometown',\r
-'hoowwwer',\r
-'hpprint',\r
-'htdig',\r
-'html[_+\s]link[_+\s]validator',\r
-'htmlgobble',\r
-'htmlparser',\r
-'httrack',\r
-'hundesuche\.com\-bot',\r
-'hyperdecontextualizer',\r
-'ia_archiver\-web\.archive\.org',\r
-'ia_archiver',\r
-'iajabot',\r
-'iaskspider',\r
-'i\-bot',\r
-'icarus6j',\r
-'ichiro',\r
-'icjobs\.de',\r
-'ilse',\r
-'iltrovatore\-setaccio',\r
-'imagelock',\r
-'implisensebot',\r
-'inagist',\r
-'incywincy',\r
-'infobot',\r
-'infociousbot',\r
-'infohelfer',\r
-'infomine',\r
-'informant',\r
-'infoseeksidewinder',\r
-'infoseek',\r
-'infospider',\r
-'inspectorwww',\r
-'insurancobot',\r
-'integromedb\.org',\r
-'intelliagent',\r
-'internet[_+\s]ninja',\r
-'internetarchive',\r
-'internetseer',\r
-'internetsupervision',\r
-'ips\-agent',\r
-'irobot',\r
-'iron33',\r
-'isearch2006',\r
-'israelisearch',\r
-'iupui_research_bot',\r
-'izsearch',\r
-'jacobin[\x20]club',\r
-'jakarta',\r
-'jbot',\r
-'jcrawler',\r
-'jeeves',\r
-'jennybot',\r
-'jobboerse',\r
-'jobot',\r
-'jobo',\r
-'joebot',\r
-'jrtwine[_+\s]software[_+\s]check[_+\s]favorites[_+\s]utility',\r
-'js\-kit',\r
-'jubii',\r
-'jumpstation',\r
-'justview',\r
-'kalambot',\r
-'kamano\.de_newsfeedverzeichnis',\r
-'kapsi',\r
-'katipo',\r
-'kazoombot',\r
-'kevin',\r
-'keyoshid',\r
-'kilroy',\r
-'kinja\-imagebot',\r
-'kinjabot',\r
-'knowitall',\r
-'knowledge\.com',\r
-'ko[_+\s]yappo[_+\s]robot',\r
-'kouaa_krawler',\r
-'krugle',\r
-'ksibot',\r
-'kummhttp',\r
-'kurzor',\r
-'labelgrabber\.txt',\r
-'lanshanbot',\r
-'larbin',\r
-'largesmall[\x20]crawler',\r
-'legs',\r
-'letscrawl\.com',\r
-'libcrawl',\r
-'lilina',\r
-'link_valet_online',\r
-'linkbot',\r
-'linkdex\.com',\r
-'linkidator',\r
-'linkscan',\r
-'linkstats[\x20]bot',\r
-'linkwalker',\r
-'lipperhey',\r
-'livejournal\.com',\r
-'lmspider',\r
-'loadtimebot',\r
-'lockon',\r
-'logo_gif',\r
-'longurl',\r
-'lssrocketcrawler',\r
-'ltbot',\r
-'ltx71',\r
-'lwp\-request',\r
-'lwp\-trivial',\r
-'lycos[_+\s]',\r
-'macworm',\r
-'madaali\.de',\r
-'magpierss',\r
-'magpie',\r
-'mapoftheinternet\.com',\r
-'marvin',\r
-'mattie',\r
-'mediabot',\r
-'mediafox',\r
-'megaindex',\r
-'megite',\r
-'memorybot',\r
-'mercator',\r
-'meshexplorer',\r
-'metager2\-verification\-bot',\r
-'metajobbot',\r
-'metaspinner',\r
-'metauri',\r
-'miadev',\r
-'microsoft[_+\s]url[_+\s]control',\r
-'microsoft[\x20]bits',\r
-'microsoft\-webdav\-miniredir',\r
-'mindcrawler',\r
-'mindupbot',\r
-'mini\-reptile',\r
-'minirank',\r
-'misterbot',\r
-'miva',\r
-'mizzu_labs',\r
-'mnogosearch',\r
-'moget',\r
-'momspider',\r
-'monster',\r
-'motor',\r
-'movabletype',\r
-'ms[_+\s]search[_+\s]6\.0[_+\s]robot',\r
-'ms_search_4\.0_robot',\r
-'msnbot\-udiscovery',\r
-'msrabot',\r
-'msrbot',\r
-'mt::telegraph::agent',\r
-'muncher',\r
-'muscatferret',\r
-'mwdsearch',\r
-'mydoyouhike',\r
-'myweb',\r
-'nagios',\r
-'nasa_search',\r
-'ndspider',\r
-'nederland\.zoek',\r
-'netcarta',\r
-'netcraft',\r
-'netluchs',\r
-'netmechanic',\r
-'netnewswire',\r
-'netscoop',\r
-'netsprint',\r
-'netvibes',\r
-'newrelicpinger',\r
-'newscan\-online',\r
-'newsfox',\r
-'newsgatoronline',\r
-'nextgensearchbot',\r
-'nhse',\r
-'nicebot',\r
-'nimblecrawler',\r
-'ning',\r
-'nomad',\r
-'northstar',\r
-'noxtrumbot',\r
-'npbot',\r
-'nzexplorer',\r
-'objectssearch',\r
-'occam',\r
-'ocelli',\r
-'octopus',\r
-'octora_beta_bot',\r
-'onet\.pl[_+\s]sa',\r
-'onfolio',\r
-'openfind',\r
-'opentaggerbot',\r
-'openwebspider',\r
-'optimizer',\r
-'oracle_ultra_search',\r
-'orb_search',\r
-'orbiter',\r
-'packrat',\r
-'pageboy',\r
-'panscient',\r
-'parasite',\r
-'passwordmaker\.org',\r
-'patric',\r
-'pear_http_request_class',\r
-'peerbot',\r
-'pegasus',\r
-'perignator',\r
-'perman',\r
-'petersnews',\r
-'phantom',\r
-'php[_+\s]version[_+\s]tracker',\r
-'phpcrawl',\r
-'phpdig',\r
-'picmole',\r
-'pictureofinternet',\r
-'piltdownman',\r
-'pimptrain',\r
-'ping\.blo\.gs',\r
-'pingdom',\r
-'pioneer',\r
-'pita',\r
-'pitkow',\r
-'pjspider',\r
-'plinki',\r
-'pluckfeedcrawler',\r
-'plumtreewebaccessor',\r
-'pogodak',\r
-'pompos',\r
-'popdexter',\r
-'poppi',\r
-'port_huron_labs',\r
-'portalb',\r
-'postfavorites',\r
-'postpost',\r
-'postrank',\r
-'powermarks',\r
-'printfulbot',\r
-'proodlebot',\r
-'protopage',\r
-'publiclibraryarchive',\r
-'pyquery',\r
-'python',\r
-'qihoobot',\r
-'quipply',\r
-'qwantify',\r
-'r6\_',\r
-'rambler',\r
-'ratingburner',\r
-'raven',\r
-'rbse',\r
-'redalert',\r
-'regator',\r
-'relevantnoise\.com',\r
-'resumerobot',\r
-'rhcs',\r
-'riddler',\r
-'road_runner',\r
-'robbie',\r
-'robi',\r
-'robocrawl',\r
-'robofox',\r
-'robozilla',\r
-'rojo',\r
-'rome[\x20]client',\r
-'roverbot',\r
-'rpt\-httpclient',\r
-'rssgraffiti',\r
-'rssimagesbot',\r
-'ruffle',\r
-'rufusbot',\r
-'rules',\r
-'safeads\.xyz',\r
-'safetynetrobot',\r
-'sage\+\+',\r
-'sandcrawler',\r
-'savetheworldheritage',\r
-'sbider',\r
-'schizozilla',\r
-'scooter',\r
-'scoutjet',\r
-'scumbot',\r
-'search\-info',\r
-'search_au',\r
-'searchguild[_+\s]dmoz[_+\s]experiment',\r
-'searchmetricsbot',\r
-'searchprocess',\r
-'seekbot',\r
-'semalt',\r
-'senrigan',\r
-'sensis_web_crawler',\r
-'seodiver',\r
-'seokicks\.de',\r
-'seoscanners',\r
-'sgscout',\r
-'shaggy',\r
-'shaihulud',\r
-'shareaholicbot',\r
-'shoutcast',\r
-'sift',\r
-'simbot',\r
-'simplepie',\r
-'sistrix',\r
-'site\-valet',\r
-'sitebot',\r
-'sitedomain\-bot',\r
-'sitetech',\r
-'skimbot',\r
-'skymob',\r
-'slcrawler',\r
-'slurp',\r
-'slysearch',\r
-'smartspider',\r
-'smtbot',\r
-'snap\.com_beta_crawler',\r
-'snappy',\r
-'snooper',\r
-'sohu\-search',\r
-'sohu',\r
-'solbot',\r
-'speedy',\r
-'sphere_scout',\r
-'spider[_+\s]monkey',\r
-'spiderline',\r
-'spiderlytics',\r
-'spiderman',\r
-'spiderview',\r
-'spip',\r
-'sproose_crawler',\r
-'spry',\r
-'sqworm',\r
-'ssearcher',\r
-'steeler',\r
-'steroid__download',\r
-'stq_bot',\r
-'Stratagems[\x20]Kumo',\r
-'suchfin\-bot',\r
-'suke',\r
-'summify\.com',\r
-'sunrise',\r
-'suntek',\r
-'superbot',\r
-'superfeedr',\r
-'susie',\r
-'sven',\r
-'syndic8',\r
-'syndicapi',\r
-'synoobot',\r
-'synthesio',\r
-'t\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e',\r
-'tach_bw',\r
-'tagyu_agent',\r
-'tailrank',\r
-'tarantula',\r
-'tarspider',\r
-'tcl_http_client_package',\r
-'techbot',\r
-'technoratibot',\r
-'templeton',\r
-'teoma',\r
-'teragramcrawlersurf',\r
-'test_crawler',\r
-'testbot',\r
-'thumbsniper',\r
-'titan',\r
-'titin',\r
-'tkwww',\r
-'tlspider',\r
-'topblogsinfo',\r
-'topicblogs',\r
-'topix\.net',\r
-'trapit',\r
-'trileet',\r
-'turtlescanner',\r
-'turtle',\r
-'tutorgigbot',\r
-'tweetedtimes',\r
-'twiceler',\r
-'twisted[\x20]pagegetter',\r
-'twitterbot',\r
-'twitterfeed',\r
-'ubicrawler',\r
-'ucsd',\r
-'udmsearch',\r
-'ultraseek',\r
-'um\-IC',\r
-'um\-LN',\r
-'unchaos_bot_hybrid_web_search_engine',\r
-'unido\-bot',\r
-'unisterbot',\r
-'universalfeedparser',\r
-'unlost_web_crawler',\r
-'unwindfetchor',\r
-'updated',\r
-'urlck',\r
-'ustc\-semantic\-group',\r
-'vagabondo\-wap',\r
-'vagabondo',\r
-'valkyrie',\r
-'vermut',\r
-'versus_crawler_from_eda\.baykan@epfl\.ch',\r
-'verticrawl',\r
-'vespa_crawler',\r
-'victoria',\r
-'virus[_+\s]detector',\r
-'visionsearch',\r
-'voidbot',\r
-'voltron',\r
-'vse/',\r
-'vwbot',\r
-'w3c[_+\s]css[_+\s]validator[_+\s]jfouffa',\r
-'w3index',\r
-'w3m2',\r
-'wallpaper',\r
-'wanderer',\r
-'wapspider',\r
-'wapspIRLider',\r
-'watchmouse',\r
-'wavefire',\r
-'waybackarchive\.org',\r
-'wazzup',\r
-'web_downloader',\r
-'webbandit',\r
-'webbase',\r
-'webcatcher',\r
-'webclipping\.com',\r
-'webcollage',\r
-'webcompass',\r
-'webcopy',\r
-'webcrawl\.net',\r
-'webdup',\r
-'webfetcher',\r
-'webfilter',\r
-'webfoot',\r
-'webinator',\r
-'webindexer',\r
-'weblayers',\r
-'weblinker',\r
-'webminer',\r
-'webmirror',\r
-'webmoose',\r
-'webquest',\r
-'webreader',\r
-'webreaper',\r
-'website[_+\s]monitoring[_+\s]bot',\r
-'websnarf',\r
-'webspider',\r
-'webvac',\r
-'webvulncrawl',\r
-'webwalker',\r
-'webwalk',\r
-'webwatch',\r
-'wells_search',\r
-'wer\-liefert\-was',\r
-'wesee:search',\r
-'wevikabot',\r
-'whatuseek',\r
-'whowhere',\r
-'windows\-rss\-platform',\r
-'wired\-digital',\r
-'zyborg',\r
-'wisenutbot',\r
-'wiumi',\r
-'wmir',\r
-'wolp',\r
-'wombat',\r
-'wonderer',\r
-'woozweb',\r
-'wordpress',\r
-'worm',\r
-'wume_crawler',\r
-'wwwc',\r
-'wwweasel',\r
-'wz101',\r
-'xget',\r
-'xirq',\r
-'xydo',\r
-'y!j',\r
-'yahoo![\x20]searchmonkey',\r
-'yahoo!_mindset',\r
-'yahoo\-blogs',\r
-'yahoo\-mmcrawler',\r
-'yahoo\-newscrawler',\r
-'yahoo[\x20]pipes',\r
-'yahoo\-verticalcrawler',\r
-'yahoocachesystem',\r
-'yahooexternalcache',\r
-'yahoofeedseeker',\r
-'yahooseeker\-testing',\r
-'yahooseeker',\r
-'yahooysmcm',\r
-'yammer',\r
-'yanga',\r
-'yet\-another\-spider',\r
-'yeti',\r
-'yie8',\r
-'yodaobot',\r
-'yooglifetchagent',\r
-'youdao',\r
-'yourls',\r
-'z\-add_link_checker',\r
-'zealbot',\r
-'zemanta',\r
-'zend_http_client',\r
-'zeus',\r
-'zhuaxia',\r
-'[^a]fish',\r
-'[\x20]netseer[\x20]',\r
-'^[1-3]$',\r
-'^finbot',\r
-'^motorola$',\r
-'^msie',\r
-'^voyager/',\r
-'^webindex$',\r
-'1\-more_scanner',\r
-'nbot'\r
-);\r
-\r
-@RobotsSearchIDOrder_listgen = (\r
-# Generic robot\r
-'robot',\r
-'blog',\r
-'checker',\r
-'crawl',\r
-'discover',\r
-'feed',\r
-'fetcher',\r
-'hunter',\r
-'link',\r
-'scanner',\r
-'seek',\r
-'sitemap',\r
-'spider',\r
-'sucker',\r
-'survey',\r
-'validator',\r
-'bot[\s_+:,\.\;\/\\\-]',\r
-'[\s_+:,\.\;\/\\\-]bot',\r
-'curl',\r
-'php',\r
-'ruby/',\r
-# Moving oBot here so it doesn't get assigned for other *obot robots\r
-'oBot/',\r
-'no_user_agent'\r
-);\r
-\r
-\r
-# RobotsHashIDLib\r
-# List of robots names ('robot id','robot clear text')\r
-#-------------------------------------------------------\r
-%RobotsHashIDLib   = (\r
-# Common robots (In robot file)\r
-'bingbot/','bingbot',\r
-'bingpreview','BingPreview',\r
-'MSIECrawler','MSIECrawler',\r
-'msnbot/','msnbot',\r
-'msnbot\-media/','msnbot-media',\r
-'AdIdxBot/','AdIdxBot Microsoft Ad Quality control',\r
-'NOT[\x20]Googlebot/','NOT Googlebot',\r
-'Googlebot/','Googlebot',\r
-'Google[\x20]Web[\x20]Preview','Google Web Preview',\r
-'Googlebot\-Image/','Googlebot-Image',\r
-'Googlebot\-Mobile/','Googlebot-Mobile',\r
-'Google[\x20]Page[\x20]Speed','Google Page Speed',\r
-'google\-sitemaps','google-sitemaps',\r
-'Googlebot\-News','Googlebot-News',\r
-'Googlebot\-Video/','Googlebot-Video',\r
-'AdsBot\-Google[\x20]\(','AdsBot-Google',\r
-'AdsBot\-Google\-Mobile\-Apps','AdsBot-Google-Mobile-Apps',\r
-'Adsbot','Adsbot',\r
-'Mediapartners-Google','Mediapartners-Google',\r
-'Feedfetcher\-Google','Feedfetcher-Google',\r
-'Google\-Adwords\-Instant','Google-Adwords-Instant',\r
-'Firefox/1\.5','Nautic Expo using Firefox/1.5',\r
-'Yahoo![\x20]Slurp[\x20]China','Yahoo! Slurp China',\r
-'Yahoo![\x20]Slurp','Yahoo! Slurp',\r
-'Baiduspider/','Baiduspider',\r
-'Baiduspider\-image','Baiduspider-image',\r
-'Baiduspider-','Baiduspider ( catchall )',\r
-'YandexBot/','YandexBot',\r
-'YandexImages/','YandexImages',\r
-'YandexImageResizer','YandexImageResizer',\r
-'YandexMetrika/','YandexMetrika',\r
-'YandexMobileBot/','YandexMobileBot',\r
-'yandex','Yandex ( catchall )',\r
-'electricmonk/','electricmonk',\r
-'spbot/','spbot',\r
-'SeznamBot/','SeznamBot',\r
-'msie8','msie8 - ( Rogue Robot )',\r
-'AhrefsBot/','AhrefsBot',\r
-'007ac9[\x20]Crawler','007ac9 Crawler',\r
-'2345Explorer/','2345Explorer',\r
-'360Spider','360Spider',\r
-'A[\x20]Simple[\x20]Crawler','A Simple Crawler',\r
-'Abrave','Abrave',\r
-'acapbot/','acapbot',\r
-'Accoona\-AI\-Agent/','Accoona-AI-Agent',\r
-'AdnormCrawlerCatchBot/','AdnormCrawlerCatchBot',\r
-'adscanner','adscanner',\r
-'aiHitBot/','aiHitBot',\r
-'aipbot/','aipbot',\r
-'AlphaBot','AlphaBot',\r
-'Apache\-HttpClient/','Apache-HttpClient',\r
-'Apexoo[\x20]Spider','Apexoo Spider',\r
-'Applebot/','Applebot',\r
-'arcemedia','AdsBot-ArceMedia',\r
-'archive\.org_bot','archive.org_bot',\r
-'Babya[\x20]Discoverer','Babya Discoverer',\r
-'Barkrowler','Barkrowler',\r
-'BDCbot/','BDCbot',\r
-'BellPagesCA/','BellPagesCA',\r
-'BeNosey[\x20]Mohawk[\x20]Search','BeNosey Mohawk Search',\r
-'bhcBot','bhcBot',\r
-'bidswitchbot','bidswitchbot',\r
-'BigBozz/','BigBozz',\r
-'BinGet/','BinGet',\r
-'bitlybot','bit.ly',\r
-'bl\.uk_lddc_bot/','bl.uk_lddc_bot',\r
-'BLEXBot/','BLEXBot',\r
-'bnf.fr_bot','bnf.fr_bot',\r
-'boitho\.com\-dc/','boitho.com-dc',\r
-'BoogleBot','BoogleBot',\r
-'BusinessBot:','BusinessBot:',\r
-'BW/','BW',\r
-'Bytespider','Bytespider',\r
-'CatchBot/','CatchBot',\r
-'CB/Nutch','CB/Nutch',\r
-'CCBot/','CCBot',\r
-'CheckMarkNetwork/','CheckMarkNetwork',\r
-'Cliqzbot/','Cliqzbot',\r
-'CMS[\x20]Crawler','CMS Crawler',\r
-'Companybook\-Crawler','Companybook-Crawler',\r
-'ConveraCrawler/','ConveraCrawler',\r
-'Contacts-Crawler','Contacts-Crawler',\r
-'contxbot','contxbot',\r
-'cosmos/','cosmos',\r
-'CRMNLCrawlAgent','CRMNLCrawlAgent',\r
-'crawl/Nutch','crawl/Nutch',\r
-'crawler4j','crawler4j',\r
-'CRAZYWEBCRAWLER','CRAZYWEBCRAWLER',\r
-'CSE[\x20]HTML[\x20]Validator','CSE HTML Validator',\r
-'C\-T[\x20]bot','C-T bot',\r
-'CUBOT','CUBOT',\r
-'Curl/PHP','Curl/PHP',\r
-'cyencebot','cyencebot',\r
-'DataCrawler/','DataCrawler',\r
-'daumoa','daumoa',\r
-'daum','daum',\r
-'Deepnet[\x20]Explorer','Deepnet Explorer',\r
-'DeuSu/','DeuSu',\r
-'Digincore','Digincore',\r
-'Discordbot/','Discordbot',\r
-'Dispatch/','Dispatch',\r
-'DnyzBot','DnyzBot',\r
-'DoCoMo/','DoCoMo',\r
-'Domain[\x20]Re\-Animator[\x20]Bot','Domain Re-Animator Bot',\r
-'DomainCrawler/','DomainCrawler',\r
-'DomainMacroCrawler/','DomainMacroCrawler',\r
-'DomainSONOCrawler/','DomainSONOCrawler',\r
-'DomainStatsBot/','DomainStatsBot',\r
-'DotBot/','DotBot',\r
-'DuckDuckBot-Https','DuckDuckBot-Https',\r
-'DuckDuckBot','DuckDuckBot',\r
-'DuckDuckGo\-Favicons\-Bot/','DuckDuckGo-Favicons-Bot',\r
-'ELinks/','ELinks',\r
-'ELinks[\x20]\(','ELinks (',\r
-'EmailMarketingRobot/','EmailMarketingRobot',\r
-'EmeraldShield\.com[\x20]WebBot','EmeraldShield.com WebBot',\r
-'envolk\[ITS\]spider/','envolk ITS spider',\r
-'eright','eright',\r
-'EsperanzaBot','EsperanzaBot',\r
-'Exabot/','Exabot',\r
-'ExtLinksBot','ExtLinksBot',\r
-'ExperianCrawlUK','ExperianCrawlUK',\r
-'facebookexternalhit/','facebookexternalhit',\r
-'fast_enterprise_crawler.*scrawleradmin\.t\-info@telekom\.de','FAST Enterprise crawleradmin.t-info@telekom.de',\r
-'fast_enterprise_crawler.*t\-info_bi_cluster_crawleradmin\.t\-info@telekom\.de','FAST Enterprise T-Info_BI_cluster crawleradmin.t-info@telekom.de',\r
-'FAST\-WebCrawler/','FAST-WebCrawler',\r
-'Feosey[\x20]Mohk[\x20]Crawler','Feosey Mohk Crawler',\r
-'findlinks/','findlinks',\r
-'Findxbot/','Findxbot',\r
-'FirePHP/','FirePHP',\r
-'firstdirectory\-bot','firstdirectory-bot',\r
-'flamingo','Flamingo_SearchEngine',\r
-'FlippyBearBot/','FlippyBearBot',\r
-'^foo$','foo',\r
-'Foregenix[\x20]Web[\x20]Scan','Foregenix Web Scan',\r
-'FreeWebMonitoring[\x20]SiteChecker/','FreeWebMonitoring SiteChecker',\r
-'fujilabol','fujilabol',\r
-'FurlBot/','FurlBot',\r
-'Gaisbot/','Gaisbot',\r
-'Gallent[\x20]Spider','Gallent Spider',\r
-'GarlikCrawler/','GarlikCrawler',\r
-'Getintent[\x20]Crawler','GetIntent Crawler',\r
-'GetintentCrawler[\x20]getintent\.com','GetintentCrawler getintent.com',\r
-'Gigabot/','Gigabot',\r
-'gipo\-crawler/Nutch','gipo-crawler/Nutch',\r
-'Girafabot','Girafabot',\r
-'Gluten[\x20]Free[\x20]Crawler/','Gluten Free Crawler',\r
-'gocrawl','gocrawl',\r
-'Gowikibot','Gowikibot',\r
-'Go\-http\-client/','Go-http-client',\r
-'GrapeshotCrawler/','GrapeshotCrawler',\r
-'GSiteCrawler/','GSiteCrawler',\r
-'GurujiBot/','GurujiBot',\r
-'hadiBot','hadiBot',\r
-'HaosouSpider','HaosouSpider',\r
-'HELLO[\x20]Crawler','HELLO Crawler',\r
-'holmes/','holmes',\r
-'houzzbot','houzzbot',\r
-'HTTP_Request2/','HTTP_Request2',\r
-'HubSpot[\x20]Webcrawler','HubSpot Webcrawler',\r
-'HyperCrawl/','HyperCrawl',\r
-'ICC\-Crawler/','ICC-Crawler',\r
-'iconoclast','iconoclast',\r
-'IDGCrawler/Nutch','IDGCrawler/Nutch',\r
-'IDG/UK','IDG/UK',\r
-'idmarch[\x20]Automatic\.beta/','idmarch Automatic.beta',\r
-'InbyBot','InbyBot',\r
-'Incutio[\x20]XML','Incutio XML',\r
-'IndeedBot','IndeedBot',\r
-'InfluenceBot','InfluenceBot',\r
-'IonCrawl','IonCrawl',\r
-'IRLbot/','IRLbot',\r
-'IssueCrawler','IssueCrawler',\r
-'istellabot/','istellabot',\r
-'James[\x20]BOT','James BOT',\r
-'Jigsaw/','Jigsaw',\r
-'JobFeed','JobFeed',\r
-'Jooblebot','Jooblebot',\r
-'KomodiaBot/','KomodiaBot',\r
-'Konqueror/','Konqueror',\r
-'laserlikebot','laserlikebot',\r
-'Lightspeed','Lightspeed',\r
-'linkapediabot','linkapediabot',\r
-'metager\-linkchecker','metager-linkchecker',\r
-'Linguee[\x20]Bot','Linguee Bot',\r
-'linkchecker','linkchecker',\r
-'LinkCheck','LinkCheck',\r
-'linkdexbot/','linkdexbot',\r
-'LinkedInBot/','LinkedInBot',\r
-'LinkpadBot/','LinkpadBot',\r
-'Links[\x20]\(','Links (',\r
-'LinksManager\.com_bot','LinksManager.com_bot',\r
-'LWP::Simple/','LWP::Simple',\r
-'Mail\.RU_Bot/','Mail.RU Bot',\r
-'makecontact','makecontact',\r
-'mappy','Mappy Crawler',\r
-'MauiBot','MauiBot',\r
-'meanpathbot/','meanpathbot',\r
-'Mechanize','Mechanize',\r
-'Mediatoolkitbot','Mediatoolkitbot',\r
-'MegaIndex\.ru/','MegaIndex.ru',\r
-'merzscope','merzscope',\r
-'Meta_Bot','Meta_Bot',\r
-'mfibot/','mfibot',\r
-'microsoft.*discovery','Microsoft Office Protocol Discovery',\r
-'missigua_locator','missigua_locator',\r
-'MixrankBot','MixrankBot',\r
-'MJ12bot/','MJ12bot',\r
-'MojeekBot','MojeekBot',\r
-'Mojolicious','Mojolicious',\r
-'MXT/Nutch','MXT/Nutch',\r
-'My[\x20]Nutch[\x20]Spider/','My Nutch Spider',\r
-'myse/Nutch','myse/Nutch',\r
-'Naaraa','Naaraa',\r
-'Neevabot','Neevabot',\r
-'NerdyBot','NerdyBot',\r
-'netEstate[\x20]NE[\x20]Crawler','netEstate NE Crawler',\r
-'NetResearchServer/','NetResearchServer',\r
-'Nimbostratus-Bot','Nimbostratus-Bot',\r
-'nominet','nominet',\r
-'NRLCorpusBuilder/Nutch','NRLCorpusBuilder/Nutch',\r
-'nutch\-1\.4/','nutch-1.4',\r
-'nutch\-1\.8/','nutch-1.8',\r
-'NutchCVS/','NutchCVS',\r
-'o\.uk[\x20]robot','o uk.robot',\r
-'ocrawler;','ocrawler;',\r
-'ODP[\x20]link[\x20]checker','ODP link checker',\r
-'Offline[\x20]Explorer/','Offline Explorer',\r
-'OmniExplorer_Bot/','OmniExplorer_Bot',\r
-'OrangeBot/','OrangeBot',\r
-'Orliac','Orliac',\r
-'OutclicksBot','OutclicksBot',\r
-'PageBitesHyperBot/','PageBitesHyperBot',\r
-'Pcore','Pcore',\r
-'pdffillerbot/','pdffillerbot',\r
-'peopleman','peopleman',\r
-'PetalBot','PetalBot',\r
-'PhantomJS','PhantomJS',\r
-'PHP/5\.2\.8','PHP/5.2.8',\r
-'Pinterestbot','Pinterestbot',\r
-'PiplBot','PiplBot',\r
-'Ploetz[\x20]\+[\x20]Zeller','Ploetz + Zeller',\r
-'Plukkie/','Plukkie',\r
-'Princetonbot/','Princetonbot',\r
-'PrivacyAwareBot/','PrivacyAwareBot',\r
-'Prlog/','Prlog',\r
-'proximic','proximic',\r
-'psbot/','psbot',\r
-'psbot\-image','psbot-image',\r
-'python_wk_crawler','python_wk_crawler',\r
-'Python\-urllib/','Python-urllib',\r
-'QCrawl','QCrawl',\r
-'Quick-Crawler','Quick-Crawler',\r
-'ResearchBot','ResearchBot',\r
-'roboto','roboto',\r
-'rogerbot/','rogerbot',\r
-'RSSingBot','RSSingBot',\r
-'RukiCrawler/','RukiCrawler',\r
-'SafeDNS[\x20]search[\x20]bot/','SafeDNS search bot',\r
-'SafeDNSBot','SafeDNSBot',\r
-'SafeSearch[\x20]microdata[\x20]crawler','SafeSearch microdata crawler',\r
-'safesearch','safesearch ( catchall )',\r
-'SBL\-BOT','SBL-BOT',\r
-'scrapy','scrapy',\r
-'Screaming[\x20]Frog[\x20]SEO[\x20]Spider/','Screaming Frog SEO Spider',\r
-'ScreenerBot[\x20]Crawler[\x20]Beta','ScreenerBot Crawler Beta',\r
-'Scrubby','Scrubby',\r
-'Searchie/','Searchie',\r
-'SecurityResearch\.bot','Security Research Bot',\r
-'Seekmo','Seekmo',\r
-'semanticbot','semanticbot',\r
-'SemrushBot/','SemrushBot',\r
-'SemrushBot-SI','SemrushBot-SI',\r
-'seo\-audit\-check\-bot/','seo-audit-check-bot',\r
-'Seobility','Seobility',\r
-'SEOkicks\-Robot','SEOkicks-Robot',\r
-'SEOlyticsCrawler/','SEOlyticsCrawler',\r
-'SEOstats','SEOstats',\r
-'Seosys/Nutch','Seosys/Nutch',\r
-'Seoterritory\.com[\x20]bot','Seoterritory.com.bot',\r
-'serendeputy','serendeputy',\r
-'Shim\-Crawler','Shim-Crawler',\r
-'SiteExplorer/','SiteExplorer',\r
-'siteexplorer\.info','siteexplorer.info',\r
-'siteimprove','siteimprove',\r
-'Slackbot\-LinkExpanding','Slackbot-LinkExpanding',\r
-'SmabblerBot/','SmabblerBot',\r
-'Sogou[\x20]web[\x20]spider/','Sogou web spider',\r
-'special_archiver/','special_archiver',\r
-'Spiderbot/','Spiderbot',\r
-'SpuhexBot','SpuhexBot',\r
-'spyonweb','spyonweb',\r
-'ssearch_bot','ssearch_bot',\r
-'Streamline3Bot','Streamline3Bot',\r
-'SurdotlyBot/','SurdotlyBot',\r
-'SurveyBot/','SurveyBot',\r
-'taiil/Nutch','taiil/Nutch',\r
-'tbot\-nutch','tbot-nutch',\r
-'TeeRaidBot','TeeRaidBot',\r
-'TelegramBot','TelegramBot',\r
-'Test/Nutch','Test/Nutch',\r
-'Test[\x20]Spider','Test Spider',\r
-'TestCrawler','TestCrawler',\r
-'The[\x20]Knowledge[\x20]AI', 'The Knowledge AI',\r
-'TkBot','TkBot',\r
-'tracemyfile','tracemyfile',\r
-'trendiction','trendiction',\r
-'TurnitinBot/','TurnitinBot',\r
-'TurnitinBot','TurnitinBot',\r
-'TweetmemeBot/','TweetmemeBot',\r
-'UCY/Nutch','UCY/Nutch',\r
-'uni-leipzig\.de','uni-leipzig.de',\r
-'Uptimebot/','Uptimebot',\r
-'UptimeRobot/','UptimeRobot',\r
-'URL[\x20]Checker','URL Checker',\r
-'UXCrawlerBot','UXCrawlerBot',\r
-'Validator\.nu/','Validator.nu',\r
-'vBSEO','vBSEO',\r
-'vBulletin[\x20]via[\x20]PHP','vBulletin via PHP',\r
-'vebidoobot','vebidoobot',\r
-'vegi[\x20]bot','vegi bot',\r
-'Velen','Velen',\r
-'viz/Nutch','viz/Nutch',\r
-'VoilaBot','VoilaBot',\r
-'VORTEX/','VORTEX',\r
-'voyager/','voyager',\r
-'vuhuvBot','vuhuvBot',\r
-'W3C_Validator/','W3C_Validator',\r
-'W3C\-checklink/','W3C-checklink',\r
-'WBSearchBot/','WBSearchBot',\r
-'WbSrch/','WbSrch/',\r
-'WeSEE:Ads/PageBot','WeSEE:Ads/PageBot',\r
-'WeSEE:Ads/PictureBot','WeSEE:Ads/PictureBot',\r
-'WeSEE_Bot','WeSEE_Bot',\r
-'Wget/','Wget',\r
-'Who\.is[\x20]Bot','Who.is.Bot',\r
-'wonderbot/','wonderbot',\r
-'woobot/','woobot',\r
-'Wotbox/','Wotbox',\r
-'Xaldon[\x20]WebSpider','Xaldon WebSpider',\r
-'Xenu[\x20]Link[\x20]Sleuth','Xenu Link Sleuth',\r
-'xenu_link_sleuth','xenu_link_sleuth',\r
-'XML[\x20]Sitemaps[\x20]Generator','XML Sitemaps Generator',\r
-'XoviBot/','XoviBot',\r
-'yacybot','yacybot',\r
-'Yahoo[\x20]Link[\x20]Preview','Yahoo Link Preview',\r
-'yak','yak-linkfluence',\r
-'YisouSpider','YisouSpider',\r
-'yoozBot','yoozBot',\r
-'Your\-Website\-Sucks','Your-Website-Sucks',\r
-'zoominfobot','zoominfobot',\r
-'zspider/','zspider',\r
-'ZumBot/','ZumBot',\r
-'ng/1\.','ng/1.',\r
-'ng/2\.','ng/2.',\r
-'libwww\-perl','libwww-perl',\r
-'urllib','urllib',\r
-'javabee','javabee',\r
-'projectwf\-java\-test\-crawler','projectwf-java-test-crawler',\r
-'java','Java ( catchall )',\r
-'loocalcrawler/nutch','loocalcrawler/nutch',\r
-'nutchosu\-vlib','nutchosu-vlib',\r
-'nutch','nutch ( catchall )',\r
-'perlcrawler','perlcrawler',\r
-'perl','perl',\r
-'(firefox/)([0-9]\.|[0-1][0]\.)','Firefox version 10 and lower - various robots',\r
-\r
-# Less common robots (In robot file)\r
-'^Mozilla$','Mozilla ( Rogue Robot )',\r
-'^mozilla\/3\.0\s\(compatible$', 'mozilla/3.0 (compatible - ( Rogue Robot )',\r
-'^mozilla\/4\.0$', 'mozilla/4.0 - ( Rogue Robot )',\r
-'^mozilla\/4\.0\s\(compatible;\)$', 'mozilla/4.0 (compatible;) - ( Rogue Robot )',\r
-'^mozilla\/5\.0$', 'mozilla/5.0 - ( Rogue Robot )',\r
-'^mozilla\/5\.0\s\(compatible;$', 'mozilla/5.0 (compatible; - ( Rogue Robot )',\r
-'^mozilla\/5\.0\s\(en\-us\)$', 'mozilla/5.0 (en-us) - ( Rogue Robot )',\r
-'^mozilla\/5\.0\sfirefox\/3\.0\.5$', 'mozilla/5.0 firefox/3.0.5 - ( Rogue Robot )',\r
-'^Mozilla/6\.0[\x20]\(compatible\)$','Mozilla/6.0 (compatible) - ( Rogue Robot )',\r
-'^Mozilla/(.*)Beta[\x20]\(Windows\)','Mozilla Beta (Windows) - ( Rogue Robot )',\r
-'MSIE[\x20]2','MSIE 2 - ( Rogue Robot )',\r
-'MSIE[\x20]3','MSIE 3 - ( Rogue Robot )',\r
-'MSIE[\x20]4','MSIE 4 - ( Rogue Robot )',\r
-'MSIE[\x20]5','MSIE 5 - ( Rogue Robot )',\r
-'MSIE[\x20]6','MSIE 6 - ( Rogue Robot )',\r
-'MSIE\+6\.0\;','MSIE+6.0; - ( Rogue Robot)',\r
-'Windows[\x20]95','Windows 95 - ( Rogue Robot )',\r
-'Windows[\x20]98','Windows 99 - ( Rogue Robot )',\r
-\r
-# these could be removed to speed up processing as they are rarely seen\r
-'a6\-indexer','a6-indexer',\r
-'abcdatos','abcdatos',\r
-'abonti\.com','abonti.com',\r
-'acme\.spider','acme.spider',\r
-'activebookmark','activebookmark',\r
-'adamm_bot','adamm_bot',\r
-'advbot','advbot',\r
-'affectv\.co\.uk','affectv.co.uk',\r
-'ahoythehomepagefinder','ahoythehomepagefinder',\r
-'aleadsoftbot','aleadsoftbot',\r
-'alkaline','alkaline',\r
-'allrati','allrati',\r
-'alltop','alltop',\r
-'almaden','almaden',\r
-'alpha_search_agent','alpha_search_agent',\r
-'anthill','anthill',\r
-'antibot','antibot',\r
-'aport','aport',\r
-'appie','appie',\r
-'applesyndication','applesyndication',\r
-'arachnophilia','arachnophilia',\r
-'arale','arale',\r
-'araneo','araneo',\r
-'architext','architext',\r
-'archive\-de\.com','archive-de.com',\r
-'aretha','aretha',\r
-'argus','argus',\r
-'ariadne','ariadne',\r
-'arianna\.libero\.it','arianna.libero.it',\r
-'arks','arks',\r
-'aspider','aspider',\r
-'aspseek','aspseek',\r
-'asterias','asterias',\r
-'asynchttpclient','asynchttpclient',\r
-'atn\.txt','atn.txt',\r
-'atomz','atomz',\r
-'auresys','auresys',\r
-'awbot','awbot',\r
-'backlinktest\.com','backlinktest.com',\r
-'backrub','backrub',\r
-'bbot','bbot',\r
-'becomebot','becomebot',\r
-'bender','bender',\r
-'betabot','betabot',\r
-'bigbrother','bigbrother',\r
-'biglotron','biglotron',\r
-'BingLocalSearch','BingLocalSearch',\r
-'bittorrent_bot','bittorrent_bot',\r
-'biz360[_+\s]spider','biz360 spider',\r
-'bjaaland','bjaaland',\r
-'blackwidow','blackwidow',\r
-'blindekuh','blindekuh',\r
-'blogbridge[_+\s]service','blogbridge service',\r
-'blogged_crawl','blogged_crawl',\r
-'bloglines','bloglines',\r
-'bloglovin','bloglovin',\r
-'blogpulse','blogpulse',\r
-'blogsearch','blogsearch',\r
-'blogshares','blogshares',\r
-'blogslive','blogslive',\r
-'blogssay','blogssay',\r
-'bloodhound','bloodhound',\r
-'bncf\.firenze\.sbn\.it/raccolta\.txt','bncf\.firenze\.sbn.it/raccolta.txt',\r
-'bobby','bobby',\r
-'bookmark\-manager','bookmark-manager',\r
-'borg\-bot','borg-bot',\r
-'boris','boris',\r
-'brightnet','brightnet',\r
-'bruinbot','bruinbot',\r
-'bspider','bspider',\r
-'bubing','bubing',\r
-'bumblebee','bumblebee',\r
-'butterfly','butterfly',\r
-'buzztracker','buzztracker',\r
-'cactvschemistryspider','cactvschemistryspider',\r
-'calif[^r]','calif[^r]',\r
-'candlelight[_+\s]favorites[_+\s]inspector','candlelight favorites inspector',\r
-'careerbot','careerbot',\r
-'carpathia','carpathia',\r
-'cassandra','cassandra',\r
-'catbot','catbot',\r
-'cbn00glebot','cbn00glebot',\r
-'cerberian_drtrs','cerberian_drtrs',\r
-'cfetch','cfetch',\r
-'cgireader','cgireader',\r
-'chattertrap','chattertrap',\r
-'check_http','check_http',\r
-'checkbot','checkbot',\r
-'checkweb_link_validator','checkweb_link_validator',\r
-'christcrawler','christcrawler',\r
-'churl','churl',\r
-'cienciaficcion','cienciaficcion',\r
-'cipinetbot','cipinetbot',\r
-'imagecoccoc','imagecoccoc',\r
-'coccoc','coccoc',\r
-'coldfusion','coldfusion',\r
-'collective','collective',\r
-'combine','combine',\r
-'commons\-httpclient','commons-httpclient',\r
-'computer_and_automation_research_institute_crawler','computer_and_automation_research_institute_crawler',\r
-'conceptbot','conceptbot',\r
-'contentmatch','contentmatch',\r
-'converamultimediacrawler','converamultimediacrawler',\r
-'coolbot','coolbot',\r
-'copubbot','copubbot',\r
-'core','core',\r
-'covario','covario',\r
-'cruiser','cruiser',\r
-'cscrawler','cscrawler',\r
-'cuasarbot','cuasarbot',\r
-'cursor','cursor',\r
-'cusco','cusco',\r
-'custo','custo',\r
-'cyberspyder','cyberspyder',\r
-'datafountains/dmoz_downloader','datafountains/dmoz_downloader',\r
-'dataprovider\.com','dataprovider.com',\r
-'daviesbot','daviesbot',\r
-'daylifefeedfetcher','daylifefeedfetcher',\r
-'daypopbot','daypopbot',\r
-'deepindex','deepindex',\r
-'desertrealm','desertrealm',\r
-'deweb','deweb',\r
-'dienstspider','dienstspider',\r
-'digger','digger',\r
-'digout4u','digout4u',\r
-'diibot','diibot',\r
-'dipsie\.bot','dipsie.bot',\r
-'direct_hit','direct_hit',\r
-'discobot','discobot',\r
-'dlvr\.it','dlvr.it',\r
-'dnabot','dnabot',\r
-'dnsgroup','dnsgroup',\r
-'doccheckbot','doccheckbot',\r
-'domainappender','domainappender',\r
-'domainchecker','domainchecker',\r
-'domainsdb\.net','domainsdb.net',\r
-'download_express','download_express',\r
-'dragonbot','dragonbot',\r
-'dreamwidth','dreamwidth',\r
-'drupal','drupal',\r
-'dulance','dulance',\r
-'dumbot','dumbot',\r
-'dumm\.de\-bot','dumm.de-bot',\r
-'dwcp','dwcp',\r
-'e\-collector','e-collector',\r
-'earthcom\.info','earthcom.info',\r
-'easydl','easydl',\r
-'ebiness','ebiness',\r
-'eccp','eccp',\r
-'echo!','echo!',\r
-'edgeio\-retriever','edgeio-retriever',\r
-'elfinbot','elfinbot',\r
-'emacs','emacs',\r
-'emcspider','emcspider',\r
-'enteprise','enteprise',\r
-'ernst[:blank:]2\.0','ernst[:blank:]2.0',\r
-'esther','esther',\r
-'ets_v','ets_v',\r
-'eventax','eventax',\r
-'everbeecrawler','everbeecrawler',\r
-'everest\-vulcan','everest-vulcan',\r
-'evliyacelebi','evliyacelebi',\r
-'exactseek','exactseek',\r
-'extreme[_+\s]picture[_+\s]finder','extreme picture finder',\r
-'ezoom','ezoom',\r
-'ezresult','ezresult',\r
-'facebook','facebook',\r
-'facebot','facebot',\r
-'fast\-search\-engine','fast-search-engine',\r
-'matrix_s\.p\.a\._\-_fast_enterprise_crawler','matrix_s.p.a._-_fast_enterprise_crawler',\r
-'fast_enterprise_crawler','fast_enterprise_crawler',\r
-'fastbot','fastbot',\r
-'fastcrawler','fastcrawler',\r
-'favicon','favicon',\r
-'favorg','favorg',\r
-'favorites_sweeper','favorites_sweeper',\r
-'fdse','fdse',\r
-'feedburner','feedburner',\r
-'feedcrawl','feedcrawl',\r
-'feedflow','feedflow',\r
-'feedmyinbox','feedmyinbox',\r
-'feedroll\.com','feedroll.com',\r
-'feedsky','feedsky',\r
-'feedster','feedster',\r
-'feedvalidator','feedvalidator',\r
-'feedzira','feedzira',\r
-'felix','felix',\r
-'ferret','ferret',\r
-'fetchbot','fetchbot',\r
-'fetchrover','fetchrover',\r
-'fever/','fever',\r
-'fido','fido',\r
-'filmkamerabot','filmkamerabot',\r
-'filterdb\.iss\.net','filterdb.iss.net',\r
-'finderlein[_+\s]research[_+\s]crawler','finderlein research crawler',\r
-'findexa_crawler','findexa_crawler',\r
-'finnish','finnish',\r
-'fireball','fireball',\r
-'firmilybot','firmilybot',\r
-'flexum','flexum',\r
-'foaf\-search\.net','foaf-search.net',\r
-'fooky\.com/ScorpionBot','fooky.com/ScorpionBot',\r
-'fouineur','fouineur',\r
-'francoroute','francoroute',\r
-'freecrawl','freecrawl',\r
-'freenews','freenews',\r
-'funnelweb','funnelweb',\r
-'g2crawler','g2crawler',\r
-'gama','gama',\r
-'gazz','gazz',\r
-'gcreep','gcreep',\r
-'geniebot','geniebot',\r
-'genieo','genieo',\r
-'geohasher','geohasher',\r
-'getbot','getbot',\r
-'geturl','geturl',\r
-'gigablastopensource','gigablastopensource',\r
-'global_fetch','global_fetch',\r
-'gnodspider','gnodspider',\r
-'goforit\.com','goforit.com',\r
-'goforitbot','goforitbot',\r
-'golem','golem',\r
-'gonzo','gonzo',\r
-'gougou','gougou',\r
-'gpu_p2p_crawler','gpu_p2p_crawler',\r
-'grabber','grabber',\r
-'grapeshot','grapeshot',\r
-'grapnel','grapnel',\r
-'griffon','griffon',\r
-'gromit','gromit',\r
-'grub','grub',\r
-'gulliver','gulliver',\r
-'gulperbot','gulperbot',\r
-'hambot','hambot',\r
-'hanrss','hanrss',\r
-'harvest','harvest',\r
-'havindex','havindex',\r
-'henrythemiragorobot','henrythemiragorobot',\r
-'heritrix','heritrix',\r
-'hl_ftien_spider','hl_ftien_spider',\r
-'hometown','hometown',\r
-'hoowwwer','hoowwwer',\r
-'hpprint','hpprint',\r
-'htdig','htdig',\r
-'html[_+\s]link[_+\s]validator','html link validator',\r
-'htmlgobble','htmlgobble',\r
-'htmlparser','htmlparser',\r
-'httrack','httrack',\r
-'hundesuche\.com\-bot','hundesuche.com-bot',\r
-'hyperdecontextualizer','hyperdecontextualizer',\r
-'ia_archiver\-web\.archive\.org','ia_archiver-web.archive.org',\r
-'ia_archiver','ia_archiver',\r
-'iajabot','iajabot',\r
-'iaskspider','iaskspider',\r
-'i\-bot','i-bot',\r
-'icarus6j','icarus6j',\r
-'ichiro','ichiro',\r
-'icjobs\.de','icjobs.de',\r
-'ilse','ilse',\r
-'iltrovatore\-setaccio','iltrovatore-setaccio',\r
-'imagelock','imagelock',\r
-'implisensebot','implisensebot',\r
-'inagist','inagist',\r
-'incywincy','incywincy',\r
-'infobot','infobot',\r
-'infociousbot','infociousbot',\r
-'infohelfer','infohelfer',\r
-'infomine','infomine',\r
-'informant','informant',\r
-'infoseeksidewinder','infoseeksidewinder',\r
-'infoseek','infoseek',\r
-'infospider','infospider',\r
-'inspectorwww','inspectorwww',\r
-'insurancobot','insurancobot',\r
-'integromedb\.org','integromedb.org',\r
-'intelliagent','intelliagent',\r
-'internet[_+\s]ninja','internet ninja',\r
-'internetarchive','internetarchive',\r
-'internetseer','internetseer',\r
-'internetsupervision','internetsupervision',\r
-'ips\-agent','ips-agent',\r
-'irobot','irobot',\r
-'iron33','iron33',\r
-'isearch2006','isearch2006',\r
-'israelisearch','israelisearch',\r
-'iupui_research_bot','iupui_research_bot',\r
-'izsearch','izsearch',\r
-'jacobin[\x20]club','jacobin club',\r
-'jakarta','jakarta',\r
-'jbot','jbot',\r
-'jcrawler','jcrawler',\r
-'jeeves','jeeves',\r
-'jennybot','jennybot',\r
-'jobboerse','jobboerse',\r
-'jobot','jobot',\r
-'jobo','jobo',\r
-'joebot','joebot',\r
-'jrtwine[_+\s]software[_+\s]check[_+\s]favorites[_+\s]utility','jrtwine software check favorites utility',\r
-'js\-kit','js-kit',\r
-'jubii','jubii',\r
-'jumpstation','jumpstation',\r
-'justview','justview',\r
-'kalambot','kalambot',\r
-'kamano\.de_newsfeedverzeichnis','kamano.de_newsfeedverzeichnis',\r
-'kapsi','kapsi',\r
-'katipo','katipo',\r
-'kazoombot','kazoombot',\r
-'kevin','kevin',\r
-'keyoshid','keyoshid',\r
-'kilroy','kilroy',\r
-'kinja\-imagebot','kinja-imagebot',\r
-'kinjabot','kinjabot',\r
-'knowitall','knowitall',\r
-'knowledge\.com','knowledge.com',\r
-'ko[_+\s]yappo[_+\s]robot','ko yappo robot',\r
-'kouaa_krawler','kouaa_krawler',\r
-'krugle','krugle',\r
-'ksibot','ksibot',\r
-'kummhttp','kummhttp',\r
-'kurzor','kurzor',\r
-'labelgrabber\.txt','labelgrabber.txt',\r
-'lanshanbot','lanshanbot',\r
-'larbin','larbin',\r
-'largesmall[\x20]crawler','largesmall crawler',\r
-'legs','legs',\r
-'letscrawl\.com','letscrawl.com',\r
-'libcrawl','libcrawl',\r
-'lilina','lilina',\r
-'link_valet_online','link_valet_online',\r
-'linkbot','linkbot',\r
-'linkdex\.com','linkdex.com',\r
-'linkidator','linkidator',\r
-'linkscan','linkscan',\r
-'linkstats[\x20]bot','linkstats bot',\r
-'linkwalker','linkwalker',\r
-'lipperhey','lipperhey',\r
-'livejournal\.com','livejournal.com',\r
-'lmspider','lmspider',\r
-'loadtimebot','loadtimebot',\r
-'lockon','lockon',\r
-'logo_gif','logo_gif',\r
-'longurl','longurl',\r
-'lssrocketcrawler','lssrocketcrawler',\r
-'ltbot','ltbot',\r
-'ltx71','ltx71',\r
-'lwp\-request','lwp-request',\r
-'lwp\-trivial','lwp-trivial',\r
-'lycos[_+\s]','lycos ',\r
-'macworm','macworm',\r
-'madaali\.de','madaali.de',\r
-'magpierss','magpierss',\r
-'magpie','magpie',\r
-'mapoftheinternet\.com','mapoftheinternet.com',\r
-'marvin','marvin',\r
-'mattie','mattie',\r
-'mediabot','mediabot',\r
-'mediafox','mediafox',\r
-'megaindex','megaindex',\r
-'megite','megite',\r
-'memorybot','memorybot',\r
-'mercator','mercator',\r
-'meshexplorer','meshexplorer',\r
-'metager2\-verification\-bot','metager2-verification-bot',\r
-'metajobbot','metajobbot',\r
-'metaspinner','metaspinner',\r
-'metauri','metauri',\r
-'miadev','miadev',\r
-'microsoft[_+\s]url[_+\s]control','microsoft url control',\r
-'microsoft[\x20]bits','microsoft bits',\r
-'microsoft\-webdav\-miniredir','microsoft-webdav-miniredir',\r
-'mindcrawler','mindcrawler',\r
-'mindupbot','mindupbot',\r
-'mini\-reptile','mini-reptile',\r
-'minirank','minirank',\r
-'misterbot','misterbot',\r
-'miva','miva',\r
-'mizzu_labs','mizzu_labs',\r
-'mnogosearch','mnogosearch',\r
-'moget','moget',\r
-'momspider','momspider',\r
-'monster','monster',\r
-'motor','motor',\r
-'movabletype','movabletype',\r
-'ms[_+\s]search[_+\s]6\.0[_+\s]robot','ms search 6.0 robot',\r
-'ms_search_4\.0_robot','ms_search_4.0_robot',\r
-'msnbot\-udiscovery','msnbot-udiscovery',\r
-'msrabot','msrabot',\r
-'msrbot','msrbot',\r
-'mt::telegraph::agent','mt::telegraph::agent',\r
-'muncher','muncher',\r
-'muscatferret','muscatferret',\r
-'mwdsearch','mwdsearch',\r
-'mydoyouhike','mydoyouhike',\r
-'myweb','myweb',\r
-'nagios','nagios',\r
-'nasa_search','nasa_search',\r
-'ndspider','ndspider',\r
-'nederland\.zoek','nederland.zoek',\r
-'netcarta','netcarta',\r
-'netcraft','netcraft',\r
-'netluchs','netluchs',\r
-'netmechanic','netmechanic',\r
-'netnewswire','netnewswire',\r
-'netscoop','netscoop',\r
-'netsprint','netsprint',\r
-'netvibes','netvibes',\r
-'newrelicpinger','newrelicpinger',\r
-'newscan\-online','newscan-online',\r
-'newsfox','newsfox',\r
-'newsgatoronline','newsgatoronline',\r
-'nextgensearchbot','nextgensearchbot',\r
-'nhse','nhse',\r
-'nicebot','nicebot',\r
-'nimblecrawler','nimblecrawler',\r
-'ning','ning',\r
-'nomad','nomad',\r
-'northstar','northstar',\r
-'noxtrumbot','noxtrumbot',\r
-'npbot','npbot',\r
-'nzexplorer','nzexplorer',\r
-'objectssearch','objectssearch',\r
-'occam','occam',\r
-'ocelli','ocelli',\r
-'octopus','octopus',\r
-'octora_beta_bot','octora_beta_bot',\r
-'onet\.pl[_+\s]sa','onet.pl sa',\r
-'onfolio','onfolio',\r
-'openfind','openfind',\r
-'opentaggerbot','opentaggerbot',\r
-'openwebspider','openwebspider',\r
-'optimizer','optimizer',\r
-'oracle_ultra_search','oracle_ultra_search',\r
-'orb_search','orb_search',\r
-'orbiter','orbiter',\r
-'packrat','packrat',\r
-'pageboy','pageboy',\r
-'panscient','panscient',\r
-'parasite','parasite',\r
-'passwordmaker\.org','passwordmaker.org',\r
-'patric','patric',\r
-'pear_http_request_class','pear_http_request_class',\r
-'peerbot','peerbot',\r
-'pegasus','pegasus',\r
-'perignator','perignator',\r
-'perman','perman',\r
-'petersnews','petersnews',\r
-'phantom','phantom',\r
-'php[_+\s]version[_+\s]tracker','php version tracker',\r
-'phpcrawl','phpcrawl',\r
-'phpdig','phpdig',\r
-'picmole','picmole',\r
-'pictureofinternet','pictureofinternet',\r
-'piltdownman','piltdownman',\r
-'pimptrain','pimptrain',\r
-'ping\.blo\.gs','ping.blo.gs',\r
-'pingdom','pingdom',\r
-'pioneer','pioneer',\r
-'pita','pita',\r
-'pitkow','pitkow',\r
-'pjspider','pjspider',\r
-'plinki','plinki',\r
-'pluckfeedcrawler','pluckfeedcrawler',\r
-'plumtreewebaccessor','plumtreewebaccessor',\r
-'pogodak','pogodak',\r
-'pompos','pompos',\r
-'popdexter','popdexter',\r
-'poppi','poppi',\r
-'port_huron_labs','port_huron_labs',\r
-'portalb','portalb',\r
-'postfavorites','postfavorites',\r
-'postpost','postpost',\r
-'postrank','postrank',\r
-'powermarks','powermarks',\r
-'printfulbot','printfulbot',\r
-'proodlebot','proodlebot',\r
-'protopage','protopage',\r
-'publiclibraryarchive','publiclibraryarchive',\r
-'pyquery','pyquery',\r
-'python','python',\r
-'qihoobot','qihoobot',\r
-'quipply','quipply',\r
-'qwantify','qwantify',\r
-'r6\_','r6\_',\r
-'rambler','rambler',\r
-'ratingburner','ratingburner',\r
-'raven','raven',\r
-'rbse','rbse',\r
-'redalert','redalert',\r
-'regator','regator',\r
-'relevantnoise\.com','relevantnoise.com',\r
-'resumerobot','resumerobot',\r
-'rhcs','rhcs',\r
-'riddler','riddler',\r
-'road_runner','road_runner',\r
-'robbie','robbie',\r
-'robi','robi',\r
-'robocrawl','robocrawl',\r
-'robofox','robofox',\r
-'robozilla','robozilla',\r
-'rojo','rojo',\r
-'rome[\x20]client','rome client',\r
-'roverbot','roverbot',\r
-'rpt\-httpclient','rpt-httpclient',\r
-'rssgraffiti','rssgraffiti',\r
-'rssimagesbot','rssimagesbot',\r
-'ruffle','ruffle',\r
-'rufusbot','rufusbot',\r
-'rules','rules',\r
-'safeads\.xyz','safeads.xyz',\r
-'safetynetrobot','safetynetrobot',\r
-'sage\+\+','sage++',\r
-'sandcrawler','sandcrawler',\r
-'savetheworldheritage','savetheworldheritage',\r
-'sbider','sbider',\r
-'schizozilla','schizozilla',\r
-'scooter','scooter',\r
-'scoutjet','scoutjet',\r
-'scumbot','scumbot',\r
-'search\-info','search-info',\r
-'search_au','search_au',\r
-'searchguild[_+\s]dmoz[_+\s]experiment','searchguild dmoz experiment',\r
-'searchmetricsbot','searchmetricsbot',\r
-'searchprocess','searchprocess',\r
-'seekbot','seekbot',\r
-'semalt','semalt',\r
-'senrigan','senrigan',\r
-'sensis_web_crawler','sensis_web_crawler',\r
-'seodiver','seodiver',\r
-'seokicks\.de','seokicks.de',\r
-'seoscanners','seoscanners',\r
-'sgscout','sgscout',\r
-'shaggy','shaggy',\r
-'shaihulud','shaihulud',\r
-'shareaholicbot','shareaholicbot',\r
-'shoutcast','shoutcast',\r
-'sift','sift',\r
-'simbot','simbot',\r
-'simplepie','simplepie',\r
-'sistrix','sistrix',\r
-'site\-valet','site-valet',\r
-'sitebot','sitebot',\r
-'sitedomain\-bot','sitedomain-bot',\r
-'sitetech','sitetech',\r
-'skimbot','skimbot',\r
-'skymob','skymob',\r
-'slcrawler','slcrawler',\r
-'slurp','slurp',\r
-'slysearch','slysearch',\r
-'smartspider','smartspider',\r
-'smtbot','smtbot',\r
-'snap\.com_beta_crawler','snap.com_beta_crawler',\r
-'snappy','snappy',\r
-'snooper','snooper',\r
-'sohu\-search','sohu-search',\r
-'sohu','sohu ( catchall )',\r
-'solbot','solbot',\r
-'speedy','speedy',\r
-'sphere_scout','sphere_scout',\r
-'spider[_+\s]monkey','spider monkey',\r
-'spiderline','spiderline',\r
-'spiderlytics','spiderlytics',\r
-'spiderman','spiderman',\r
-'spiderview','spiderview',\r
-'spip','spip',\r
-'sproose_crawler','sproose_crawler',\r
-'spry','spry',\r
-'sqworm','sqworm',\r
-'ssearcher','ssearcher',\r
-'steeler','steeler',\r
-'steroid__download','steroid__download',\r
-'stq_bot','stq_bot',\r
-'Stratagems[\x20]Kumo','Stratagems Kumo',\r
-'suchfin\-bot','suchfin-bot',\r
-'suke','suke',\r
-'summify\.com','summify.com',\r
-'sunrise','sunrise',\r
-'suntek','suntek',\r
-'superbot','superbot',\r
-'superfeedr','superfeedr',\r
-'susie','susie',\r
-'sven','sven',\r
-'syndic8','syndic8',\r
-'syndicapi','syndicapi',\r
-'synoobot','synoobot',\r
-'synthesio','synthesio',\r
-'t\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e','t-h-u-n-d-e-r-s-t-o-n-e',\r
-'tach_bw','tach_bw',\r
-'tagyu_agent','tagyu_agent',\r
-'tailrank','tailrank',\r
-'tarantula','tarantula',\r
-'tarspider','tarspider',\r
-'tcl_http_client_package','tcl_http_client_package',\r
-'techbot','techbot',\r
-'technoratibot','technoratibot',\r
-'templeton','templeton',\r
-'teoma','teoma',\r
-'teragramcrawlersurf','teragramcrawlersurf',\r
-'test_crawler','test_crawler',\r
-'testbot','testbot',\r
-'thumbsniper','thumbsniper',\r
-'titan','titan',\r
-'titin','titin',\r
-'tkwww','tkwww',\r
-'tlspider','tlspider',\r
-'topblogsinfo','topblogsinfo',\r
-'topicblogs','topicblogs',\r
-'topix\.net','topix.net',\r
-'trapit','trapit',\r
-'trileet','trileet',\r
-'turtlescanner','turtlescanner',\r
-'turtle','turtle',\r
-'tutorgigbot','tutorgigbot',\r
-'tweetedtimes','tweetedtimes',\r
-'twiceler','twiceler',\r
-'twisted[\x20]pagegetter','twisted pagegetter',\r
-'twitterbot','twitterbot',\r
-'twitterfeed','twitterfeed',\r
-'ubicrawler','ubicrawler',\r
-'ucsd','ucsd',\r
-'udmsearch','udmsearch',\r
-'ultraseek','ultraseek',\r
-'um\-IC','ubermetrics-technologies.com',\r
-'um\-LN','ubermetrics-technologies.com',\r
-'unchaos_bot_hybrid_web_search_engine','unchaos_bot_hybrid_web_search_engine',\r
-'unido\-bot','unido-bot',\r
-'unisterbot','unisterbot',\r
-'universalfeedparser','universalfeedparser',\r
-'unlost_web_crawler','unlost_web_crawler',\r
-'unwindfetchor','unwindfetchor',\r
-'updated','updated',\r
-'urlck','urlck',\r
-'ustc\-semantic\-group','ustc-semantic-group',\r
-'vagabondo\-wap','vagabondo-wap',\r
-'vagabondo','vagabondo',\r
-'valkyrie','valkyrie',\r
-'vermut','vermut',\r
-'versus_crawler_from_eda\.baykan@epfl\.ch','versus_crawler_from_eda.baykan@epfl.ch',\r
-'verticrawl','verticrawl',\r
-'vespa_crawler','vespa_crawler',\r
-'victoria','victoria',\r
-'virus[_+\s]detector','virus_detector',\r
-'visionsearch','visionsearch',\r
-'voidbot','voidbot',\r
-'voltron','voltron',\r
-'vse/','vse',\r
-'vwbot','vwbot',\r
-'w3c[_+\s]css[_+\s]validator[_+\s]jfouffa','w3c_css_validator_jfouffa',\r
-'w3index','w3index',\r
-'w3m2','w3m2',\r
-'wallpaper','wallpaper',\r
-'wanderer','wanderer',\r
-'wapspider','wapspider',\r
-'wapspIRLider','wapspIRLider',\r
-'watchmouse','watchmouse',\r
-'wavefire','wavefire',\r
-'waybackarchive\.org','waybackarchive.org',\r
-'wazzup','wazzup',\r
-'web_downloader','web_downloader',\r
-'webbandit','webbandit',\r
-'webbase','webbase',\r
-'webcatcher','webcatcher',\r
-'webclipping\.com','webclipping.com',\r
-'webcollage','webcollage',\r
-'webcompass','webcompass',\r
-'webcopy','webcopy',\r
-'webcrawl\.net','webcrawl.net',\r
-'webdup','webdup',\r
-'webfetcher','webfetcher',\r
-'webfilter','webfilter',\r
-'webfoot','webfoot',\r
-'webinator','webinator',\r
-'webindexer','webindexer',\r
-'weblayers','weblayers',\r
-'weblinker','weblinker',\r
-'webminer','webminer',\r
-'webmirror','webmirror',\r
-'webmoose','webmoose',\r
-'webquest','webquest',\r
-'webreader','webreader',\r
-'webreaper','webreaper',\r
-'website[_+\s]monitoring[_+\s]bot','website monitoring bot',\r
-'websnarf','websnarf',\r
-'webspider','webspider',\r
-'webvac','webvac',\r
-'webvulncrawl','webvulncrawl',\r
-'webwalker','webwalker',\r
-'webwalk','webwalk',\r
-'webwatch','webwatch',\r
-'wells_search','wells_search',\r
-'wer\-liefert\-was','wer-liefert-was',\r
-'wesee:search','wesee:search',\r
-'wevikabot','wevikabot',\r
-'whatuseek','whatuseek',\r
-'whowhere','whowhere',\r
-'windows\-rss\-platform','windows-rss-platform',\r
-'wired\-digital','wired-digital',\r
-'zyborg','zyborg',\r
-'wisenutbot','wisenutbot',\r
-'wiumi','wiumi',\r
-'wmir','wmir',\r
-'wolp','wolp',\r
-'wombat','wombat',\r
-'wonderer','wonderer',\r
-'woozweb','woozweb',\r
-'wordpress','wordpress',\r
-'worm','worm',\r
-'wume_crawler','wume_crawler',\r
-'wwwc','wwwc',\r
-'wwweasel','wwweasel',\r
-'wz101','wz101',\r
-'xget','xget',\r
-'xirq','xirq',\r
-'xydo','xydo',\r
-'y!j','y!j',\r
-'yahoo![\x20]searchmonkey','yahoo! searchmonkey',\r
-'yahoo!_mindset','yahoo!_mindset',\r
-'yahoo\-blogs','yahoo-blogs',\r
-'yahoo\-mmcrawler','yahoo-mmcrawler',\r
-'yahoo\-newscrawler','yahoo-newscrawler',\r
-'yahoo[\x20]pipes','yahoo pipes',\r
-'yahoo\-verticalcrawler','yahoo-verticalcrawler',\r
-'yahoocachesystem','yahoocachesystem',\r
-'yahooexternalcache','yahooexternalcache',\r
-'yahoofeedseeker','yahoofeedseeker',\r
-'yahooseeker\-testing','yahooseeker-testing',\r
-'yahooseeker','yahooseeker',\r
-'yahooysmcm','yahooysmcm',\r
-'yammer','yammer',\r
-'yanga','yanga',\r
-'yet\-another\-spider','yet-another-spider',\r
-'yeti','yeti',\r
-'yie8','yie8',\r
-'yodaobot','yodaobot',\r
-'yooglifetchagent','yooglifetchagent',\r
-'youdao','youdao',\r
-'yourls','yourls',\r
-'z\-add_link_checker','z-add_link_checker',\r
-'zealbot','zealbot',\r
-'zemanta','zemanta',\r
-'zend_http_client','zend_http_client',\r
-'zeus','zeus',\r
-'zhuaxia','zhuaxia',\r
-'[^a]fish','[^a]fish',\r
-'[\x20]netseer[\x20]',' netseer ',\r
-'^[1-3]$','^[1-3]$',\r
-'^finbot','^finbot',\r
-'^motorola$','^motorola$',\r
-'^msie','^msie',\r
-'^voyager/','^voyager',\r
-'^webindex$','webindex',\r
-'1\-more_scanner','1-more_scanner',\r
-# below placed at end to catch some generics\r
-'nbot','nbot',\r
-\r
-# Generic robot\r
-'robot','robot',\r
-'blog','blog',\r
-'checker','checker',\r
-'crawl','crawl',\r
-'discover','discover',\r
-'feed','feed',\r
-'fetcher','fetcher',\r
-'hunter','hunter',\r
-'link','link',\r
-'scanner','scanner',\r
-'seek','seek',\r
-'sitemap','sitemap',\r
-'spider','spider',\r
-'sucker','sucker',\r
-'survey','survey',\r
-'validator','validator',\r
-'bot[\s_+:,\.\;\/\\\-]','Unknown robot identified by bot\*',\r
-'[\s_+:,\.\;\/\\\-]bot','Unknown robot identified by \*bot',\r
-'curl','Curl',\r
-'php','A PHP script',\r
-'ruby/','Ruby script',\r
-'no_user_agent','empty user agent string',\r
-# Moving oBot towards the end so it does not pick up other *obot robots\r
-'oBot/','oBot',\r
-# Unknown robots identified by hit on robots.txt\r
-'unknown','Unknown robot (identified by hit on robots.txt)'\r
-);\r
-\r
-\r
-# RobotsAffiliateLib\r
-# This list try to tell by which Search Engine a robot is used\r
-#-------------------------------------------------------------\r
-%RobotsAffiliateLib = (\r
-);\r
-\r
-1;\r
diff --git a/test/awstats/conf/awstats.testnginx.conf b/test/awstats/conf/awstats.testnginx.conf

new file mode 100644 (file)

index 0000000..ffb43cd
--- /dev/null
+++ b/test/awstats/conf/awstats.testnginx.conf
@@ -0,0 +1,806 @@
+# AWStats configure file
+#------------------------------------------------------------------------
+# Copy this file into awstats.www.myserver.mydomain.conf or awstats.conf
+# and edit this new file to setup AWStats.
+# If you don't understand what is a parameter, keep default value.
+#------------------------------------------------------------------------
+
+
+# Main setup section (Required to make AWStats work)
+#------------------------------------------------------------------------
+
+# "LogFile" contains the web server log file to analyze.
+# Possible values: A full path, or a relative path from awstats.pl directory.
+# Example: "/var/log/apache/access.log"
+# Example: "../logs/mycombinedlog.log"
+# You can also use tags in this filename if you need a dynamic file name
+# depending on date or time (Replacement is made by AWStats at the beginning
+# of its execution). These are the available tags:
+#   %YYYY-n  is replaced with 4 digits year we were n hours ago
+#   %YY-n    is replaced with 2 digits year we were n hours ago
+#   %MM-n    is replaced with 2 digits month we were n hours ago
+#   %MO-n    is replaced with 3 letters month we were n hours ago
+#   %DD-n    is replaced with day we were n hours ago
+#   %HH-n    is replaced with hour we were n hours ago
+#   %NS-n    is replaced with number of seconds at 00:00 since 1970
+#   %WM-n    is replaced with the week number in month (1-5)
+#   %Wm-n    is replaced with the week number in month (0-4)
+#   %WY-n    is replaced with the week number in year (01-52)
+#   %Wy-n    is replaced with the week number in year (00-51)
+#   %DW-n    is replaced with the day number in week (1-7, 1=sunday)
+#                              use n=24 if you need (1-7, 1=monday)
+#   %Dw-n    is replaced with the day number in week (0-6, 0=sunday)
+#                              use n=24 if you need (0-6, 0=monday)
+#   Use 0 for n if you need current year, month, day, hour...
+# Example: "/var/log/access_log.%YYYY-0%MM-0%DD-0.log"
+# Example: "C:/WINNT/system32/LogFiles/W3SVC1/ex%YY-24%MM-24%DD-24.log"
+# You can also use a pipe if log file come from a pipe.
+# Example: "gzip -d </var/log/apache/access.log.gz |"
+#
+LogFile="/home/ldestailleur/git/awstats/test/log/testnginx.log"
+
+
+# Enter the log file type you want to analyze.
+# Possible values:
+#  W - For a web log file
+#  M - For a mail log file
+#  F - For a ftp log file
+# Example: W
+# Default: W
+#
+LogType=W
+
+
+# Enter here your log format (Must match your web server config. See setup
+# instructions in documentation to know how to configure your web server to
+# have the required log format).
+# Possible values: 1,2,3,4,5 or "your_own_personalized_log_format"
+# 1 - Apache or Lotus Notes/Domino native combined log format (NCSA combined/XLF/ELF log format)
+# 2 - IIS log format (W3C log format)
+# 3 - Webstar native log format
+# 4 - Apache or Squid native common log format (NCSA common/CLF log format)
+#     With LogFormat=4, some features (browsers, os, keywords...) can't work.
+# "your_own_personalized_log_format" = If your log uses a personalized format,
+#   you must use the following syntax keys to define the log format string:
+#   %host             Host client name or IP address
+#   %logname          Authenticated login/user used on protected pages
+#   %time1            Date and time with format: [dd/mmm/yyyy:hh:mm:ss +0000]
+#   %time1b           Date and time with format: [dd/mmm/yyyy:hh:mm:ss]
+#   %time2            Date and time with format: yyyy-mm-dd hh:mm:ss
+#   %methodurl        Method and URL with format: "GET /index.html HTTP/x.x"
+#   %methodurlnoprot  Method and URL with format: "GET /index.html"
+#   %method           Method with format: GET
+#   %url              URL only with format: /index.html
+#   %query            Query string (used by URLWithQuery option)
+#   %code             Return code status (with format for web log: 999)
+#   %bytesd           Size of document in bytes
+#   %refererquot      Referer page with format: "http://from.com/from.htm"
+#   %referer          Referer page with format: http://from.com/from.htm
+#   %uaquot           User agent with format: "Mozilla/4.0 (compatible, ...)"
+#   %ua               User agent with format: Mozilla/4.0_(compatible...)
+#   %gzipin           mod_gzip compression input bytes: In:XXX
+#   %gzipout          mod_gzip compression output bytes & ratio: Out:YYY:ZZpct.
+#   %gzipratio        mod_gzip compression ratio: ZZpct.
+#   %deflateratio     mod_deflate compression ratio with format: (ZZ)
+#   %email            EMail sender (for mail log)
+#   %email_r          EMail receiver (for mail log)
+#   %syslog           Syslog-specific time and host stamp with format: Mon dd hh:mm:ss hostname
+#   %virtualname      Web server virtual hostname. Use this tag when same log
+#                     file contains data of several virtual web servers. The
+#                     SiteDomain will be used to filter the one you want.
+#   If your log format has some fields not included in this list, use
+#   %other            Means another field not used
+#
+# Examples for Apache combined logs (following two examples are equivalent):
+# LogFormat = 1
+# LogFormat = "%host %other %logname %time1 %methodurl %code %bytesd %refererquot %uaquot"
+#
+# Examples for IIS (following two examples are equivalent):
+# LogFormat = 2
+# LogFormat = "%time2 %host %logname %method %url %code %bytesd %other %ua %referer"
+#
+LogFormat="%host %other %logname %time1 %methodurl %code %bytesd %refererquot %uaquot"
+#LogFormat=1
+
+
+# Set this to a directory where you want AWStats to save its working files.
+# Need write permissions by web server user (user "nobody" with Unix OS).
+# Example: "/tmp"
+# Example: "../data"
+# Example: "C:/awstats_working_dir"
+# Default: "."          (means same directory as awstats.pl)
+#
+DirData="/home/ldestailleur/git/awstats/test/awstats/result"   
+
+# Relative or absolute web URL of your awstats.pl directory.
+# Useful only when AWStats is used from command line.
+# Default: "/cgi-bin"   (means awstats.pl is in "/wwwroot/cgi-bin")
+#
+DirCgi="/cgi-bin"
+
+# Relative or absolute web URL of all icons subdirectories.
+# Default: "/icon" (means you must copy icons directories in "/wwwroot/icon")
+#
+DirIcons="/home/ldestailleur/git/awstats/wwwroot/icon"
+
+# If host names are already resolved in your logfile, setting this to 0 will
+# increase performance.
+# Possible values: 1 or 0
+# Default: 1
+# 
+DNSLookup=1
+
+
+SiteDomain="__AWSTATS_CURRENT_CONFIG__"
+
+
+# Enter here all other possible domain names, addresses or virtual host
+# aliases someone can use to access your site. Try to keep only the minimum
+# number of possible names/addresses to have the best performances.
+# You can repeat the "SiteDomain" value in this list.
+# This parameter is used to analyze referer field in log file and to help
+# AWStats to know if a referer URL is a local URL of same site or an URL of
+# another site.
+# Note: Use space between each value.
+# Note: You can use regular expression values writing value with REGEX[value].
+# Example: "www.myserver.com localhost 127.0.0.1 REGEX[\.mydomain\.(net|org)$]"
+#
+HostAliases="localhost 127.0.0.1 REGEX[^.*\.myserver\.com$]"
+
+
+
+#------------------------------------------------------------------------
+# Optional setup section (Not required but increase AWStats features)
+#------------------------------------------------------------------------
+
+# When this parameter is set to 1, AWStats adds a button on report page to
+# allow to "update" statistics from a web browser. Warning, when "update" is
+# made from a browser, AWStats is run as a CGI by the web server user defined
+# in your web server (user "nobody" by default with Apache, "IUSR_XXX" with
+# IIS), so the "DirData" directory and all already existing history files
+# awstatsMMYYYY[.xxx].txt must be writable by this user. Change permissions if
+# necessary to "Read/Write" (and "Modify" for Windows NTFS file systems).
+# Warning: Update process can be long so you might experience "time out"
+# browser errors if you don't launch AWStats frequently enough.
+# When set to 0, update is only made when AWStats is run from the command
+# line interface (or a task scheduler).
+# Possible values: 0 or 1
+# Default: 0
+#
+AllowToUpdateStatsFromBrowser=1
+
+
+# Whether AWStats may purge the log file after processing it. This way, the next
+# time you launch AWStats, the log file will be smaller and processing faster.
+# IMPORTANT !!!
+# AWStats is able to find new lines in your log file to process only new ones,
+# so you can launch it as soon as you want, even with this parameter set to 0
+# but if you work with this value, you MUST have something to clean sometimes
+# your logfile if your web server doesn't do it.
+# Possible values: 1 or 0
+# Default: 0   (but if you can, set this to 1 to increase speed)
+#
+#
+PurgeLogFile=0
+
+
+# When PurgeLogFile is setup to 1, AWStats will clean your log file after
+# processing it. You can however keep an archive file (saved in "DirData") of
+# all processed log records by setting this to 1 (For example if you want to
+# use another log analyzer).
+# This parameter is not used if PurgeLogFile=0
+# Possible values: 1 or 0
+# Default: 0
+#
+ArchiveLogRecords=0
+
+
+# MiscTrackerUrl can be used to make AWStats able to detect some miscellaneous
+# things that cannot be tracked any other way, like:
+# - Screen size
+# - Color depth
+# - Java enabled
+# - Macromedia Director plugin
+# - Macromedia Shockwave plugin
+# - Realplayer G2 plugin
+# - QuickTime plugin
+# - Mediaplayer plugin
+# - Acrobat PDF plugin
+# To enable all these features, you must add the following HTML code at the end
+# of your index page (before </BODY>) :
+# <script language=javascript src="/js/awstats_misc_tracker.js"></script>
+# If code is not added in index page, all these detection capabilities will be
+# disabled. You must also check that ShowScreenSizeStats and ShowMiscStats
+# parameters are set to 1 to make results appear in report page.
+# If you change this parameter, you must also change the
+# awstatsmisctrackerurl variable into the awstats_misc_tracker.js file.
+# Change : Effective for new updates only.
+# Possible value: Name of javascript tracker file added in HTML code
+# Default: "/js/awstats_misc_tracker.js"
+#
+MiscTrackerUrl="/js/awstats_misc_tracker.js"
+
+
+# Add here a list of kind of url (file extension) that must be counted as
+# "Hit only" and not as a "Hit" and "Page/Download". You can set here all
+# image extensions, as they are downloaded hits that must be counted but they
+# are not viewed pages. URLs with such extensions are not included in the TOP
+# Pages/URL report.
+# Note: If you want to exclude particular URLs from stats (No Pages and no
+# Hits reported), you must use SkipFiles parameter.
+# Change : Effective for new updates only
+# Example: "css js class gif jpg jpeg png bmp ico zip arj gz z wav mp3 wma mpg"
+# Example: ""
+# Default: "css js class gif jpg jpeg png bmp ico"
+#
+NotPageList="css js class gif jpg jpeg png bmp ico"
+
+
+# Default index page name for your web server.
+# Change : Effective for new updates only
+# Example: "index.php index.html default.html"
+# Default: "index.html"
+#
+DefaultFile="index.php index.html"
+
+
+
+
+#------------------------------------------------------------------------
+# Optional setup section (Not required but increase AWStats features)
+#------------------------------------------------------------------------
+
+# Set your primary language.
+# Possible value:
+#  Albanian=al, Bosnian=ba, Bulgarian=bg, Catalan=ca,
+#  Chinese (Taiwan)=tw, Chinese (Simplified)=cn, Czech=cz, Danish=dk,
+#  Dutch=nl, English=en, Estonian=et, Euskara=eu, Finnish=fi,
+#  French=fr, Galician=gl, German=de, Greek=gr, Hebrew=he, Hungarian=hu,
+#  Icelandic=is, Indonesian=id, Italian=it, Japanese=jp, Korean=kr,
+#  Latvian=lv, Norwegian (Nynorsk)=nn, Norwegian (Bokmal)=nb, Polish=pl,
+#  Portuguese=pt, Portuguese (Brazilian)=br, Romanian=ro, Russian=ru,
+#  Serbian=sr, Slovak=sk,  Spanish=es, Swedish=se, Turkish=tr, Ukrainian=ua,
+#  Welsh=wlk.
+#  First available language accepted by browser=auto
+# Default: "auto"
+#
+Lang="auto"
+
+
+# Do not include access from clients that match following criteria.
+# If your log file contains IP addresses in host field, you must enter here
+# matching IP addresses criteria.
+# If DNS lookup is already done in your log file, you must enter here hostname
+# criteria, else enter ip address criteria.
+# The opposite parameter of "SkipHosts" is "OnlyHosts".
+# Note: Use space between each value. This parameter is not case sensitive.
+# Note: You can use regular expression values writing value with REGEX[value].
+# Change : Effective for new updates only
+# Example: "127.0.0.1 REGEX[^192\.168\.] REGEX[^10\.0\.0\.]"
+# Example: "localhost REGEX[^.*\.localdomain$]"
+# Default: ""
+#
+SkipHosts=""
+
+
+# Do not include access from clients with a user agent that match following
+# criteria. If you want to exclude a robot, you should update the robots.pm
+# file instead of this parameter.
+# Note: Use space between each value. This parameter is not case sensitive.
+# Note: You can use regular expression values writing value with REGEX[value].
+# Change : Effective for new updates only
+# Example: "konqueror REGEX[ua_test_v\d\.\d]"
+# Default: ""
+#
+SkipUserAgents=""
+
+
+# Use SkipFiles to ignore access to URLs that match one of following entries.
+# You can enter a list of not important URLs (like framed menus, hidden pages,
+# etc...) to exclude them from statistics. You must enter here exact relative
+# URL as found in log file, or a matching REGEX value.
+# For example, to ignore /badpage.html, just add "/badpage.html". To ignore
+# all pages in a particular directory, add "REGEX[^\/directorytoexclude]".
+# The opposite parameter of "SkipFiles" is "OnlyFiles".
+# Note: Use space between each value. This parameter is not case sensitive.
+# Note: You can use regular expression values writing value with REGEX[value].
+# Change : Effective for new updates only
+# Example: "/badpage.html REGEX[^\/excludedirectory]"
+# Default: ""
+#
+SkipFiles=""
+
+
+# Some web servers on some operating systems (IIS-Windows) consider that
+# logins with the same value but different case are the same login. To tell
+# AWStats to also consider them as one, set this parameter to 1.
+# Possible values: 0 or 1
+# Default: 0
+# 
+AuthenticatedUsersNotCaseSensitive=1
+
+
+# Keep or remove the anchor string you can find in some URLs.
+# Possible values: 0 or 1
+# Default: 0
+#
+URLWithAnchor=0
+
+
+# In URL links, "?" char is used to add parameter's list in URLs. Syntax is:
+# /mypage.html?param1=value1&param2=value2
+# However, some servers/sites use also others chars to isolate dynamic part of
+# their URLs. You can complete this list with all such characters.
+# Change : Effective for new updates only
+# Example: "?;,"
+# Default: "?;"
+#
+URLQuerySeparators="?;"
+
+
+# Keep or remove the query string to the URL in the statistics for individual
+# pages. This is primarily used to differentiate between the URLs of dynamic
+# pages. If set to 1, mypage.html?id=x and mypage.html?id=y are counted as two
+# different pages.
+# Warning, when set to 1, memory required to run AWStats is dramatically
+# increased if you have a lot of changing URLs (for example URLs with a random
+# id inside). Such web sites should not set this option to 1 or use seriously
+# the next parameter URLWithQueryWithOnlyFollowingParameters (or eventually 
+# URLWithQueryWithoutFollowingParameters).
+# Change : Effective for new updates only
+# Possible values:
+# 0 - URLs are cleaned from the query string (ie: "/mypage.html")
+# 1 - Full URL with query string is used     (ie: "/mypage.html?p=x&q=y")
+# Default: 0
+# 
+URLWithQuery=1
+
+
+# When URLWithQuery is on, you will get the full URL with all parameters in
+# URL reports. But among those parameters, sometimes you don't need a
+# particular parameter because it does not identify the page or because it's
+# a random ID changing for each access even if URL points to same page. In
+# such cases, it is highly recommended to ask AWStats to keep only parameters
+# you need (if you know them) before counting, manipulating and storing it.
+# Enter here list of wanted parameters. For example, with "param", one hit on
+# /mypage.cgi?param=abc&id=Yo4UomP9d and /mypage.cgi?param=abc&id=Mu8fdxl3r
+# will be reported as 2 hits on /mypage.cgi?param=abc
+# This parameter is not used when URLWithQuery is 0 and can't be used with
+# URLWithQueryWithoutFollowingParameters.
+# Change : Effective for new updates only
+# Example: "param"
+# Default: ""
+# 
+URLWithQueryWithOnlyFollowingParameters=""
+
+
+# When URLWithQuery is on, you will get the full URL with all parameters in
+# URL reports. But among those parameters, sometimes you don't need a
+# particular parameter because it does not identify the page or because it's
+# a random ID changing for each access even if URL points to same page. In
+# such cases, it is highly recommended to ask AWStats to remove such parameters
+# from the URL before counting, manipulating and storing it. Enter here list
+# of all non wanted parameters. For example if you enter "id", one hit on
+# /mypage.cgi?p=abc&id=Yo4UomP9d and /mypage.cgi?p=abc&id=Mu8fdxl3r
+# will be reported as 2 hits on /mypage.cgi?p=abc
+# This parameter is not used when URLWithQuery is 0 and can't be used with
+# URLWithQueryWithOnlyFollowingParameters.
+# Change : Effective for new updates only
+# Example: "PHPSESSID jsessionid"
+# Default: ""
+# 
+URLWithQueryWithoutFollowingParameters="productId jsessionid"
+
+
+# Keep or remove the query string to the referrer URL in the statistics for
+# external referrer pages. This is used to differentiate between the URLs of
+# dynamic referrer pages. If set to 1, mypage.html?id=x and mypage.html?id=y
+# are counted as two different referrer pages.
+# Change : Effective for new updates only
+# Possible values:
+# 0 - Referrer URLs are cleaned from the query string (ie: "/mypage.html")
+# 1 - Full URL with query string is used      (ie: "/mypage.html?p=x&q=y")
+# Default: 0
+# 
+URLReferrerWithQuery=0
+
+
+# AWStats can detect setup problems or show you important information to have
+# a better use. Keep this to 1, except if AWStats says you can change it.
+# Possible values: 1 or 0
+# Default: 1
+WarningMessages=1
+
+
+
+#-----------------------------------------------------------------------------
+# OPTIONAL ACCURACY SETUP SECTION (Not required but increase AWStats features)
+#-----------------------------------------------------------------------------
+
+# The following values allow you to define the accuracy of AWStats entity
+# (robots, browsers, os, referers, file types) detection.
+# It is recommended that very important web sites, or ISPs that provide AWStats
+# to their customers, set this parameter to 1 (or 0) instead of 2.
+# Possible values:
+#  0 = No detection,
+#  1 = Medium/Standard detection
+#  2 = Full detection
+# Change : Effective for new updates only
+# Default: 2 (0 for LevelForWormsDetection)
+#
+LevelForBrowsersDetection=2                            # 0 disables Browsers detection.
+LevelForOSDetection=2                                  # 0 disables OS detection.
+LevelForRefererAnalyze=2                               # 0 disables Origin detection.
+LevelForRobotsDetection=2                              # 0 disables Robots detection.
+LevelForSearchEnginesDetection=2               # 0 disables Search engines detection.
+LevelForKeywordsDetection=2                            # 0 disables Keyphrases/Keywords detection.
+LevelForFileTypesDetection=1                   # 0 disables File types detection.
+LevelForWormsDetection=2                               # 0 disables Worms detection.
+
+
+
+#-----------------------------------------------------------------------------
+# OPTIONAL APPEARANCE SETUP SECTION (Not required but increase AWStats features)
+#-----------------------------------------------------------------------------
+
+# When you use AWStats as a CGI, you can have the reports shown in HTML frames.
+# Frames are only available for reports viewed dynamically. When you build
+# pages from command line, this option is not used and no frames are built.
+# Possible values: 0 or 1
+# Default: 1
+#
+UseFramesWhenCGI=1
+
+
+# Each URL shown in stats page is a link you can click.
+# Possible values: 1 or 0
+# Default: 1
+#
+ShowLinksOnUrl=1
+
+
+# List of visible flags that links to other language translations.
+# See Lang parameter for list of allowed flag/language codes.
+# If you don't want any flag link, set ShowFlagLinks to "".
+# This parameter is used only if ShowMenu parameter is set to 1.
+# Possible values: "" or "language_codes_separated_by_space"
+# Example: "en es fr nl es"
+# Default: ""
+#
+ShowFlagLinks="fr"
+
+
+# Search engines keywords reported are full search string or separate keywords
+# Possible values:
+# 0 - Search keywords reported are full search string (ie: "town maps")
+# 1 - Search keywords reported are separated words (ie: "town" and "maps")
+# Default: 0
+#
+SplitSearchString=0
+
+
+# You can put here HTML code that will be added at the end of AWStats reports.
+# Great to add advert ban.
+# Default: ""
+#
+HTMLEndSection=""
+
+
+# Value of maximum bar width/height for horizontal/vertical graphics bar
+# Default: 260/220
+#
+BarWidth   = 260
+BarHeight  = 220
+
+ 
+# This value can be used to choose maximum number of lines shown for each 
+# particular reporting.
+#
+# Stats by domains
+MaxNbOfDomain = 25
+# Stats by hosts
+MaxNbOfHostsShown = 25
+MinHitHost    = 1
+# Stats by authenticated users
+MaxNbOfLoginShown = 5
+MinHitLogin   = 1
+# Stats by robots
+MaxNbOfRobotShown = 25
+MinHitRobot   = 1
+# Stats by pages
+MaxNbOfPageShown = 25
+MinHitFile    = 1
+# Stats by referers
+MaxNbOfRefererShown = 25
+MinHitRefer   = 1
+# Stats for keywords
+MaxNbOfKeywordsShown = 25
+MinHitKeyword  = 1
+
+
+ShowHeader=1                           # Show AWStats head title and icon
+ShowMenu=1                                     # Show menu header with links on detailed reports
+ShowMonthDayStats=1
+ShowDaysOfWeekStats=1
+ShowHoursStats=1
+ShowDomainsStats=1
+ShowHostsStats=1
+ShowAuthenticatedUsers=1
+ShowRobotsStats=1
+ShowPagesStats=1
+ShowCompressionStats=0         # Show report of compression stats when using mod_gzip
+ShowFileTypesStats=1
+ShowFileSizesStats=0           # Not yet available
+ShowBrowsersStats=1
+ShowOSStats=1
+ShowOriginStats=1
+ShowKeyphrasesStats=1
+ShowKeywordsStats=1
+ShowHTTPErrorsStats=1
+ShowWormsStats=1
+# Show misc chart
+# Default: a (See also MiscTrackerUrl parameter), Possible codes: ajdfrqwp
+ShowMiscStats=ajdfrqwp
+ShowScreenSizeStats=1
+
+
+# In the Origin chart, you have stats on where your hits came from. You can
+# include hits on pages that come from pages of the same site in this chart.
+# Possible values: 0 or 1
+# Default: 0
+#
+IncludeInternalLinksInOriginSection=1
+
+
+
+#-----------------------------------------------------------------------------
+# PLUGINS
+#-----------------------------------------------------------------------------
+
+# Add here all plugin files you want to load.
+# Plugin files must be .pm files stored in 'plugins' directory.
+# Uncomment LoadPlugin lines to enable a plugin after checking that perl
+# modules required by the plugin are installed.
+
+# PLUGIN: Tooltips
+# REQUIRED MODULES: None
+# PARAMETERS: None
+# DESCRIPTION: Add tooltips pop-up help boxes to HTML report pages.  
+# NOTE: This will increase HTML report pages size, thus server load and bandwidth.
+#
+#LoadPlugin="tooltips"
+
+# PLUGIN: DecodeUTFKeys
+# REQUIRED MODULES: Encode and URI::Escape
+# PARAMETERS: None
+# DESCRIPTION: Allow AWStats to show correctly (in language charset) 
+# keywords/keyphrases strings even if they were UTF8 coded by the 
+# referer search engine.
+#
+#LoadPlugin="decodeutfkeys"
+
+# PLUGIN: IPv6
+# PARAMETERS: None
+# REQUIRED MODULES: Net::IP and Net::DNS
+# DESCRIPTION: This plugin gives AWStats capability to make reverse DNS
+# lookup on IPv6 addresses.
+#
+#LoadPlugin="ipv6"
+
+# PLUGIN: HashFiles
+# REQUIRED MODULES: Storable
+# PARAMETERS: None
+# DESCRIPTION: AWStats DNS cache files are read/saved as native hash files. 
+# This increases DNS cache files loading speed, above all for very large web sites.
+#
+#LoadPlugin="hashfiles"
+
+
+# PLUGIN: UserInfo
+# REQUIRED MODULES: None
+# PARAMETERS: None
+# DESCRIPTION: Add a text (Firstname, Lastname, Office Department, ...) in
+# authenticated user reports for each login value.
+# A text file called userinfo.myconfig.txt, with two fields (first is login,
+# second is text to show, separated by a tab char) must be created in DirData
+# directory.
+#
+#LoadPlugin="userinfo"
+
+# PLUGIN: HostInfo
+# REQUIRED MODULES: Net::XWhois
+# PARAMETERS: None
+# DESCRIPTION: Add a column into host chart with a link to open a popup window that shows
+# info on host (like whois records).
+#
+#LoadPlugin="hostinfo"
+
+# PLUGIN: ClusterInfo
+# REQUIRED MODULES: None
+# PARAMETERS: None
+# DESCRIPTION: Add a text (for example a full hostname) in cluster reports for each cluster
+# number. A text file called clusterinfo.myconfig.txt, with two fields (first is
+# cluster number, second is text to show) separated by a tab char. must be
+# created into DirData directory.
+# Note this plugin is useless if ShowClusterStats is set to 0 or if you don't
+# use a personalized log format that contains %cluster tag.
+#
+#LoadPlugin="clusterinfo"
+
+# PLUGIN: UrlAliases
+# REQUIRED MODULES: None
+# PARAMETERS: None
+# DESCRIPTION: Add a text (Page title, description...) in URL reports before URL value.
+# A text file called urlalias.myconfig.txt, with two fields (first is URL,
+# second is text to show, separated by a tab char) must be created into
+# DirData directory.
+#
+#LoadPlugin="urlalias"
+
+# PLUGIN: TimeHiRes
+# REQUIRED MODULES: Time::HiRes (if Perl < 5.8)
+# PARAMETERS: None
+# DESCRIPTION: Time reported by -showsteps option is in millisecond. For debug purpose.
+#
+#LoadPlugin="timehires"                
+
+# PLUGIN: TimeZone
+# REQUIRED MODULES: Time::Local
+# PARAMETERS: [timezone offset]
+# DESCRIPTION: Allow AWStats to adjust time stamps for a different timezone
+# This plugin reduces AWStats speed by 10% !!!!!!!
+# LoadPlugin="timezone"
+# LoadPlugin="timezone +2"
+# LoadPlugin="timezone CET"
+#
+#LoadPlugin="timezone +2"
+
+# PLUGIN: Rawlog
+# REQUIRED MODULES: None
+# PARAMETERS: None
+# DESCRIPTION: This plugin adds a form in AWStats main page to allow users to see raw
+# content of current log files. A filter is also available.
+#
+#LoadPlugin="rawlog"
+
+# PLUGIN: GraphApplet
+# REQUIRED MODULES: None
+# PARAMETERS: [CSS classes to override]
+# DESCRIPTION: Supported charts are built by a 3D graphic applet.
+#
+#LoadPlugin="graphapplet /awstatsclasses"                              # EXPERIMENTAL FEATURE
+
+# PLUGIN: GraphGoogleChartAPI
+# REQUIRED MODULES: None
+# PARAMETERS: None
+# DESCRIPTION: Replaces the standard charts with free Google API generated images 
+# in HTML reports. If country data is available and more than one country has hits, 
+# a map will be generated using Google Visualizations.
+# Note: The machine where reports are displayed must have Internet access for the 
+# charts to be generated. The only data sent to Google includes the statistic numbers, 
+# legend names and country names.
+# Warning: This plugin is not compatible with option BuildReportFormat=xhtml. 
+#
+#LoadPlugin="graphgooglechartapi"
+
+# PLUGIN: GeoIPfree
+# REQUIRED MODULES: Geo::IPfree version 0.2+ (from Graciliano M.P.)
+# PARAMETERS: None
+# DESCRIPTION: Country chart is built from an Internet IP-Country database.
+# This plugin is useless for intranet only log files.
+# Note: You must choose between using this plugin (need Perl Geo::IPfree
+# module, database is free but not up to date) or the GeoIP plugin (need
+# Perl Geo::IP module from Maxmind, database is also free and up to date).
+# Note: Activestate provide a corrupted version of Geo::IPfree 0.2 Perl
+# module, so install it from elsewhere (from www.cpan.org for example).
+# This plugin reduces AWStats speed by up to 10% !
+#
+#LoadPlugin="geoipfree"
+
+# MAXMIND GEO IP MODULES: Please see documentation for notes on all Maxmind modules
+
+# PLUGIN: GeoIP
+# REQUIRED MODULES: Geo::IP or Geo::IP::PurePerl (from Maxmind)
+# PARAMETERS: [GEOIP_STANDARD | GEOIP_MEMORY_CACHE] [/pathto/geoip.dat] </pathto/override.txt>
+# DESCRIPTION: Builds a country chart and adds an entry to the hosts 
+# table with country name
+# Replace spaces in the path of geoip data file with string "%20".
+#
+LoadPlugin="geoip GEOIP_STANDARD /home/ldestailleur/git/awstats/test/maxmind/GeoIP.dat"
+
+# PLUGIN: GeoIP_City_Maxmind
+# REQUIRED MODULES: Geo::IP or Geo::IP::PurePerl (from Maxmind)
+# PARAMETERS: [GEOIP_STANDARD | GEOIP_MEMORY_CACHE] [/pathto/GeoIPCity.dat] </pathto/override.txt>
+# DESCRIPTION: This plugin adds a column under the hosts field and tracks the pageviews
+# and hits by city including regions.
+# Replace spaces in the path of geoip data file with string "%20".
+#
+LoadPlugin="geoip_city_maxmind GEOIP_STANDARD /home/ldestailleur/git/awstats/test/maxmind/GeoIPCity-532.dat"
+
+# PLUGIN: GeoIP_ASN_Maxmind
+# REQUIRED MODULES: Geo::IP or Geo::IP::PurePerl (from Maxmind)
+# PARAMETERS: [GEOIP_STANDARD | GEOIP_MEMORY_CACHE] [/pathto/GeoIPASN.dat[+/pathto/override.txt][+http://linktoASlookup]]
+# DESCRIPTION: This plugin adds a chart of AS numbers where the host IP address is registered. 
+# This plugin can display some ISP information if included in the database. You can also provide 
+# a link that will be used to lookup additional registration data. Put the link at the end of 
+# the parameter string and the report page will include the link with the full AS number at the end.
+# Replace spaces in the path of geoip data file with string "%20".
+#
+#LoadPlugin="geoip_asn_maxmind GEOIP_STANDARD /home/ldestailleur/git/awstats/test/maxmind/GeoIP.dat+http://enc.com.au/itools/aut-num.php?autnum="
+
+# PLUGIN: GeoIP_Region_Maxmind
+# REQUIRED MODULES: Geo::IP or Geo::IP::PurePerl (from Maxmind)
+# PARAMETERS: [GEOIP_STANDARD | GEOIP_MEMORY_CACHE] [/pathto/GeoIPRegion.dat]
+# DESCRIPTION:This plugin adds a chart of hits by regions. Only regions for US and 
+# Canada can be detected.
+# Replace spaces in the path of geoip data file with string "%20".
+#
+LoadPlugin="geoip_region_maxmind GEOIP_STANDARD /home/ldestailleur/git/awstats/test/maxmind/GeoIPRegion-515.dat"
+
+# PLUGIN: GeoIP_ISP_Maxmind
+# REQUIRED MODULES: Geo::IP or Geo::IP::PurePerl (from Maxmind)
+# PARAMETERS: [GEOIP_STANDARD | GEOIP_MEMORY_CACHE] [/pathto/GeoIPISP.dat]
+# DESCRIPTION: This plugin adds a chart of hits by ISP.
+# Replace spaces in the path of geoip data file with string "%20".
+#
+#LoadPlugin="geoip_isp_maxmind GEOIP_STANDARD /home/ldestailleur/git/awstats/test/maxmind/GeoIPISP-122.dat"
+
+# PLUGIN: GeoIP_Org_Maxmind
+# REQUIRED MODULES: Geo::IP or Geo::IP::PurePerl (from Maxmind)
+# PARAMETERS: [GEOIP_STANDARD | GEOIP_MEMORY_CACHE] [/pathto/GeoIPOrg.dat]
+# DESCRIPTION: This plugin adds a chart of hits by Organization name
+# Replace spaces in the path of geoip data file with string "%20".
+#
+LoadPlugin="geoip_org_maxmind GEOIP_STANDARD /home/ldestailleur/git/awstats/test/maxmind/GeoIPOrg-111.dat"
+
+
+
+#-----------------------------------------------------------------------------
+# EXTRA SECTION
+#-----------------------------------------------------------------------------
+
+# WARNING: Extra sections are an experimental feature, not stable yet !!!
+
+# You can define your own charts, you choose here what are rows and columns
+# keys. This feature is particularly useful for marketing purpose, tracking
+# products orders for example.
+# For this, edit all parameters of Extra section. Each set of parameter is a
+# different chart. For several charts, duplicate section changing the number.
+# Note that each Extra section reduces AWStats speed by 10%.
+#
+# WARNING: A wrong setup of Extra section can result in too large arrays
+# that will consume all your memory, making AWStats unusable after several
+# updates, so be sure to setup it correctly.
+# In most cases, you don't need this feature.
+#
+# ExtraSectionNameX is title of your personalized chart.
+# ExtraSectionConditionX are conditions on URL and/or QUERY_STRING and/or
+#   REFERER you can use to count or not the hit. Use "|" for "OR".
+# ExtraSectionFirstColumnTitleX is the first column title of the chart.
+# ExtraSectionFirstColumnValuesX is a Regex string to tell AWStats how to 
+#   extract the value used for first column. Each different value found will
+#   be a different row. Be sure that list of different values is "limited" to
+#   avoid "not enough memory" problems !
+# ExtraSectionStatTypesX are things you want to count. You can use standard
+#   code letters (P for pages,H for hits,B for bandwidth,L for last access).
+# MaxNbOfExtraX is maximum number of rows shown in chart.
+# MinHitExtraX is minimum number of hits required to be shown in chart.
+#
+
+# Example to report the 20 products the most ordered by "order.cgi" script
+ExtraSectionName1="Product orders"
+ExtraSectionCondition1="URL,\/cgi\-bin\/order\.cgi|URL,\/cgi-bin2\/order\.cgi"
+ExtraSectionFirstColumnTitle1="Product ID"
+ExtraSectionFirstColumnValues1="QUERY_STRING,productId=([^&]+)"
+ExtraSectionStatTypes1=PL
+MaxNbOfExtra1=20
+MinHitExtra1=1
+
+
+ExtraSectionName2="Redirect"
+ExtraSectionCondition2="URL,\/cgi\-bin\/awredir\.pl"
+ExtraSectionFirstColumnTitle2="Url"
+ExtraSectionFirstColumnValues2="QUERY_STRING,url=([^&]+)"
+ExtraSectionStatTypes2=HL
+MaxNbOfExtra2=20
+MinHitExtra2=1
diff --git a/test/test.pl b/test/test.pl

index 9655d1a10f56cc0fcf7569a550462cda850c79e2..5251bd488ffa163180b48dad3a27d581fa29cd8c 100755 (executable)
--- a/test/test.pl
+++ b/test/test.pl
@@ -13,38 +13,42 @@ $PERL="perl";
  
  @TESTLIST=(
  "testglobal",
-"testlogins",
-"testworms",
-"testipv6",
-"testdnsdone",
-"testextra",
-"testgeoip",
-"testgeoip_region_maxmind",
-"testgeoip_city_maxmind",
+"testsmall",
+"testnginx",
+"testtime5",
+#"testlogins",
+#"testworms",
+#"testipv6",
+#"testdnsdone",
+#"testextra",
+#"testgeoip",
+#"testgeoip_region_maxmind",
+#"testgeoip_city_maxmind",
  "testgeoip_isp_maxmind",
-"testgeoip_org_maxmind",
-"testrobot",
-"benchmark",
-"testmoddeflate","testmodgzip","testmodgzip2","testmodgzip3",
-"testurlwithquery",
-"testwindowsmediaserver","testwindowsmediaserver9","testrealmediaserver","testdarwinserver",
-"testsquidextended",
-"testisa1",
-"testisa2",
-"testlotus",
-"testlotus65",
-"testwebstar",
-"testzope",
-"testcluster",
-"testoracle9ias",
-"testproftp","testproftp2","testvsftpd",
-"testskipfiles",
-"testvirtualhosts",
-"testsendmail",
-"testpostfix",
-"testpostfix1",
-"testpostfix4",
-"testexchange");
+#"testgeoip_org_maxmind",
+#"testrobot",
+#"benchmark",
+#"testmoddeflate","testmodgzip","testmodgzip2","testmodgzip3",
+#"testurlwithquery",
+#"testwindowsmediaserver","testwindowsmediaserver9","testrealmediaserver","testdarwinserver",
+#"testsquidextended",
+#"testisa1",
+#"testisa2",
+#"testlotus",
+#"testlotus65",
+#"testwebstar",
+#"testzope",
+#"testcluster",
+#"testoracle9ias",
+#"testproftp","testproftp2","testvsftpd",
+#"testskipfiles",
+#"testvirtualhosts",
+#"testsendmail",
+#"testpostfix",
+#"testpostfix1",
+#"testpostfix4",
+#"testexchange"
+);
  
  #@TESTLIST=("testglobal","testsmall","testtime5");
  #@TESTLIST=("testlogins");
@@ -88,7 +92,7 @@ while(1==1)
  {
         
         print "Choose test to execute...\n";
-       sprintf("$02i %s",0,"All");
+       sprintf("$2i %s", 0, "All");
         my $i=1;
         foreach my $key (@TESTLIST) {
             print sprintf("%02i) %s\n",$i,$key);
@@ -106,7 +110,7 @@ while(1==1)
         else { push @chosen, $TESTLIST[$bidon-1]; }
         
         # Option output
-       print "Choose output option (browserdetail, osdetail, ...)\n";
+       print "Choose output option ('', 'browserdetail', 'osdetail', ...)\n";
         $bidon='';
         print "Your choice : ";
         $bidon=<STDIN>;
author	Laurent Destailleur <eldy@destailleur.fr>
	Sun, 28 Jul 2024 13:42:06 +0000 (15:42 +0200)
committer	Laurent Destailleur <eldy@destailleur.fr>
	Sun, 28 Jul 2024 13:42:06 +0000 (15:42 +0200)
robots.pm	[deleted file]	patch \| blob \| blame \| history
test/awstats/conf/awstats.testnginx.conf	[new file with mode: 0644]	patch \| blob
test/test.pl		patch \| blob \| blame \| history