]> git.ipfire.org Git - thirdparty/AWStats.git/commitdiff
corrected some old errors in file.
authorvisualperception <3543160+visualperception@users.noreply.github.com>
Sun, 13 Jun 2021 16:04:06 +0000 (17:04 +0100)
committerGitHub <noreply@github.com>
Sun, 13 Jun 2021 16:04:06 +0000 (17:04 +0100)
Not sure why these old errors are still in the develop branch robots.pm file but they were.
Specifically:
Mediapartners-Google
laserlikebot
MojeekBot and mojeek
nbot put at end so it doesn't conflict with other robots containing nbot in their name

robots.pm [new file with mode: 0644]

diff --git a/robots.pm b/robots.pm
new file mode 100644 (file)
index 0000000..7328e92
--- /dev/null
+++ b/robots.pm
@@ -0,0 +1,2786 @@
+# AWSTATS ROBOTS DATABASE\r
+#-------------------------------------------------------\r
+# If you want to add robots to extend AWStats database detection capabilities,\r
+# you must add an entry in RobotsSearchIDOrder_listx and RobotsHashIDLib.\r
+\r
+# The entry in RobotsSearchIDOrder_listx is a Perl regular expression\r
+# (see http://perldoc.perl.org/perlreref.html). AWStats applies these\r
+# expressions to the user agent string in the order given by the lists. The\r
+# first match specifies the robot.\r
+#\r
+# Note: This regular expression must not contain any whitespace.\r
+# Otherwise AWStats will produce lines in the database that\r
+# will be misinterpreted and as a consequence the corresponding data in the\r
+# generated HTML reports will be wrong. If you want to match whitespace in\r
+# the user agent string, use other constructs like '\s', '[:blank:]',\r
+# '\p{IsSpace}', '\x20' etc.\r
+#\r
+# The corresponding entry in RobotsHashIDLib contains the regular expression\r
+# as key, followed by a string containing HTML-text. AWStats inserts this\r
+# text into reports to describe the bot. If possible the text should contain\r
+# a link to the bot home page. This makes it easier for sysadmins to find\r
+# the information necessary e.g. to adapt the robots.txt file.\r
+#\r
+# An entry in the RobotsAffiliateLib is not necessary. An entry in this list\r
+# contains as first part the regular expression specifying the bot. The\r
+# second part is a string that gives the Company or product managing the bot.\r
+# This information is not used yet.\r
+#\r
+# There are several sorts of bots that AWStats is not able to detect and\r
+# therefore a considerable amount of bot generated traffic counts\r
+# as user traffic:\r
+#\r
+# a) A crawler that identifies itself in the referrer string, but not in\r
+#    the user agent string. An example is the crawler from semalt.semalt.com.\r
+#\r
+# b) Crawlers that correctly access robots.txt but identify themselves\r
+#    in the user agent string only once or just a few times. Most of the\r
+#    time a user agent string is used that does not contain hints that\r
+#    a bot is involved. An example is the iCjobs spider.\r
+#    msnbot-UDiscovery/2.0b seems to show this behaviour too.\r
+#\r
+#\r
+#\r
+#-------------------------------------------------------\r
+\r
+# 2021-05-05 RobC\r
+\r
+# Removed Baidu catchall because it was picking up baidu.sogo.uc.UCBrowser, which is a phone browser\r
+# Added baiduspider- catchall instead\r
+\r
+# Newly added from 2021-05-05\r
+# Adsbot\r
+# BW/\r
+# Bytespider\r
+# CheckMarkNetwork/\r
+# DuckDuckBot\r
+# Foregenix Web Scan\r
+# IonCrawl\r
+# Linguee Bot\r
+# Neevabot\r
+# PetalBot\r
+# TkBot\r
+# vuhuvBot\r
+\r
+\r
+# 2018-03-13 RobC \r
+#              Added 36 robots and one generic ( survey ) using v 7.7 robots file as base. \r
+#              Also moved robot "Obot" into generics so that it is singled out as an individual Robot.         \r
+#\r
+# 2016-09-02 RobC \r
+#              Fixed a few errors and added a few missing bots from awstats 7.5 release.\r
+#\r
+# 2016-08-28 RobC \r
+#              Complete re-build of this file almost from scratch.\r
+#              dropped many old bots, added many new bots and reordered file.\r
+#              edited and added regex expressions to stop spaces causing problems.\r
+#              You should tune file by placing the most common robots crawling your site at top \r
+#              in List1.\r
+#\r
+#\r
+#              N.B. many bots need to be in correct order so don't change order without checking if\r
+#              change will cause counts to be allocated to wrong bot. Not always simple.\r
+#\r
+#\r
+# 2005-08-19 Sean Carlos http://www.antezeta.com/awstats.html\r
+#              added dipsie (not tested with real data).\r
+#              added DomainsDB.net http://domainsdb.net/\r
+#              added ia_archiver-web.archive.org (was inadvertently grouped with Alexa traffic)\r
+#              added Nutch (used by looksmart (furl?))\r
+#              added rssImagesBot\r
+#              added Sqworm\r
+#              added t\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e\r
+#              added w3c css-validator\r
+#              added documentation link to bot home pages for above and selected major bots.\r
+#                    In the case of international bots, choose .com page.\r
+#                    Included tool tip (html "title").\r
+#                    To do: parameterize to match both AWStats language and tooltips settings.\r
+#                    To do: add html links for all bots based on current documentation in source\r
+#                           files referenced below.\r
+#              changed '\wbot[\/\-]', to '\wbot[\/\-]' (removed comma)\r
+#              made minor grammar corrections to notes below\r
+# 2005-08-24   added YahooSeeker-Testing\r
+#                      added w3c-checklink\r
+#                      updated url for ask.com\r
+# 2005-08-24           added Girafabot http://www.girafa.com/\r
+# 2005-08-30           added PluckFeedCrawler http://www.pluck.com/\r
+#              added Gaisbot/3.0 (robot05@gais.cs.ccu.edu.tw; )\r
+#              added geniebot (wgao@genieknows.com)\r
+#              added BecomeBot link http://www.become.com/site_owners.html\r
+#              added topicblogs http://www.topicblogs.com/\r
+#              added Powermarks; seen used by referrer spam\r
+#              added YahooSeeker\r
+#              added NG/2. http://www.exabot.com/\r
+# 2005-09-15   added link for Walhello appie\r
+#              added bender focused_crawler\r
+#              updated YahooSeeker description (blog crawler)\r
+# 2005-09-16   added link for http://linkchecker.sourceforge.net\r
+#              added ConveraCrawler/0.9d ( http://www.authoritativeweb.com/crawl)\r
+#              added Blogslive  info@blogslive.com intelliseek.com\r
+#              added BlogPulse (ISSpider-3.0) intelliseek.com\r
+# 2005-09-26   added Feedfetcher-Google (http://www.google.com/feedfetcher.html)\r
+#              added EverbeeCrawler\r
+#              added Yahoo-Blogs http://help.yahoo.com/help/us/ysearch/crawling/crawling-02.html\r
+#              added link for Bloglines http://www.bloglines.com\r
+# 2005-10-19   fixed Feedfetcher-Google (http://www.google.com/feedfetcher.html)\r
+#              added Blogshares Spiders (Synchronized V1.5.1)\r
+#              added yacy\r
+# 2005-11-21   added Argus www.simpy.com\r
+#              added BlogsSay :: RSS Search Crawler (http://www.blogssay.com/)\r
+#              added MJ12bot http://majestic12.co.uk/bot.php\r
+#              added OpenTaggerBot (http://www.opentagger.com/opentaggerbot.htm)\r
+#              added OutfoxBot/0.3 (For internet experiments; outfox.agent@gmail.com)\r
+#              added RufusBot Rufus Web Miner http://64.124.122.252.webaroo.com/feedback.html\r
+#              added Seekbot (http://www.seekbot.net/bot.html)\r
+#              added Yahoo-MMCrawler/3.x (mms-mmcrawler-support@yahoo-inc.com)\r
+#               added link for BaiDuSpider\r
+#              added link for Blogshares Spider\r
+#              added link for StackRambler http://www.rambler.ru/doc/faq.shtml\r
+#              added link for WISENutbot\r
+#              added link for ZyBorg/1.0 (wn-14.zyborg@looksmart.net; http://www.WISEnutbot.com.  Moved location to above wisenut to avoid classification as wisenut\r
+# 2005-12-15\r
+#              added FAST Enteprise Crawler/6 (www dot fastsearch dot com). Note spelling Enteprise not Enterprise.\r
+#              added findlinks http://wortschatz.uni-leipzig.de/findlinks/\r
+#              added IBM Almaden Research Center WebFountain™ http://www.almaden.ibm.com/cs/crawler [hc3]\r
+#              added INFOMINE/8.0 VLCrawler (http://infomine.ucr.edu/useragents)\r
+#              added lmspider (lmspider@scansoft.com) http://www.nuance.com/\r
+#              added noxtrumbot http://www.noxtrum.com/\r
+#              added SandCrawler (Microsoft)\r
+#              added SBIder http://www.sitesell.com/sbider.html\r
+#              added SeznamBot http://fulltext.seznam.cz/\r
+#              added sohu-search http://corp.sohu.com/ (looked for //robots.txt not /robots.txt)\r
+#              added the ruffle SemanticWeb crawler v0.5 - http://www.unreach.net\r
+#              added WebVulnCrawl/1.0 libwww-perl/5.803 (looked for //robots.txt not /robots.txt)\r
+#              added Yahoo! Japan keyoshid http://www.yahoo.co.jp/\r
+#              added Y!J http://help.yahoo.co.jp/help/jp/search/indexing/indexing-15.html\r
+#              added link for GigaBot\r
+#              added link for MagpieRSS\r
+#              added link for MSIECrawler\r
+# 2005-12-21\r
+#              added aipbot http://www.aipbot.com aipbot@aipbot.com [matthys70 users.sourceforge.net]\r
+#              added Everest-Vulcan Inc./0.1 (R&D project; http://everest.vulcan.com/crawlerhelp)\r
+#              added Fast-Search-Engine http://www.fast-search-engine.com/ [matthys70  users.sourceforge.net]\r
+#              added g2Crawler (nobody@airmail.net) http://crawler.instantnetworks.net/\r
+#              added Jakarta commons-httpclient http://jakarta.apache.org/commons/httpclient/ (hit robots.txt).  May be used as robot or browser - a site may want to remove this entry.\r
+#              added OmniExplorer_Bot http://www.omni-explorer.com/ [matthys70 users.sourceforge.net]\r
+#              added USTC-Semantic-Group ai.ustc.edu.cn/mas/en/research/index.php ?\r
+# 2005-12-22\r
+#              added EARTHCOM.info www.earthcom.info\r
+#              added HTTrack off-line browser 'httrack','HTTrack', http://www.httrack.com/ [Moizes Gabor]\r
+#              added KummHttp http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_g_l_301105_2\b [Moizes Gabor]\r
+# 2006-01-01\r
+#              added Dulance http://www.dulance.com/bot.jsp\r
+#              added MojeekBot http://www.mojeek.com/bot.html\r
+#              added nicebot http://www.egghelp.org/setup.htm ?\r
+#              added Snappy http://www.urltrends.com/faq.php\r
+#              added sohu agent\r
+#              added VORTEX http://marty.anstey.ca/robots/vortex/ [matthys70 users.sourceforge.net]\r
+#              added zspider http://feedback.redkolibri.com/\r
+# 2006-01-13\r
+#              added boitho.com-dc http://www.boitho.com/dcbot.html\r
+#              added IRLbot http://irl.cs.tamu.edu/crawler\r
+#              added virus_detector virus_harvester@securecomputing.com\r
+#              added Wavefire http://www.wavefire.com; info@wavefire.com\r
+\r
+#              added WebFilter Robot\r
+# 2006-01-24\r
+#              added Shim-Crawler http://www.logos.ic.i.u-tokyo.ac.jp/crawler/; crawl@logos.ic.i.u-tokyo.ac.jp\r
+#              added Exabot exabot.com\r
+#              added LetsCrawl.com http://letscrawl.com\r
+#              added ichiro http://help.goo.ne.jp/door/crawlerE.html\r
+# 2006-01-27    additional 22 robots from a list provided by Moizes Gabor\r
+#              added ALeadSoftbot      http://www.aleadsoft.com/bot.htm\r
+#              added CipinetBot        http://www.cipinet.com/bot.html\r
+#              added Cuasarbot http://www.cuasar.com/\r
+#              added Dumbot    http://www.dumbfind.com/\r
+#              added Extreme_Picture_Finder    http://www.exisoftware.com/\r
+#              added Fooky.com/ScorpionBot/ScoutOut    http://www.fooky.com/scorpionbots\r
+#              added IlTrovatore-Setaccio      http://www.iltrovatore.it/aiuto/motore_di_ricerca.html  bot@iltrovatore.it\r
+#              added InsurancoBot      http://www.fastspywareremoval.com/\r
+#              added InternetArchive   http://lucene.apache.org/nutch/bot.html         nutch-agent@lucene.apache.org\r
+#              added KazoomBot http://www.kazoom.ca/bot.html   kazoombot@kazoom.ca\r
+#              added Kurzor    http://www.easymail.hu/ cursor@easymail.hu\r
+#              added NutchCVS  http://lucene.apache.org/nutch/bot.html nutch-agent@lucene.apache.org\r
+#              added NutchOSU-VLIB     http://lucene.apache.org/nutch/bot.html nutch-agent@lucene.apache.org\r
+#              added Orbiter   http://www.dailyorbit.com/bot.htm\r
+#              added PHP_version_tracker       http://www.nexen.net/phpversion/bot.php\r
+#              added SuperBot  http://www.sparkleware.com/superbot/\r
+#              added SynooBot  http://www.synoo.de/bot.html    webmaster@synoo.com\r
+#              added TestBot   http://www.agbrain.com/\r
+#              added TutorGigBot       http://www.tutorgig.info/\r
+#              added WebIndexer        mailto://webindexerv1@yahoo.com\r
+#              added WebMiner  http://64.124.122.252/feedback.html\r
+# 2006-02-01\r
+#              added heritrix https://sourceforge.net/forum/message.php?msg_id=3550202\r
+#              added Zeus Webster Pro https://sourceforge.net/forum/message.php?msg_id=3141164\r
+#               additional robots from a list provided by Moizes Gabor [ mojzi -a-t- free mail hu ]\r
+#              added Candlelight_Favorites_Inspector\r
+#              added DomainChecker\r
+#              added EasyDL\r
+#              added FavOrg\r
+#              added Favorites_Sweeper\r
+#              added Html_Link_Validator\r
+#              added Internet_Ninja\r
+#              added JRTwine_Software_Check_Favorites_Utility\r
+#              fixed Microsoft_URL_Control\r
+#              added miniRank\r
+#              added Missigua_Locator\r
+#              added NPBot\r
+#              added Ocelli\r
+#              added Onet.pl_SA\r
+#              added proodleBot\r
+#              added SearchGuild_DMOZ_Experiment\r
+#              added Susie\r
+#              added Website_Monitoring_Bot\r
+#              added Xenu_Link_Sleuth\r
+# 2006-05-15\r
+#              added ASPseek http://www.aspseek.org/\r
+#              added AdamM Bot http://home.blic.net/adamm/\r
+#              added archive.org_bot http://crawls.archive.org/collections/bncf/crawl.html\r
+#              added arianna.libero.it (Italian Portal/search engine)\r
+#              added Biz360 spider http://www.biz360.com\r
+#              added BlogBridge Service http://www.blogbridge.com/\r
+#              added BlogSearch http://www.icerocket.com/\r
+#              added libcrawl\r
+#              added edgeio-relanshanbottriever http://www.edgeio.com\r
+#              added FeedFlow http://feedflow.com/about\r
+#              added Biblioteca Nazionale Centrale di Firenze (Italian National Archive) http://www.bncf.firenze.sbn.it/raccolta.txt\r
+#              added Java catchall - used by many spam bots\r
+#              added lanshanbot http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=%5Cbid_g_l_140406_1%5Cb\r
+#              added msnbot-media http://search.msn.com/msnbot.htm\r
+#              added MT::Telegraph::Agent\r
+#              added Netluchs http://www.netluchs.de/ (German SE bot)\r
+#              added oBot http://www.webmasterworld.com/forum11/1616.htm\r
+#              added Onfolio http://www.onfolio.com/  (IE Toolbar plugin) - hit rss feeds.\r
+#              added ping.blo.gs http://blo.gs/ping.php blog bot\r
+#              added Sphere Scout http://www.sphere.com/\r
+#              added sproose crawler http://www.sproose.com/bot.html\r
+#              added SyndicAPI http://syndicapi.com/bot.html\r
+#              added Yahoo! Mindset http://mindset.research.yahoo.com/\r
+#              added msrabot\r
+#              added Vagabondo & Vagabondo-WAP http://www.wise-guys.nl/Contact/index.php?botselected=webagents)#=uk\r
+#              fixed Missigua Locator detection (Missigua_Locator -> Missigua Locator)\r
+#              changed echo to echo! to avoid conflict with the bonecho (Firefox 2.0) browser.\r
+#                      This requires you to reprocess historic logs if you want EchO! to be recognized for older reports.\r
+# 2006-05-17\r
+#              added Alpha Search Agent # 62.152.125.60 Eurologon Srl\r
+#              added Krugle http://www.krugle.com/crawler/info.html the search engine for developers\r
+#              added Octora Beta Bot http://www.octora.com/ # Blog and Rss Search Engine\r
+#              added UbiCrawler http://law.dsi.unimi.it/ubicrawler/\r
+#              added Yahoo! Slurp China http://misc.yahoo.com.cn/help.html\r
+#                      You must reprocess old logs for the Yahoo! Slurp China bot to be detected in old reports\r
+# 2006-05-20\r
+#              added 1-More Scanner http://www.myzips.com/software/1-More-Scanner.phtml\r
+#              added Accoona-AI-Agent http://www.accoona.com/\r
+#              added ActiveBookmark http://www.libmaster.com/active_bookmark.php\r
+#              added BIGLOTRON http://www.biglotron.com/robot.html\r
+#              added Bookmark-Manager http://bkm.sourceforge.net/\r
+#              added cbn00glebot\r
+#              added Cerberian Drtrs http://www.pgts.com.au/cgi-bin/psql?robot_info=25240\r
+#              added CFNetwork http://www.cocoadev.com/index.pl?CFNetwork\r
+#              added CheckWeb link validator http://p.duby.free.fr/chkweb.htm\r
+#              added Computer and Automation Research Institute Crawler http://www.ilab.sztaki.hu/~stamas/publications/p184-benczur.html\r
+#              added ConveraCrawler http://www.authoritativeweb.com/crawl/\r
+#              added ConveraMultiMediaCrawler http://www.authoritativeweb.com/crawl/\r
+#              added CSE HTML Validator Lite Online http://online.htmlvalidator.com/php/onlinevallite.php\r
+#              added Cursor http://adcenter.hu/docs/en/bot.html\r
+#              added Custo http://www.netwu.com/custo/\r
+#              added DataFountains/DMOZ Downloader http://infomine.ucr.edu/\r
+#              added Deepindex http://www.deepindex.net/faq.php\r
+#              added DNSGroup http://www.dnsgroup.com/\r
+#              added DoCoMo http://www.nttdocomo.co.jp/\r
+#              added dumm.de-Bot http://www.dumm.de/\r
+#              added ETS v http://www.freetranslation.com/help/\r
+#              added eventax http://www.eventax.de/\r
+#              added FAST Enterprise Crawler * crawleradmin.t-info@telekom.de http://www.telekom.de/\r
+#              added FAST Enterprise Crawler http://www.fast.no/\r
+#              added FAST Enterprise Crawler * T-Info_BI_cluster crawleradmin.t-info@telekom.de http://www.telekom.de/\r
+#              added FeedValidator http://feedvalidator.org/\r
+#              added FilmkameraBot http://www.filmkamera.at/bot.html\r
+#              added Findexa Crawler http://www.findexa.no/gulesider/article26548.ece\r
+#              added Global Fetch http://www.wesonet.com/\r
+#              added GOFORITBOT http://www.goforit.com/about/\r
+#              added GoForIt.com http://www.goforit.com/about/\r
+#              added GPU p2p crawler http://gpu.sourceforge.net/search_engine.php\r
+#              added HooWWWer http://cosco.hiit.fi/search/hoowwwer/\r
+#              added HPPrint\r
+#              added HTMLParser http://htmlparser.sourceforge.net/\r
+#              added Hundesuche.com-Bot http://www.hundesuche.com/\r
+#              added InfoBot http://www.infobot.org/\r
+#              added InfociousBot http://corp.infocious.com/tech_crawler.php\r
+#              added InternetSupervision http://internetsupervision.com/\r
+#              added isearch2006 http://www.yahoo.com.cn/\r
+#              added IUPUI_Research_Bot http://spamhuntress.com/2005/04/25/a-mail-harvester-visits/\r
+#              added KalamBot http://64.124.122.251/feedback.html\r
+#              added kamano.de NewsFeedVerzeichnis http://www.kamano.de/\r
+#              added Kevin http://dznet.com/kevin/\r
+#              added KnowItAll http://www.cs.washington.edu/research/knowitall/\r
+#              added Knowledge.com http://www.knowledge.com/\r
+#              added Kouaa Krawler http://www.kouaa.com/\r
+#              added ksibot http://ego.ms.mff.cuni.cz/\r
+#              added Link Valet Online http://www.htmlhelp.com/tools/valet/\r
+#              added lwp-request http://search.cpan.org/~gaas/libwww-perl-5.69/bin/lwp-request\r
+#              added lwp-trivial http://search.cpan.org/src/GAAS/libwww-perl-5.805/lib/LWP/Simple.pm\r
+#              added MapoftheInternet.com http://MapoftheInternet.com/\r
+#              added Matrix S.p.A. - FAST Enterprise Crawler http://tin.virgilio.it/\r
+#              added Megite http://www.megite.com/\r
+#              added Metaspinner http://index.meta-spinner.de/\r
+#              added Mini-reptile\r
+#              added Misterbot http://www.misterbot.fr/\r
+#              added Miva http://www.miva.com/\r
+#              added Mizzu Labs http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_m_141105_2\b\r
+#              added MSRBOT http://research.microsoft.com/research/sv/msrbot/\r
+#              added MS SharePoint Portal Server - MS Search 4.0 Robot http://support.microsoft.com/default.aspx?scid=kb;en-us;284022\r
+#              added Mydoyouhike http://www.doyouhike.net/my\r
+#              added NASA Search http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_n_s_140506_2\b\r
+#              added NetSprint http://www.netsprint.pl/serwis/\r
+#              added NimbleCrawler http://www.healthline.com/\r
+#              added OpenWebSpider http://www.openwebspider.org/\r
+#              added Oracle Ultra Search http://www.oracle.com/technology/products/ultrasearch/index.html\r
+#              added OSSProxy http://www.marketscore.com/FAQ.Aspx\r
+#              added passwordmaker.org http://passwordmaker.org/\r
+#              added PEAR HTTP Request class http://pear.php.net/\r
+#              added PEERbot http://www.peerbot.com/\r
+#              added PHP version tracker http://www.nexen.net/phpversion/bot.php\r
+#              added PictureOfInternet http://malfunction.org/poi/\r
+#              added plinki http://www.plinki.com/\r
+#              added Port Huron Labs http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_n_s_1133\b\r
+#              added PostFavorites http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_n_s_1135\b\r
+#              added ProjectWF-java-test-crawler\r
+#              added PyQuery http://sourceforge.net/projects/pyquery/\r
+#              added Schizozilla http://spamhuntress.com/2005/03/18/gizmo/\r
+#              added Scumbot\r
+#              added Sensis Web Crawler http://www.sensis.com.au/\r
+#              added snap.com beta crawler http://www.snap.com/\r
+#              added Steeler http://www.tkl.iis.u-tokyo.ac.jp/~crawler/\r
+#              added STEROID  Download http://faqs.org.ru/progr/pascal/delphi_internet2.htm\r
+#              added Suchfin-Bot http://www.suchfin.de/\r
+#              added Sunrise http://www.sunrisexp.com/\r
+#              added Tagyu Agent http://www.tagyu.com/\r
+#              added Tcl http client package http://www.tcl.tk/man/tcl8.4/TclCmd/http.htm\r
+#              added TeragramCrawlerSURF http://www.teragram.com/\r
+#              added Test Crawler http://netp.ath.cx/\r
+#              added UnChaos Bot Hybrid Web Search Engine http://www.unchaos.com/\r
+#              added unido-bot http://www.unchina.org/unido/unido/our_projects/3_3.html\r
+#              added UniversalFeedParser http://feedparser.org/ (seen from md301000.inktomisearch.com)\r
+#              added updated http://www.updated.com/\r
+#              added Vermut http://vermut.aol.com\r
+#              added versus crawler from eda.baykan@epfl.ch http://www.epfl.ch/Eindex.html\r
+#              added Vespa Crawler (Yahoo Norway?) http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=%5Cbid_t_z_030406_1%5Cb\r
+#              added VSE http://www.vivisimo.com/\r
+#              added webcrawl.net http://www.webcrawl.net/\r
+#              added Web Downloader http://www.krasu.ru/soft/chuchelo/\r
+#              added Webdup http://www.webdup.com/en/index.html\r
+#              added Wells Search http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_t_z_1484\b\r
+#              added WordPress http://wordpress.org/\r
+#              added wume crawler http://wume.cse.lehigh.edu/~xiq204/crawler/\r
+#              added Xenu's Link Sleuth (with ')\r
+#              added xirq http://www.xirq.com/\r
+#              added yoogliFetchAgent http://www.yoogli.com/\r
+#              added Z-Add Link Checker http://w3.z-add.co.uk/linkcheck/\r
+#              -- fix - some robots were reported with _ where _ should have been a space.\r
+#              changed Xenu Link Sleuth\r
+#              changed microsoft[_+\s]url[_+\s]control -> microsoft_url_control\r
+#              changed favorites_sweeper -> favorites_sweeper\r
+#              -- updates\r
+#              updated AskJeeves to Ask\r
+# 2012-06-05 Albrecht Mueller\r
+#              added Grabber from SDSC (San Diego Supercomputer Center).\r
+# 2013-09-30 Albrecht Mueller\r
+# AWStats probably cannot detect this bot as it identifies itself in\r
+# the referrer field and not in the user agent string.\r
+#92.113.100.35 - - [29/Sep/2013:17:22:46 +0200] "GET /robots.txt HTTP/1.1" 200 516 "-" "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0" "-"\r
+#92.113.100.35 - - [29/Sep/2013:17:22:49 +0200] "GET /tghome.htm HTTP/1.1" 200 4445 "http://extrabot.com/help/frytygativyheku.htm" "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0" "-"\r
+#92.113.100.35 - - [29/Sep/2013:17:22:51 +0200] "GET / HTTP/1.1" 200 5467 "http://extrabot.com/help/frytygativyheku.htm" "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0" "-"\r
+\r
+# to do  MS Search 4.0 Robot\r
+\r
+#package AWSROB;\r
+\r
+\r
+# Robots list was found at http://www.robotstxt.org/wc/active/all.txt\r
+# Other robots can be found at http://www.jafsoft.com/searchengines/webbots.html\r
+# Rem: To avoid bad detection, some robot's ids were removed from this list:\r
+#      - Robots with ID of 3 letters only\r
+#      - Robots called 'webs' and 'tcl'\r
+# Rem: directhit changed into direct_hit (its real id)\r
+# Rem: calif changed into calif[^r] to avoid confusion with the Tiscalifreenet browser\r
+# Rem: fish changed into [^a]fish to avoid confusion with the Madsafish browser\r
+# Rem: roadrunner changed into road_runner\r
+# Rem: lycos changed to lycos_ to avoid confusion with lycos-online browser\r
+# Rem: voyager changed into ^voyager\/ to avoid excluding the voyager and amigavoyager browsers\r
+\r
+# RobotsSearchIDOrder\r
+# It contains all matching criteria to search for in log fields. This list is\r
+# used to know in which order to search Robot IDs.\r
+# Most frequent ones are in list1, used when LevelForRobotsDetection is 1 or more\r
+# Minor robots are in list2, used when LevelForRobotsDetection is 2 or more\r
+# Note: Robots IDs are in lower case, '_', ' ' and '+' are changed into '[_+\s]' and are quoted.\r
+#-------------------------------------------------------\r
+\r
+\r
+@RobotsSearchIDOrder_list1 = (\r
+# Common robots (In robot file)\r
+'bingbot/',\r
+'bingpreview',\r
+'MSIECrawler',\r
+'msnbot/',\r
+'msnbot\-media/',\r
+'AdIdxBot/',\r
+'NOT[\x20]Googlebot/',\r
+'Googlebot/',\r
+'Google[\x20]Web[\x20]Preview',\r
+'Googlebot\-Image/',\r
+'Googlebot\-Mobile/',\r
+'Google[\x20]Page[\x20]Speed',\r
+'google\-sitemaps',\r
+'Googlebot\-News',\r
+'Googlebot\-Video/',\r
+'AdsBot\-Google[\x20]\(',\r
+'AdsBot\-Google\-Mobile\-Apps',\r
+'Adsbot',\r
+'Mediapartners-Google',\r
+'Feedfetcher\-Google',\r
+'Google\-Adwords\-Instant',\r
+'Firefox/1\.5',\r
+'Yahoo![\x20]Slurp[\x20]China',\r
+'Yahoo![\x20]Slurp',\r
+'Baiduspider/',\r
+'Baiduspider\-image',\r
+'Baiduspider-',\r
+'YandexBot/',\r
+'YandexImages/',\r
+'YandexImageResizer',\r
+'YandexMetrika/',\r
+'YandexMobileBot/',\r
+'yandex',\r
+'electricmonk/',\r
+'spbot/',\r
+'SeznamBot/',\r
+'msie8',\r
+'AhrefsBot/',\r
+'007ac9[\x20]Crawler',\r
+'2345Explorer/',\r
+'360Spider',\r
+'A[\x20]Simple[\x20]Crawler',\r
+'Abrave',\r
+'acapbot/',\r
+'Accoona\-AI\-Agent/',\r
+'arcemedia',\r
+'AdnormCrawlerCatchBot/',\r
+'adscanner',\r
+'aiHitBot/',\r
+'aipbot/',\r
+'AlphaBot',\r
+'Apache\-HttpClient/',\r
+'Apexoo[\x20]Spider',\r
+'Applebot/',\r
+'archive\.org_bot',\r
+'Babya[\x20]Discoverer',\r
+'Barkrowler',\r
+'BDCbot/',\r
+'BellPagesCA/',\r
+'BeNosey[\x20]Mohawk[\x20]Search',\r
+'bhcBot',\r
+'bidswitchbot',\r
+'BigBozz/',\r
+'BinGet/',\r
+'bitlybot',\r
+'bl\.uk_lddc_bot/',\r
+'BLEXBot/',\r
+'bnf.fr_bot',\r
+'boitho\.com\-dc/',\r
+'BoogleBot',\r
+'BusinessBot:',\r
+'BW/',\r
+'Bytespider',\r
+'CatchBot/',\r
+'CB/Nutch',\r
+'CCBot/',\r
+'CheckMarkNetwork/',\r
+'Cliqzbot/',\r
+'CMS[\x20]Crawler',\r
+'Companybook\-Crawler',\r
+'ConveraCrawler/',\r
+'Contacts-Crawler',\r
+'contxbot',\r
+'cosmos/',\r
+'crawl/Nutch',\r
+'crawler4j',\r
+'CRAZYWEBCRAWLER',\r
+'CRMNLCrawlAgent',\r
+'CSE[\x20]HTML[\x20]Validator',\r
+'C\-T[\x20]bot',\r
+'CUBOT',\r
+'Curl/PHP',\r
+'cyencebot',\r
+'Dalvik/',\r
+'DataCrawler/',\r
+'daumoa',\r
+'daum',\r
+'Deepnet[\x20]Explorer',\r
+'DeuSu/',\r
+'Digincore',\r
+'Discordbot/',\r
+'Dispatch/',\r
+'DnyzBot',\r
+'DoCoMo/',\r
+'Domain[\x20]Re\-Animator[\x20]Bot',\r
+'DomainCrawler/',\r
+'DomainMacroCrawler/',\r
+'DomainSONOCrawler/',\r
+'DomainStatsBot/',\r
+'DotBot/',\r
+'DuckDuckBot-Https',\r
+'DuckDuckBot',\r
+'DuckDuckGo\-Favicons\-Bot/',\r
+'ELinks/',\r
+'ELinks[\x20]\(',\r
+'EmailMarketingRobot/',\r
+'EmeraldShield\.com[\x20]WebBot',\r
+'envolk\[ITS\]spider/',\r
+'eright',\r
+'EsperanzaBot',\r
+'Exabot/',\r
+'ExtLinksBot',\r
+'ExperianCrawlUK',\r
+'facebookexternalhit/',\r
+'fast_enterprise_crawler.*scrawleradmin\.t\-info@telekom\.de',\r
+'fast_enterprise_crawler.*t\-info_bi_cluster_crawleradmin\.t\-info@telekom\.de',\r
+'FAST\-WebCrawler/',\r
+'Feosey[\x20]Mohk[\x20]Crawler',\r
+'findlinks/',\r
+'Findxbot/',\r
+'FirePHP/',\r
+'firstdirectory\-bot',\r
+'flamingo',\r
+'FlippyBearBot/',\r
+'^foo$',\r
+'Foregenix[\x20]Web[\x20]Scan',\r
+'FreeWebMonitoring[\x20]SiteChecker/',\r
+'fujilabol',\r
+'FurlBot/',\r
+'Gaisbot/',\r
+'Gallent[\x20]Spider',\r
+'GarlikCrawler/',\r
+'Getintent[\x20]Crawler',\r
+'GetintentCrawler[\x20]getintent\.com',\r
+'Gigabot/',\r
+'gipo\-crawler/Nutch',\r
+'Girafabot',\r
+'Gluten[\x20]Free[\x20]Crawler/',\r
+'gocrawl',\r
+'Gowikibot',\r
+'Go\-http\-client/',\r
+'GrapeshotCrawler/',\r
+'GSiteCrawler/',\r
+'GurujiBot/',\r
+'hadiBot',\r
+'HaosouSpider',\r
+'HELLO[\x20]Crawler',\r
+'holmes/',\r
+'houzzbot',\r
+'HTTP_Request2/',\r
+'HubSpot[\x20]Webcrawler',\r
+'HyperCrawl/',\r
+'ICC\-Crawler/',\r
+'iconoclast',\r
+'IDGCrawler/Nutch',\r
+'IDG/UK',\r
+'idmarch[\x20]Automatic\.beta/',\r
+'InbyBot',\r
+'Incutio[\x20]XML',\r
+'IndeedBot',\r
+'InfluenceBot',\r
+'IonCrawl',\r
+'IRLbot/',\r
+'IssueCrawler',\r
+'istellabot/',\r
+'James[\x20]BOT',\r
+'Jigsaw/',\r
+'JobFeed',\r
+'Jooblebot',\r
+'KomodiaBot/',\r
+'Konqueror/',\r
+'laserlikebot',\r
+'Lightspeed',\r
+'linkapediabot',\r
+'metager\-linkchecker',\r
+'Linguee[\x20]Bot',\r
+'linkchecker',\r
+'LinkCheck',\r
+'linkdexbot/',\r
+'LinkedInBot/',\r
+'LinkpadBot/',\r
+'Links[\x20]\(',\r
+'LinksManager\.com_bot',\r
+'LWP::Simple/',\r
+'Mail\.RU_Bot/',\r
+'makecontact',\r
+'mappy',\r
+'MauiBot',\r
+'meanpathbot/',\r
+'Mechanize',\r
+'Mediatoolkitbot',\r
+'MegaIndex\.ru/',\r
+'merzscope',\r
+'Meta_Bot',\r
+'mfibot/',\r
+'microsoft.*discovery',\r
+'missigua_locator',\r
+'MixrankBot',\r
+'MJ12bot/',\r
+'MojeekBot',\r
+'Mojolicious',\r
+'MXT/Nutch',\r
+'My[\x20]Nutch[\x20]Spider/',\r
+'myse/Nutch',\r
+'Naaraa',\r
+'Neevabot',\r
+'NerdyBot',\r
+'netEstate[\x20]NE[\x20]Crawler',\r
+'NetResearchServer/',\r
+'Nimbostratus-Bot',\r
+'nominet',\r
+'NRLCorpusBuilder/Nutch',\r
+'nutch\-1\.4/',\r
+'nutch\-1\.8/',\r
+'NutchCVS/',\r
+'o\.uk[\x20]robot',\r
+'ocrawler;',\r
+'ODP[\x20]link[\x20]checker',\r
+'Offline[\x20]Explorer/',\r
+'OmniExplorer_Bot/',\r
+'OrangeBot/',\r
+'Orliac',\r
+'OutclicksBot',\r
+'PageBitesHyperBot/',\r
+'Pcore',\r
+'pdffillerbot/',\r
+'peopleman',\r
+'PetalBot',\r
+'PhantomJS',\r
+'PHP/5\.2\.8',\r
+'Pinterestbot',\r
+'PiplBot',\r
+'Ploetz[\x20]\+[\x20]Zeller',\r
+'Plukkie/',\r
+'Princetonbot/',\r
+'PrivacyAwareBot/',\r
+'Prlog/',\r
+'proximic',\r
+'psbot/',\r
+'psbot\-image',\r
+'python_wk_crawler',\r
+'Python\-urllib/',\r
+'QCrawl',\r
+'Quick-Crawler',\r
+'ResearchBot',\r
+'roboto',\r
+'rogerbot/',\r
+'RSSingBot',\r
+'RukiCrawler/',\r
+'SafeDNS[\x20]search[\x20]bot/',\r
+'SafeDNSBot',\r
+'SafeSearch[\x20]microdata[\x20]crawler',\r
+'safesearch',\r
+'SBL\-BOT',\r
+'scrapy',\r
+'Screaming[\x20]Frog[\x20]SEO[\x20]Spider/',\r
+'ScreenerBot[\x20]Crawler[\x20]Beta',\r
+'Scrubby',\r
+'Searchie/',\r
+'SecurityResearch\.bot',\r
+'Seekmo',\r
+'semanticbot',\r
+'SemrushBot/',\r
+'SemrushBot-SI',\r
+'seo\-audit\-check\-bot/',\r
+'Seobility',\r
+'SEOkicks\-Robot',\r
+'SEOlyticsCrawler/',\r
+'SEOstats',\r
+'Seosys/Nutch',\r
+'Seoterritory\.com[\x20]bot',\r
+'serendeputy',\r
+'Shim\-Crawler',\r
+'SiteExplorer/',\r
+'siteexplorer\.info',\r
+'siteimprove',\r
+'Slackbot\-LinkExpanding',\r
+'SmabblerBot/',\r
+'Sogou[\x20]web[\x20]spider/',\r
+'special_archiver/',\r
+'Spiderbot/',\r
+'SpuhexBot',\r
+'spyonweb',\r
+'ssearch_bot',\r
+'Streamline3Bot',\r
+'SurdotlyBot/',\r
+'SurveyBot/',\r
+'taiil/Nutch',\r
+'tbot\-nutch',\r
+'TeeRaidBot',\r
+'TelegramBot',\r
+'Test/Nutch',\r
+'Test[\x20]Spider',\r
+'TestCrawler',\r
+'The[\x20]Knowledge[\x20]AI',\r
+'TkBot',\r
+'tracemyfile',\r
+'trendiction',\r
+'TurnitinBot/',\r
+'TurnitinBot',\r
+'TweetmemeBot/',\r
+'UCY/Nutch',\r
+'uni-leipzig\.de',\r
+'Uptimebot/',\r
+'UptimeRobot/',\r
+'URL[\x20]Checker',\r
+'UXCrawlerBot',\r
+'Validator\.nu/',\r
+'vBSEO',\r
+'vBulletin[\x20]via[\x20]PHP',\r
+'vebidoobot',\r
+'vegi[\x20]bot',\r
+'Velen',\r
+'viz/Nutch',\r
+'VoilaBot',\r
+'VORTEX/',\r
+'voyager/',\r
+'vuhuvBot',\r
+'W3C_Validator/',\r
+'W3C\-checklink/',\r
+'WBSearchBot/',\r
+'WbSrch/',\r
+'WeSEE:Ads/PageBot',\r
+'WeSEE:Ads/PictureBot',\r
+'WeSEE_Bot',\r
+'Wget/',\r
+'Who\.is[\x20]Bot',\r
+'wonderbot/',\r
+'woobot/',\r
+'Wotbox/',\r
+'Xaldon[\x20]WebSpider',\r
+'Xenu[\x20]Link[\x20]Sleuth',\r
+'xenu_link_sleuth',\r
+'XML[\x20]Sitemaps[\x20]Generator',\r
+'XoviBot/',\r
+'yacybot',\r
+'Yahoo[\x20]Link[\x20]Preview',\r
+'yak',\r
+'YisouSpider',\r
+'yoozBot',\r
+'Your\-Website\-Sucks',\r
+'zoominfobot',\r
+'zspider/',\r
+'ZumBot/',\r
+# below placed at end to catch some generics\r
+'ng/1\.',\r
+'ng/2\.',\r
+'libwww\-perl',\r
+'CFNetwork',\r
+'urllib',\r
+'javabee',\r
+'projectwf\-java\-test\-crawler',\r
+'java',\r
+'loocalcrawler/nutch',\r
+'nutchosu\-vlib',\r
+'nutch',\r
+'perlcrawler',\r
+'perl',\r
+# old robots using firefox < version 11 not identifying themselves as a robot.\r
+'(firefox/)([0-9]\.|[0-1][0]\.)'\r
+);\r
+\r
+# RobotsSearchIDOrder_list2\r
+# Regular expressions for less common robots. As described at the top of this\r
+# file, the expressions are tried in list order and the first match specifies\r
+# the robot, so a specific pattern must be listed ahead of any shorter pattern\r
+# that matches a substring of it (e.g. 'imagecoccoc' ahead of 'coccoc').\r
+# Every entry must also exist, spelled identically, as a key in RobotsHashIDLib.\r
+@RobotsSearchIDOrder_list2 = (\r
+# Less common robots (In robot file)\r
+'^Mozilla$',\r
+'^mozilla\/3\.0\s\(compatible$',\r
+'^mozilla\/4\.0$',\r
+'^mozilla\/4\.0\s\(compatible;\)$',\r
+'^mozilla\/5\.0$',\r
+'^mozilla\/5\.0\s\(compatible;$',\r
+'^mozilla\/5\.0\s\(en\-us\)$',\r
+'^mozilla\/5\.0\sfirefox\/3\.0\.5$',\r
+'^Mozilla/6\.0[\x20]\(compatible\)$',\r
+'^Mozilla/(.*)Beta[\x20]\(Windows\)',\r
+'MSIE[\x20]2',\r
+'MSIE[\x20]3',\r
+'MSIE[\x20]4',\r
+'MSIE[\x20]5',\r
+'MSIE[\x20]6',\r
+'MSIE\+6\.0\;',\r
+'Windows[\x20]95',\r
+'Windows[\x20]98',\r
+\r
+# these could be removed to speed up processing as they are rarely seen\r
+'a6\-indexer',\r
+'abcdatos',\r
+'abonti\.com',\r
+'acme\.spider',\r
+'activebookmark',\r
+'adamm_bot',\r
+'advbot',\r
+'affectv\.co\.uk',\r
+'ahoythehomepagefinder',\r
+'aleadsoftbot',\r
+'alkaline',\r
+'allrati',\r
+'alltop',\r
+'almaden',\r
+'alpha_search_agent',\r
+'anthill',\r
+'antibot',\r
+'aport',\r
+'appie',\r
+'applesyndication',\r
+'arachnophilia',\r
+'arale',\r
+'araneo',\r
+'architext',\r
+'archive\-de\.com',\r
+'aretha',\r
+'argus',\r
+'ariadne',\r
+'arianna\.libero\.it',\r
+# 'powermarks' is listed ahead of 'arks', which would otherwise match it first\r
+'powermarks',\r
+'arks',\r
+'aspider',\r
+'aspseek',\r
+'asterias',\r
+'asynchttpclient',\r
+'atn\.txt',\r
+'atomz',\r
+'auresys',\r
+'awbot',\r
+'backlinktest\.com',\r
+'backrub',\r
+# 'copubbot' is listed ahead of 'bbot', which would otherwise match it first\r
+'copubbot',\r
+'bbot',\r
+'becomebot',\r
+'bender',\r
+'betabot',\r
+'bigbrother',\r
+'biglotron',\r
+'BingLocalSearch',\r
+'bittorrent_bot',\r
+'biz360[_+\s]spider',\r
+'bjaaland',\r
+'blackwidow',\r
+'blindekuh',\r
+'blogbridge[_+\s]service',\r
+'blogged_crawl',\r
+'bloglines',\r
+'bloglovin',\r
+'blogpulse',\r
+'blogsearch',\r
+'blogshares',\r
+'blogslive',\r
+'blogssay',\r
+'bloodhound',\r
+'bncf\.firenze\.sbn\.it/raccolta\.txt',\r
+'bobby',\r
+'bookmark\-manager',\r
+'borg\-bot',\r
+'boris',\r
+'brightnet',\r
+'bruinbot',\r
+# 'openwebspider' and 'webspider' are listed ahead of 'bspider', which would\r
+# otherwise match them first\r
+'openwebspider',\r
+'webspider',\r
+'bspider',\r
+'bubing',\r
+'bumblebee',\r
+'butterfly',\r
+'buzztracker',\r
+'cactvschemistryspider',\r
+'calif[^r]',\r
+'candlelight[_+\s]favorites[_+\s]inspector',\r
+'careerbot',\r
+'carpathia',\r
+'cassandra',\r
+'catbot',\r
+'cbn00glebot',\r
+'cerberian_drtrs',\r
+'cfetch',\r
+'cgireader',\r
+'chattertrap',\r
+'check_http',\r
+'checkbot',\r
+'checkweb_link_validator',\r
+'christcrawler',\r
+'churl',\r
+'cienciaficcion',\r
+'cipinetbot',\r
+'imagecoccoc',\r
+'coccoc',\r
+'coldfusion',\r
+'collective',\r
+'combine',\r
+'commons\-httpclient',\r
+'computer_and_automation_research_institute_crawler',\r
+'conceptbot',\r
+'contentmatch',\r
+'converamultimediacrawler',\r
+'coolbot',\r
+'core',\r
+'covario',\r
+'cruiser',\r
+'cscrawler',\r
+'cuasarbot',\r
+'cursor',\r
+'cusco',\r
+'custo',\r
+'cyberspyder',\r
+'datafountains/dmoz_downloader',\r
+'dataprovider\.com',\r
+'daviesbot',\r
+'daylifefeedfetcher',\r
+'daypopbot',\r
+'deepindex',\r
+'desertrealm',\r
+'deweb',\r
+'dienstspider',\r
+'digger',\r
+'digout4u',\r
+'diibot',\r
+'dipsie\.bot',\r
+'direct_hit',\r
+'discobot',\r
+'dlvr\.it',\r
+'dnabot',\r
+'dnsgroup',\r
+'doccheckbot',\r
+'domainappender',\r
+'domainchecker',\r
+'domainsdb\.net',\r
+'download_express',\r
+'dragonbot',\r
+'dreamwidth',\r
+'drupal',\r
+'dulance',\r
+'dumbot',\r
+'dumm\.de\-bot',\r
+'dwcp',\r
+'e\-collector',\r
+'earthcom\.info',\r
+'easydl',\r
+'ebiness',\r
+'eccp',\r
+'echo!',\r
+'edgeio\-retriever',\r
+'elfinbot',\r
+'emacs',\r
+'emcspider',\r
+'enteprise',\r
+# NOTE(review): '[:blank:]' is a POSIX class only inside a bracket expression\r
+# ('[[:blank:]]'); as written it is a plain character class matching one of\r
+# ':', 'b', 'l', 'a', 'n', 'k'. Left unchanged here because the RobotsHashIDLib\r
+# key must stay identical - fix both entries together.\r
+'ernst[:blank:]2\.0',\r
+'esther',\r
+'ets_v',\r
+'eventax',\r
+'everbeecrawler',\r
+'everest\-vulcan',\r
+'evliyacelebi',\r
+'exactseek',\r
+'extreme[_+\s]picture[_+\s]finder',\r
+'ezoom',\r
+'ezresult',\r
+'facebook',\r
+'facebot',\r
+'fast\-search\-engine',\r
+'matrix_s\.p\.a\._\-_fast_enterprise_crawler',\r
+'fast_enterprise_crawler',\r
+'fastbot',\r
+'fastcrawler',\r
+'favicon',\r
+'favorg',\r
+'favorites_sweeper',\r
+'fdse',\r
+'feedburner',\r
+'feedcrawl',\r
+'feedflow',\r
+'feedmyinbox',\r
+'feedroll\.com',\r
+'feedsky',\r
+'feedster',\r
+'feedvalidator',\r
+'feedzira',\r
+'felix',\r
+# 'muscatferret' is listed ahead of 'ferret', which would otherwise match it first\r
+'muscatferret',\r
+'ferret',\r
+'fetchbot',\r
+'fetchrover',\r
+'fever/',\r
+'fido',\r
+'filmkamerabot',\r
+'filterdb\.iss\.net',\r
+'finderlein[_+\s]research[_+\s]crawler',\r
+'findexa_crawler',\r
+'finnish',\r
+'fireball',\r
+'firmilybot',\r
+'flexum',\r
+'foaf\-search\.net',\r
+'fooky\.com/ScorpionBot',\r
+'fouineur',\r
+'francoroute',\r
+'freecrawl',\r
+'freenews',\r
+'funnelweb',\r
+'g2crawler',\r
+'gama',\r
+'gazz',\r
+'gcreep',\r
+'geniebot',\r
+'genieo',\r
+'geohasher',\r
+'getbot',\r
+'geturl',\r
+'gigablastopensource',\r
+'global_fetch',\r
+'gnodspider',\r
+'goforit\.com',\r
+'goforitbot',\r
+'golem',\r
+'gonzo',\r
+'gougou',\r
+'gpu_p2p_crawler',\r
+# 'labelgrabber\.txt' is listed ahead of 'grabber', which would otherwise match it first\r
+'labelgrabber\.txt',\r
+'grabber',\r
+'grapeshot',\r
+'grapnel',\r
+'griffon',\r
+'gromit',\r
+'grub',\r
+'gulliver',\r
+'gulperbot',\r
+'hambot',\r
+'hanrss',\r
+'harvest',\r
+'havindex',\r
+'henrythemiragorobot',\r
+'heritrix',\r
+'hl_ftien_spider',\r
+'hometown',\r
+'hoowwwer',\r
+'hpprint',\r
+'htdig',\r
+'html[_+\s]link[_+\s]validator',\r
+'htmlgobble',\r
+'htmlparser',\r
+'httrack',\r
+'hundesuche\.com\-bot',\r
+'hyperdecontextualizer',\r
+'ia_archiver\-web\.archive\.org',\r
+'ia_archiver',\r
+'iajabot',\r
+'iaskspider',\r
+'i\-bot',\r
+'icarus6j',\r
+'ichiro',\r
+'icjobs\.de',\r
+'ilse',\r
+'iltrovatore\-setaccio',\r
+'imagelock',\r
+'implisensebot',\r
+'inagist',\r
+'incywincy',\r
+'infobot',\r
+'infociousbot',\r
+'infohelfer',\r
+'infomine',\r
+'informant',\r
+'infoseeksidewinder',\r
+'infoseek',\r
+'infospider',\r
+'inspectorwww',\r
+'insurancobot',\r
+'integromedb\.org',\r
+'intelliagent',\r
+'internet[_+\s]ninja',\r
+'internetarchive',\r
+'internetseer',\r
+'internetsupervision',\r
+'ips\-agent',\r
+'irobot',\r
+'iron33',\r
+'isearch2006',\r
+'israelisearch',\r
+'iupui_research_bot',\r
+'izsearch',\r
+'jacobin[\x20]club',\r
+'jakarta',\r
+'jbot',\r
+'jcrawler',\r
+'jeeves',\r
+'jennybot',\r
+'jobboerse',\r
+'jobot',\r
+'jobo',\r
+'joebot',\r
+'jrtwine[_+\s]software[_+\s]check[_+\s]favorites[_+\s]utility',\r
+'js\-kit',\r
+'jubii',\r
+'jumpstation',\r
+'justview',\r
+'kalambot',\r
+'kamano\.de_newsfeedverzeichnis',\r
+'kapsi',\r
+'katipo',\r
+'kazoombot',\r
+'kevin',\r
+'keyoshid',\r
+'kilroy',\r
+'kinja\-imagebot',\r
+'kinjabot',\r
+'knowitall',\r
+'knowledge\.com',\r
+'ko[_+\s]yappo[_+\s]robot',\r
+'kouaa_krawler',\r
+'krugle',\r
+'ksibot',\r
+'kummhttp',\r
+'kurzor',\r
+'lanshanbot',\r
+'larbin',\r
+'largesmall[\x20]crawler',\r
+'legs',\r
+'letscrawl\.com',\r
+'libcrawl',\r
+'lilina',\r
+'link_valet_online',\r
+'linkbot',\r
+'linkdex\.com',\r
+'linkidator',\r
+'linkscan',\r
+'linkstats[\x20]bot',\r
+'linkwalker',\r
+'lipperhey',\r
+'livejournal\.com',\r
+'lmspider',\r
+'loadtimebot',\r
+'lockon',\r
+'logo_gif',\r
+'longurl',\r
+'lssrocketcrawler',\r
+'ltbot',\r
+'ltx71',\r
+'lwp\-request',\r
+'lwp\-trivial',\r
+'lycos[_+\s]',\r
+'macworm',\r
+'madaali\.de',\r
+'magpierss',\r
+'magpie',\r
+'mapoftheinternet\.com',\r
+'marvin',\r
+'mattie',\r
+'mediabot',\r
+'mediafox',\r
+'megaindex',\r
+'megite',\r
+'memorybot',\r
+'mercator',\r
+'meshexplorer',\r
+'metager2\-verification\-bot',\r
+'metajobbot',\r
+'metaspinner',\r
+'metauri',\r
+'miadev',\r
+'microsoft[_+\s]url[_+\s]control',\r
+'microsoft[\x20]bits',\r
+'microsoft\-webdav\-miniredir',\r
+'mindcrawler',\r
+'mindupbot',\r
+'mini\-reptile',\r
+'minirank',\r
+'misterbot',\r
+'miva',\r
+'mizzu_labs',\r
+'mnogosearch',\r
+'moget',\r
+'momspider',\r
+'monster',\r
+# '^motorola$' is listed ahead of 'motor', which would otherwise match it first\r
+'^motorola$',\r
+'motor',\r
+'movabletype',\r
+'ms[_+\s]search[_+\s]6\.0[_+\s]robot',\r
+'ms_search_4\.0_robot',\r
+'msnbot\-udiscovery',\r
+'msrabot',\r
+'msrbot',\r
+'mt::telegraph::agent',\r
+'muncher',\r
+'mwdsearch',\r
+'mydoyouhike',\r
+'myweb',\r
+'nagios',\r
+'nasa_search',\r
+'ndspider',\r
+'nederland\.zoek',\r
+'netcarta',\r
+'netcraft',\r
+'netluchs',\r
+'netmechanic',\r
+'netnewswire',\r
+'netscoop',\r
+'netsprint',\r
+'netvibes',\r
+'newrelicpinger',\r
+'newscan\-online',\r
+'newsfox',\r
+'newsgatoronline',\r
+'nextgensearchbot',\r
+'nhse',\r
+'nicebot',\r
+'nimblecrawler',\r
+'ning',\r
+'nomad',\r
+'northstar',\r
+'noxtrumbot',\r
+'npbot',\r
+'nzexplorer',\r
+'objectssearch',\r
+'occam',\r
+'ocelli',\r
+'octopus',\r
+'octora_beta_bot',\r
+'onet\.pl[_+\s]sa',\r
+'onfolio',\r
+'openfind',\r
+'opentaggerbot',\r
+'optimizer',\r
+'oracle_ultra_search',\r
+'orb_search',\r
+'orbiter',\r
+'packrat',\r
+'pageboy',\r
+'panscient',\r
+'parasite',\r
+'passwordmaker\.org',\r
+'patric',\r
+'pear_http_request_class',\r
+'peerbot',\r
+'pegasus',\r
+'perignator',\r
+'perman',\r
+'petersnews',\r
+'phantom',\r
+'php[_+\s]version[_+\s]tracker',\r
+'phpcrawl',\r
+'phpdig',\r
+'picmole',\r
+'pictureofinternet',\r
+'piltdownman',\r
+'pimptrain',\r
+'ping\.blo\.gs',\r
+'pingdom',\r
+'pioneer',\r
+'pita',\r
+'pitkow',\r
+'pjspider',\r
+'plinki',\r
+'pluckfeedcrawler',\r
+'plumtreewebaccessor',\r
+'pogodak',\r
+'pompos',\r
+'popdexter',\r
+'poppi',\r
+'port_huron_labs',\r
+'portalb',\r
+'postfavorites',\r
+'postpost',\r
+'postrank',\r
+'printfulbot',\r
+'proodlebot',\r
+'protopage',\r
+'publiclibraryarchive',\r
+'pyquery',\r
+'python',\r
+'qihoobot',\r
+'quipply',\r
+'qwantify',\r
+'r6\_',\r
+'rambler',\r
+'ratingburner',\r
+'raven',\r
+'rbse',\r
+'redalert',\r
+'regator',\r
+'relevantnoise\.com',\r
+'resumerobot',\r
+'rhcs',\r
+'riddler',\r
+'road_runner',\r
+'robbie',\r
+'robi',\r
+'robocrawl',\r
+'robofox',\r
+'robozilla',\r
+'rojo',\r
+'rome[\x20]client',\r
+'roverbot',\r
+'rpt\-httpclient',\r
+'rssgraffiti',\r
+'rssimagesbot',\r
+'ruffle',\r
+'rufusbot',\r
+'rules',\r
+'safeads\.xyz',\r
+'safetynetrobot',\r
+'sage\+\+',\r
+'sandcrawler',\r
+'savetheworldheritage',\r
+'sbider',\r
+'schizozilla',\r
+'scooter',\r
+'scoutjet',\r
+'scumbot',\r
+'search\-info',\r
+'search_au',\r
+'searchguild[_+\s]dmoz[_+\s]experiment',\r
+'searchmetricsbot',\r
+'searchprocess',\r
+'seekbot',\r
+'semalt',\r
+'senrigan',\r
+'sensis_web_crawler',\r
+'seodiver',\r
+'seokicks\.de',\r
+'seoscanners',\r
+'sgscout',\r
+'shaggy',\r
+'shaihulud',\r
+'shareaholicbot',\r
+'shoutcast',\r
+'sift',\r
+'simbot',\r
+'simplepie',\r
+'sistrix',\r
+'site\-valet',\r
+'sitebot',\r
+'sitedomain\-bot',\r
+'sitetech',\r
+'skimbot',\r
+'skymob',\r
+'slcrawler',\r
+'slurp',\r
+'slysearch',\r
+'smartspider',\r
+'smtbot',\r
+'snap\.com_beta_crawler',\r
+'snappy',\r
+'snooper',\r
+'sohu\-search',\r
+'sohu',\r
+'solbot',\r
+'speedy',\r
+'sphere_scout',\r
+'spider[_+\s]monkey',\r
+'spiderline',\r
+'spiderlytics',\r
+'spiderman',\r
+'spiderview',\r
+'spip',\r
+'sproose_crawler',\r
+'spry',\r
+'sqworm',\r
+'ssearcher',\r
+'steeler',\r
+'steroid__download',\r
+'stq_bot',\r
+'Stratagems[\x20]Kumo',\r
+'suchfin\-bot',\r
+'suke',\r
+'summify\.com',\r
+'sunrise',\r
+'suntek',\r
+'superbot',\r
+'superfeedr',\r
+'susie',\r
+'sven',\r
+'syndic8',\r
+'syndicapi',\r
+'synoobot',\r
+'synthesio',\r
+'t\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e',\r
+'tach_bw',\r
+'tagyu_agent',\r
+'tailrank',\r
+'tarantula',\r
+'tarspider',\r
+'tcl_http_client_package',\r
+'techbot',\r
+'technoratibot',\r
+'templeton',\r
+'teoma',\r
+'teragramcrawlersurf',\r
+'test_crawler',\r
+'testbot',\r
+'thumbsniper',\r
+'titan',\r
+'titin',\r
+'tkwww',\r
+'tlspider',\r
+'topblogsinfo',\r
+'topicblogs',\r
+'topix\.net',\r
+'trapit',\r
+'trileet',\r
+'turtlescanner',\r
+'turtle',\r
+'tutorgigbot',\r
+'tweetedtimes',\r
+'twiceler',\r
+'twisted[\x20]pagegetter',\r
+'twitterbot',\r
+'twitterfeed',\r
+'ubicrawler',\r
+'ucsd',\r
+'udmsearch',\r
+'ultraseek',\r
+'um\-IC',\r
+'um\-LN',\r
+'unchaos_bot_hybrid_web_search_engine',\r
+'unido\-bot',\r
+'unisterbot',\r
+'universalfeedparser',\r
+'unlost_web_crawler',\r
+'unwindfetchor',\r
+'updated',\r
+'urlck',\r
+'ustc\-semantic\-group',\r
+'vagabondo\-wap',\r
+'vagabondo',\r
+'valkyrie',\r
+'vermut',\r
+'versus_crawler_from_eda\.baykan@epfl\.ch',\r
+'verticrawl',\r
+'vespa_crawler',\r
+'victoria',\r
+'virus[_+\s]detector',\r
+'visionsearch',\r
+'voidbot',\r
+'voltron',\r
+'vse/',\r
+'vwbot',\r
+'w3c[_+\s]css[_+\s]validator[_+\s]jfouffa',\r
+'w3index',\r
+'w3m2',\r
+'wallpaper',\r
+'wanderer',\r
+'wapspider',\r
+# NOTE(review): looks like a mangled duplicate of 'wapspider' above - confirm\r
+'wapspIRLider',\r
+'watchmouse',\r
+'wavefire',\r
+'waybackarchive\.org',\r
+'wazzup',\r
+'web_downloader',\r
+'webbandit',\r
+'webbase',\r
+'webcatcher',\r
+'webclipping\.com',\r
+'webcollage',\r
+'webcompass',\r
+'webcopy',\r
+'webcrawl\.net',\r
+'webdup',\r
+'webfetcher',\r
+'webfilter',\r
+'webfoot',\r
+'webinator',\r
+'webindexer',\r
+'weblayers',\r
+'weblinker',\r
+'webminer',\r
+'webmirror',\r
+'webmoose',\r
+'webquest',\r
+'webreader',\r
+'webreaper',\r
+'website[_+\s]monitoring[_+\s]bot',\r
+'websnarf',\r
+'webvac',\r
+'webvulncrawl',\r
+'webwalker',\r
+'webwalk',\r
+'webwatch',\r
+'wells_search',\r
+'wer\-liefert\-was',\r
+'wesee:search',\r
+'wevikabot',\r
+'whatuseek',\r
+'whowhere',\r
+'windows\-rss\-platform',\r
+'wired\-digital',\r
+'zyborg',\r
+'wisenutbot',\r
+'wiumi',\r
+'wmir',\r
+'wolp',\r
+'wombat',\r
+'wonderer',\r
+'woozweb',\r
+'wordpress',\r
+'worm',\r
+'wume_crawler',\r
+'wwwc',\r
+'wwweasel',\r
+'wz101',\r
+'xget',\r
+'xirq',\r
+'xydo',\r
+'y!j',\r
+'yahoo![\x20]searchmonkey',\r
+'yahoo!_mindset',\r
+'yahoo\-blogs',\r
+'yahoo\-mmcrawler',\r
+'yahoo\-newscrawler',\r
+'yahoo[\x20]pipes',\r
+'yahoo\-verticalcrawler',\r
+'yahoocachesystem',\r
+'yahooexternalcache',\r
+'yahoofeedseeker',\r
+'yahooseeker\-testing',\r
+'yahooseeker',\r
+'yahooysmcm',\r
+'yammer',\r
+'yanga',\r
+'yet\-another\-spider',\r
+'yeti',\r
+'yie8',\r
+'yodaobot',\r
+'yooglifetchagent',\r
+'youdao',\r
+'yourls',\r
+'z\-add_link_checker',\r
+'zealbot',\r
+'zemanta',\r
+'zend_http_client',\r
+'zeus',\r
+'zhuaxia',\r
+'[^a]fish',\r
+'[\x20]netseer[\x20]',\r
+'^[1-3]$',\r
+'^finbot',\r
+'^msie',\r
+'^voyager/',\r
+'^webindex$',\r
+'1\-more_scanner',\r
+# 'nbot' stays last so it cannot pre-empt other robots whose names contain 'nbot'\r
+'nbot'\r
+);\r
+\r
+# RobotsSearchIDOrder_listgen\r
+# Catch-all patterns: generic substrings ('robot', 'crawl', 'spider', ...)\r
+# rather than the names of individual robots. As described at the top of this\r
+# file, expressions are tried in the order given and the first match specifies\r
+# the robot, so the order of the entries below is significant.\r
+# NOTE(review): presumably consulted only after the named-robot lists - confirm\r
+# against the caller.\r
+@RobotsSearchIDOrder_listgen = (\r
+# Generic robot\r
+'robot',\r
+'blog',\r
+'checker',\r
+'crawl',\r
+'discover',\r
+'feed',\r
+'fetcher',\r
+'hunter',\r
+'link',\r
+'scanner',\r
+'seek',\r
+'sitemap',\r
+'spider',\r
+'sucker',\r
+'survey',\r
+'validator',\r
+# 'bot' immediately followed by whitespace or one of _ + : , . ; / \ -\r
+'bot[\s_+:,\.\;\/\\\-]',\r
+# 'bot' immediately preceded by whitespace or one of _ + : , . ; / \ -\r
+'[\s_+:,\.\;\/\\\-]bot',\r
+'curl',\r
+'php',\r
+'ruby/',\r
+# 'oBot/' is kept in this generic list so that it does not get assigned to\r
+# other robots whose names end in 'obot'\r
+'oBot/',\r
+# NOTE(review): presumably a placeholder matched when the request carried no\r
+# User-Agent header - confirm against the caller.\r
+'no_user_agent'\r
+);\r
+\r
+\r
+# RobotsHashIDLib\r
+# Maps each robot id (the regular expression listed in RobotsSearchIDOrder_listx) to its clear-text description: ('robot id','robot clear text')\r
+#-------------------------------------------------------\r
+%RobotsHashIDLib   = (\r
+# Common robots (In robot file)\r
+'bingbot/','bingbot',\r
+'bingpreview','BingPreview',\r
+'MSIECrawler','MSIECrawler',\r
+'msnbot/','msnbot',\r
+'msnbot\-media/','msnbot-media',\r
+'AdIdxBot/','AdIdxBot Microsoft Ad Quality control',\r
+'NOT[\x20]Googlebot/','NOT Googlebot',\r
+'Googlebot/','Googlebot',\r
+'Google[\x20]Web[\x20]Preview','Google Web Preview',\r
+'Googlebot\-Image/','Googlebot-Image',\r
+'Googlebot\-Mobile/','Googlebot-Mobile',\r
+'Google[\x20]Page[\x20]Speed','Google Page Speed',\r
+'google\-sitemaps','google-sitemaps',\r
+'Googlebot\-News','Googlebot-News',\r
+'Googlebot\-Video/','Googlebot-Video',\r
+'AdsBot\-Google[\x20]\(','AdsBot-Google',\r
+'AdsBot\-Google\-Mobile\-Apps','AdsBot-Google-Mobile-Apps',\r
+'Adsbot','Adsbot',\r
+'Mediapartners-Google','Mediapartners-Google',\r
+'Feedfetcher\-Google','Feedfetcher-Google',\r
+'Google\-Adwords\-Instant','Google-Adwords-Instant',\r
+'Firefox/1\.5','Nautic Expo using Firefox/1.5',\r
+'Yahoo![\x20]Slurp[\x20]China','Yahoo! Slurp China',\r
+'Yahoo![\x20]Slurp','Yahoo! Slurp',\r
+'Baiduspider/','Baiduspider',\r
+'Baiduspider\-image','Baiduspider-image',\r
+'Baiduspider-','Baiduspider ( catchall )',\r
+'YandexBot/','YandexBot',\r
+'YandexImages/','YandexImages',\r
+'YandexImageResizer','YandexImageResizer',\r
+'YandexMetrika/','YandexMetrika',\r
+'YandexMobileBot/','YandexMobileBot',\r
+'yandex','Yandex ( catchall )',\r
+'electricmonk/','electricmonk',\r
+'spbot/','spbot',\r
+'SeznamBot/','SeznamBot',\r
+'msie8','msie8 - ( Rogue Robot )',\r
+'AhrefsBot/','AhrefsBot',\r
+'007ac9[\x20]Crawler','007ac9 Crawler',\r
+'2345Explorer/','2345Explorer',\r
+'360Spider','360Spider',\r
+'A[\x20]Simple[\x20]Crawler','A Simple Crawler',\r
+'Abrave','Abrave',\r
+'acapbot/','acapbot',\r
+'Accoona\-AI\-Agent/','Accoona-AI-Agent',\r
+'AdnormCrawlerCatchBot/','AdnormCrawlerCatchBot',\r
+'adscanner','adscanner',\r
+'aiHitBot/','aiHitBot',\r
+'aipbot/','aipbot',\r
+'AlphaBot','AlphaBot',\r
+'Apache\-HttpClient/','Apache-HttpClient',\r
+'Apexoo[\x20]Spider','Apexoo Spider',\r
+'Applebot/','Applebot',\r
+'arcemedia','AdsBot-ArceMedia',\r
+'archive\.org_bot','archive.org_bot',\r
+'Babya[\x20]Discoverer','Babya Discoverer',\r
+'Barkrowler','Barkrowler',\r
+'BDCbot/','BDCbot',\r
+'BellPagesCA/','BellPagesCA',\r
+'BeNosey[\x20]Mohawk[\x20]Search','BeNosey Mohawk Search',\r
+'bhcBot','bhcBot',\r
+'bidswitchbot','bidswitchbot',\r
+'BigBozz/','BigBozz',\r
+'BinGet/','BinGet',\r
+'bitlybot','bit.ly',\r
+'bl\.uk_lddc_bot/','bl.uk_lddc_bot',\r
+'BLEXBot/','BLEXBot',\r
+'bnf.fr_bot','bnf.fr_bot',\r
+'boitho\.com\-dc/','boitho.com-dc',\r
+'BoogleBot','BoogleBot',\r
+'BusinessBot:','BusinessBot:',\r
+'BW/','BW',\r
+'Bytespider','Bytespider',\r
+'CatchBot/','CatchBot',\r
+'CB/Nutch','CB/Nutch',\r
+'CCBot/','CCBot',\r
+'CheckMarkNetwork/','CheckMarkNetwork',\r
+'Cliqzbot/','Cliqzbot',\r
+'CMS[\x20]Crawler','CMS Crawler',\r
+'Companybook\-Crawler','Companybook-Crawler',\r
+'ConveraCrawler/','ConveraCrawler',\r
+'Contacts-Crawler','Contacts-Crawler',\r
+'contxbot','contxbot',\r
+'cosmos/','cosmos',\r
+'CRMNLCrawlAgent','CRMNLCrawlAgent',\r
+'crawl/Nutch','crawl/Nutch',\r
+'crawler4j','crawler4j',\r
+'CRAZYWEBCRAWLER','CRAZYWEBCRAWLER',\r
+'CSE[\x20]HTML[\x20]Validator','CSE HTML Validator',\r
+'C\-T[\x20]bot','C-T bot',\r
+'CUBOT','CUBOT',\r
+'Curl/PHP','Curl/PHP',\r
+'cyencebot','cyencebot',\r
+'Dalvik/','Dalvik',\r
+'DataCrawler/','DataCrawler',\r
+'daumoa','daumoa',\r
+'daum','daum',\r
+'Deepnet[\x20]Explorer','Deepnet Explorer',\r
+'DeuSu/','DeuSu',\r
+'Digincore','Digincore',\r
+'Discordbot/','Discordbot',\r
+'Dispatch/','Dispatch',\r
+'DnyzBot','DnyzBot',\r
+'DoCoMo/','DoCoMo',\r
+'Domain[\x20]Re\-Animator[\x20]Bot','Domain Re-Animator Bot',\r
+'DomainCrawler/','DomainCrawler',\r
+'DomainMacroCrawler/','DomainMacroCrawler',\r
+'DomainSONOCrawler/','DomainSONOCrawler',\r
+'DomainStatsBot/','DomainStatsBot',\r
+'DotBot/','DotBot',\r
+'DuckDuckBot-Https','DuckDuckBot-Https',\r
+'DuckDuckBot','DuckDuckBot',\r
+'DuckDuckGo\-Favicons\-Bot/','DuckDuckGo-Favicons-Bot',\r
+'ELinks/','ELinks',\r
+'ELinks[\x20]\(','ELinks (',\r
+'EmailMarketingRobot/','EmailMarketingRobot',\r
+'EmeraldShield\.com[\x20]WebBot','EmeraldShield.com WebBot',\r
+'envolk\[ITS\]spider/','envolk ITS spider',\r
+'eright','eright',\r
+'EsperanzaBot','EsperanzaBot',\r
+'Exabot/','Exabot',\r
+'ExtLinksBot','ExtLinksBot',\r
+'ExperianCrawlUK','ExperianCrawlUK',\r
+'facebookexternalhit/','facebookexternalhit',\r
+'fast_enterprise_crawler.*scrawleradmin\.t\-info@telekom\.de','FAST Enterprise crawleradmin.t-info@telekom.de',\r
+'fast_enterprise_crawler.*t\-info_bi_cluster_crawleradmin\.t\-info@telekom\.de','FAST Enterprise T-Info_BI_cluster crawleradmin.t-info@telekom.de',\r
+'FAST\-WebCrawler/','FAST-WebCrawler',\r
+'Feosey[\x20]Mohk[\x20]Crawler','Feosey Mohk Crawler',\r
+'findlinks/','findlinks',\r
+'Findxbot/','Findxbot',\r
+'FirePHP/','FirePHP',\r
+'firstdirectory\-bot','firstdirectory-bot',\r
+'flamingo','Flamingo_SearchEngine',\r
+'FlippyBearBot/','FlippyBearBot',\r
+'^foo$','foo',\r
+'Foregenix[\x20]Web[\x20]Scan','Foregenix Web Scan',\r
+'FreeWebMonitoring[\x20]SiteChecker/','FreeWebMonitoring SiteChecker',\r
+'fujilabol','fujilabol',\r
+'FurlBot/','FurlBot',\r
+'Gaisbot/','Gaisbot',\r
+'Gallent[\x20]Spider','Gallent Spider',\r
+'GarlikCrawler/','GarlikCrawler',\r
+'Getintent[\x20]Crawler','GetIntent Crawler',\r
+'GetintentCrawler[\x20]getintent\.com','GetintentCrawler getintent.com',\r
+'Gigabot/','Gigabot',\r
+'gipo\-crawler/Nutch','gipo-crawler/Nutch',\r
+'Girafabot','Girafabot',\r
+'Gluten[\x20]Free[\x20]Crawler/','Gluten Free Crawler',\r
+'gocrawl','gocrawl',\r
+'Gowikibot','Gowikibot',\r
+'Go\-http\-client/','Go-http-client',\r
+'GrapeshotCrawler/','GrapeshotCrawler',\r
+'GSiteCrawler/','GSiteCrawler',\r
+'GurujiBot/','GurujiBot',\r
+'hadiBot','hadiBot',\r
+'HaosouSpider','HaosouSpider',\r
+'HELLO[\x20]Crawler','HELLO Crawler',\r
+'holmes/','holmes',\r
+'houzzbot','houzzbot',\r
+'HTTP_Request2/','HTTP_Request2',\r
+'HubSpot[\x20]Webcrawler','HubSpot Webcrawler',\r
+'HyperCrawl/','HyperCrawl',\r
+'ICC\-Crawler/','ICC-Crawler',\r
+'iconoclast','iconoclast',\r
+'IDGCrawler/Nutch','IDGCrawler/Nutch',\r
+'IDG/UK','IDG/UK',\r
+'idmarch[\x20]Automatic\.beta/','idmarch Automatic.beta',\r
+'InbyBot','InbyBot',\r
+'Incutio[\x20]XML','Incutio XML',\r
+'IndeedBot','IndeedBot',\r
+'InfluenceBot','InfluenceBot',\r
+'IonCrawl','IonCrawl',\r
+'IRLbot/','IRLbot',\r
+'IssueCrawler','IssueCrawler',\r
+'istellabot/','istellabot',\r
+'James[\x20]BOT','James BOT',\r
+'Jigsaw/','Jigsaw',\r
+'JobFeed','JobFeed',\r
+'Jooblebot','Jooblebot',\r
+'KomodiaBot/','KomodiaBot',\r
+'Konqueror/','Konqueror',\r
+'laserlikebot','laserlikebot',\r
+'Lightspeed','Lightspeed',\r
+'linkapediabot','linkapediabot',\r
+'metager\-linkchecker','metager-linkchecker',\r
+'Linguee[\x20]Bot','Linguee Bot',\r
+'linkchecker','linkchecker',\r
+'LinkCheck','LinkCheck',\r
+'linkdexbot/','linkdexbot',\r
+'LinkedInBot/','LinkedInBot',\r
+'LinkpadBot/','LinkpadBot',\r
+'Links[\x20]\(','Links (',\r
+'LinksManager\.com_bot','LinksManager.com_bot',\r
+'LWP::Simple/','LWP::Simple',\r
+'Mail\.RU_Bot/','Mail.RU Bot',\r
+'makecontact','makecontact',\r
+'mappy','Mappy Crawler',\r
+'MauiBot','MauiBot',\r
+'meanpathbot/','meanpathbot',\r
+'Mechanize','Mechanize',\r
+'Mediatoolkitbot','Mediatoolkitbot',\r
+'MegaIndex\.ru/','MegaIndex.ru',\r
+'merzscope','merzscope',\r
+'Meta_Bot','Meta_Bot',\r
+'mfibot/','mfibot',\r
+'microsoft.*discovery','Microsoft Office Protocol Discovery',\r
+'missigua_locator','missigua_locator',\r
+'MixrankBot','MixrankBot',\r
+'MJ12bot/','MJ12bot',\r
+'MojeekBot','MojeekBot',\r
+'Mojolicious','Mojolicious',\r
+'MXT/Nutch','MXT/Nutch',\r
+'My[\x20]Nutch[\x20]Spider/','My Nutch Spider',\r
+'myse/Nutch','myse/Nutch',\r
+'Naaraa','Naaraa',\r
+'Neevabot','Neevabot',\r
+'NerdyBot','NerdyBot',\r
+'netEstate[\x20]NE[\x20]Crawler','netEstate NE Crawler',\r
+'NetResearchServer/','NetResearchServer',\r
+'Nimbostratus-Bot','Nimbostratus-Bot',\r
+'nominet','nominet',\r
+'NRLCorpusBuilder/Nutch','NRLCorpusBuilder/Nutch',\r
+'nutch\-1\.4/','nutch-1.4',\r
+'nutch\-1\.8/','nutch-1.8',\r
+'NutchCVS/','NutchCVS',\r
+'o\.uk[\x20]robot','o uk.robot',\r
+'ocrawler;','ocrawler;',\r
+'ODP[\x20]link[\x20]checker','ODP link checker',\r
+'Offline[\x20]Explorer/','Offline Explorer',\r
+'OmniExplorer_Bot/','OmniExplorer_Bot',\r
+'OrangeBot/','OrangeBot',\r
+'Orliac','Orliac',\r
+'OutclicksBot','OutclicksBot',\r
+'PageBitesHyperBot/','PageBitesHyperBot',\r
+'Pcore','Pcore',\r
+'pdffillerbot/','pdffillerbot',\r
+'peopleman','peopleman',\r
+'PetalBot','PetalBot',\r
+'PhantomJS','PhantomJS',\r
+'PHP/5\.2\.8','PHP/5.2.8',\r
+'Pinterestbot','Pinterestbot',\r
+'PiplBot','PiplBot',\r
+'Ploetz[\x20]\+[\x20]Zeller','Ploetz + Zeller',\r
+'Plukkie/','Plukkie',\r
+'Princetonbot/','Princetonbot',\r
+'PrivacyAwareBot/','PrivacyAwareBot',\r
+'Prlog/','Prlog',\r
+'proximic','proximic',\r
+'psbot/','psbot',\r
+'psbot\-image','psbot-image',\r
+'python_wk_crawler','python_wk_crawler',\r
+'Python\-urllib/','Python-urllib',\r
+'QCrawl','QCrawl',\r
+'Quick-Crawler','Quick-Crawler',\r
+'ResearchBot','ResearchBot',\r
+'roboto','roboto',\r
+'rogerbot/','rogerbot',\r
+'RSSingBot','RSSingBot',\r
+'RukiCrawler/','RukiCrawler',\r
+'SafeDNS[\x20]search[\x20]bot/','SafeDNS search bot',\r
+'SafeDNSBot','SafeDNSBot',\r
+'SafeSearch[\x20]microdata[\x20]crawler','SafeSearch microdata crawler',\r
+'safesearch','safesearch ( catchall )',\r
+'SBL\-BOT','SBL-BOT',\r
+'scrapy','scrapy',\r
+'Screaming[\x20]Frog[\x20]SEO[\x20]Spider/','Screaming Frog SEO Spider',\r
+'ScreenerBot[\x20]Crawler[\x20]Beta','ScreenerBot Crawler Beta',\r
+'Scrubby','Scrubby',\r
+'Searchie/','Searchie',\r
+'SecurityResearch\.bot','Security Research Bot',\r
+'Seekmo','Seekmo',\r
+'semanticbot','semanticbot',\r
+'SemrushBot/','SemrushBot',\r
+'SemrushBot-SI','SemrushBot-SI',\r
+'seo\-audit\-check\-bot/','seo-audit-check-bot',\r
+'Seobility','Seobility',\r
+'SEOkicks\-Robot','SEOkicks-Robot',\r
+'SEOlyticsCrawler/','SEOlyticsCrawler',\r
+'SEOstats','SEOstats',\r
+'Seosys/Nutch','Seosys/Nutch',\r
+'Seoterritory\.com[\x20]bot','Seoterritory.com.bot',\r
+'serendeputy','serendeputy',\r
+'Shim\-Crawler','Shim-Crawler',\r
+'SiteExplorer/','SiteExplorer',\r
+'siteexplorer\.info','siteexplorer.info',\r
+'siteimprove','siteimprove',\r
+'Slackbot\-LinkExpanding','Slackbot-LinkExpanding',\r
+'SmabblerBot/','SmabblerBot',\r
+'Sogou[\x20]web[\x20]spider/','Sogou web spider',\r
+'special_archiver/','special_archiver',\r
+'Spiderbot/','Spiderbot',\r
+'SpuhexBot','SpuhexBot',\r
+'spyonweb','spyonweb',\r
+'ssearch_bot','ssearch_bot',\r
+'Streamline3Bot','Streamline3Bot',\r
+'SurdotlyBot/','SurdotlyBot',\r
+'SurveyBot/','SurveyBot',\r
+'taiil/Nutch','taiil/Nutch',\r
+'tbot\-nutch','tbot-nutch',\r
+'TeeRaidBot','TeeRaidBot',\r
+'TelegramBot','TelegramBot',\r
+'Test/Nutch','Test/Nutch',\r
+'Test[\x20]Spider','Test Spider',\r
+'TestCrawler','TestCrawler',\r
+'The[\x20]Knowledge[\x20]AI', 'The Knowledge AI',\r
+'TkBot','TkBot',\r
+'tracemyfile','tracemyfile',\r
+'trendiction','trendiction',\r
+'TurnitinBot/','TurnitinBot',\r
+'TurnitinBot','TurnitinBot',\r
+'TweetmemeBot/','TweetmemeBot',\r
+'UCY/Nutch','UCY/Nutch',\r
+'uni-leipzig\.de','uni-leipzig.de',\r
+'Uptimebot/','Uptimebot',\r
+'UptimeRobot/','UptimeRobot',\r
+'URL[\x20]Checker','URL Checker',\r
+'UXCrawlerBot','UXCrawlerBot',\r
+'Validator\.nu/','Validator.nu',\r
+'vBSEO','vBSEO',\r
+'vBulletin[\x20]via[\x20]PHP','vBulletin via PHP',\r
+'vebidoobot','vebidoobot',\r
+'vegi[\x20]bot','vegi bot',\r
+'Velen','Velen',\r
+'viz/Nutch','viz/Nutch',\r
+'VoilaBot','VoilaBot',\r
+'VORTEX/','VORTEX',\r
+'voyager/','voyager',\r
+'vuhuvBot','vuhuvBot',\r
+'W3C_Validator/','W3C_Validator',\r
+'W3C\-checklink/','W3C-checklink',\r
+'WBSearchBot/','WBSearchBot',\r
+'WbSrch/','WbSrch/',\r
+'WeSEE:Ads/PageBot','WeSEE:Ads/PageBot',\r
+'WeSEE:Ads/PictureBot','WeSEE:Ads/PictureBot',\r
+'WeSEE_Bot','WeSEE_Bot',\r
+'Wget/','Wget',\r
+'Who\.is[\x20]Bot','Who.is.Bot',\r
+'wonderbot/','wonderbot',\r
+'woobot/','woobot',\r
+'Wotbox/','Wotbox',\r
+'Xaldon[\x20]WebSpider','Xaldon WebSpider',\r
+'Xenu[\x20]Link[\x20]Sleuth','Xenu Link Sleuth',\r
+'xenu_link_sleuth','xenu_link_sleuth',\r
+'XML[\x20]Sitemaps[\x20]Generator','XML Sitemaps Generator',\r
+'XoviBot/','XoviBot',\r
+'yacybot','yacybot',\r
+'Yahoo[\x20]Link[\x20]Preview','Yahoo Link Preview',\r
+'yak','yak-linkfluence',\r
+'YisouSpider','YisouSpider',\r
+'yoozBot','yoozBot',\r
+'Your\-Website\-Sucks','Your-Website-Sucks',\r
+'zoominfobot','zoominfobot',\r
+'zspider/','zspider',\r
+'ZumBot/','ZumBot',\r
+'ng/1\.','ng/1.',\r
+'ng/2\.','ng/2.',\r
+'libwww\-perl','libwww-perl',\r
+'CFNetwork','CFNetwork',\r
+'urllib','urllib',\r
+'javabee','javabee',\r
+'projectwf\-java\-test\-crawler','projectwf-java-test-crawler',\r
+'java','Java ( catchall )',\r
+'loocalcrawler/nutch','loocalcrawler/nutch',\r
+'nutchosu\-vlib','nutchosu-vlib',\r
+'nutch','nutch ( catchall )',\r
+'perlcrawler','perlcrawler',\r
+'perl','perl',\r
+'(firefox/)([0-9]\.|[0-1][0]\.)','Firefox version 10 and lower - various robots',\r
+\r
+# Less common robots (In robot file)\r
+'^Mozilla$','Mozilla ( Rogue Robot )',\r
+'^mozilla\/3\.0\s\(compatible$', 'mozilla/3.0 (compatible - ( Rogue Robot )',\r
+'^mozilla\/4\.0$', 'mozilla/4.0 - ( Rogue Robot )',\r
+'^mozilla\/4\.0\s\(compatible;\)$', 'mozilla/4.0 (compatible;) - ( Rogue Robot )',\r
+'^mozilla\/5\.0$', 'mozilla/5.0 - ( Rogue Robot )',\r
+'^mozilla\/5\.0\s\(compatible;$', 'mozilla/5.0 (compatible; - ( Rogue Robot )',\r
+'^mozilla\/5\.0\s\(en\-us\)$', 'mozilla/5.0 (en-us) - ( Rogue Robot )',\r
+'^mozilla\/5\.0\sfirefox\/3\.0\.5$', 'mozilla/5.0 firefox/3.0.5 - ( Rogue Robot )',\r
+'^Mozilla/6\.0[\x20]\(compatible\)$','Mozilla/6.0 (compatible) - ( Rogue Robot )',\r
+'^Mozilla/(.*)Beta[\x20]\(Windows\)','Mozilla Beta (Windows) - ( Rogue Robot )',\r
+'MSIE[\x20]2','MSIE 2 - ( Rogue Robot )',\r
+'MSIE[\x20]3','MSIE 3 - ( Rogue Robot )',\r
+'MSIE[\x20]4','MSIE 4 - ( Rogue Robot )',\r
+'MSIE[\x20]5','MSIE 5 - ( Rogue Robot )',\r
+'MSIE[\x20]6','MSIE 6 - ( Rogue Robot )',\r
+'MSIE\+6\.0\;','MSIE+6.0; - ( Rogue Robot )',\r
+'Windows[\x20]95','Windows 95 - ( Rogue Robot )',\r
+'Windows[\x20]98','Windows 98 - ( Rogue Robot )',\r
+\r
+# these could be removed to speed up processing as they are rarely seen\r
+'a6\-indexer','a6-indexer',\r
+'abcdatos','abcdatos',\r
+'abonti\.com','abonti.com',\r
+'acme\.spider','acme.spider',\r
+'activebookmark','activebookmark',\r
+'adamm_bot','adamm_bot',\r
+'advbot','advbot',\r
+'affectv\.co\.uk','affectv.co.uk',\r
+'ahoythehomepagefinder','ahoythehomepagefinder',\r
+'aleadsoftbot','aleadsoftbot',\r
+'alkaline','alkaline',\r
+'allrati','allrati',\r
+'alltop','alltop',\r
+'almaden','almaden',\r
+'alpha_search_agent','alpha_search_agent',\r
+'anthill','anthill',\r
+'antibot','antibot',\r
+'aport','aport',\r
+'appie','appie',\r
+'applesyndication','applesyndication',\r
+'arachnophilia','arachnophilia',\r
+'arale','arale',\r
+'araneo','araneo',\r
+'architext','architext',\r
+'archive\-de\.com','archive-de.com',\r
+'aretha','aretha',\r
+'argus','argus',\r
+'ariadne','ariadne',\r
+'arianna\.libero\.it','arianna.libero.it',\r
+'arks','arks',\r
+'aspider','aspider',\r
+'aspseek','aspseek',\r
+'asterias','asterias',\r
+'asynchttpclient','asynchttpclient',\r
+'atn\.txt','atn.txt',\r
+'atomz','atomz',\r
+'auresys','auresys',\r
+'awbot','awbot',\r
+'backlinktest\.com','backlinktest.com',\r
+'backrub','backrub',\r
+'bbot','bbot',\r
+'becomebot','becomebot',\r
+'bender','bender',\r
+'betabot','betabot',\r
+'bigbrother','bigbrother',\r
+'biglotron','biglotron',\r
+'BingLocalSearch','BingLocalSearch',\r
+'bittorrent_bot','bittorrent_bot',\r
+'biz360[_+\s]spider','biz360 spider',\r
+'bjaaland','bjaaland',\r
+'blackwidow','blackwidow',\r
+'blindekuh','blindekuh',\r
+'blogbridge[_+\s]service','blogbridge service',\r
+'blogged_crawl','blogged_crawl',\r
+'bloglines','bloglines',\r
+'bloglovin','bloglovin',\r
+'blogpulse','blogpulse',\r
+'blogsearch','blogsearch',\r
+'blogshares','blogshares',\r
+'blogslive','blogslive',\r
+'blogssay','blogssay',\r
+'bloodhound','bloodhound',\r
+'bncf\.firenze\.sbn\.it/raccolta\.txt','bncf.firenze.sbn.it/raccolta.txt',\r
+'bobby','bobby',\r
+'bookmark\-manager','bookmark-manager',\r
+'borg\-bot','borg-bot',\r
+'boris','boris',\r
+'brightnet','brightnet',\r
+'bruinbot','bruinbot',\r
+'bspider','bspider',\r
+'bubing','bubing',\r
+'bumblebee','bumblebee',\r
+'butterfly','butterfly',\r
+'buzztracker','buzztracker',\r
+'cactvschemistryspider','cactvschemistryspider',\r
+'calif[^r]','calif[^r]',\r
+'candlelight[_+\s]favorites[_+\s]inspector','candlelight favorites inspector',\r
+'careerbot','careerbot',\r
+'carpathia','carpathia',\r
+'cassandra','cassandra',\r
+'catbot','catbot',\r
+'cbn00glebot','cbn00glebot',\r
+'cerberian_drtrs','cerberian_drtrs',\r
+'cfetch','cfetch',\r
+'cgireader','cgireader',\r
+'chattertrap','chattertrap',\r
+'check_http','check_http',\r
+'checkbot','checkbot',\r
+'checkweb_link_validator','checkweb_link_validator',\r
+'christcrawler','christcrawler',\r
+'churl','churl',\r
+'cienciaficcion','cienciaficcion',\r
+'cipinetbot','cipinetbot',\r
+'imagecoccoc','imagecoccoc',\r
+'coccoc','coccoc',\r
+'coldfusion','coldfusion',\r
+'collective','collective',\r
+'combine','combine',\r
+'commons\-httpclient','commons-httpclient',\r
+'computer_and_automation_research_institute_crawler','computer_and_automation_research_institute_crawler',\r
+'conceptbot','conceptbot',\r
+'contentmatch','contentmatch',\r
+'converamultimediacrawler','converamultimediacrawler',\r
+'coolbot','coolbot',\r
+'copubbot','copubbot',\r
+'core','core',\r
+'covario','covario',\r
+'cruiser','cruiser',\r
+'cscrawler','cscrawler',\r
+'cuasarbot','cuasarbot',\r
+'cursor','cursor',\r
+'cusco','cusco',\r
+'custo','custo',\r
+'cyberspyder','cyberspyder',\r
+'datafountains/dmoz_downloader','datafountains/dmoz_downloader',\r
+'dataprovider\.com','dataprovider.com',\r
+'daviesbot','daviesbot',\r
+'daylifefeedfetcher','daylifefeedfetcher',\r
+'daypopbot','daypopbot',\r
+'deepindex','deepindex',\r
+'desertrealm','desertrealm',\r
+'deweb','deweb',\r
+'dienstspider','dienstspider',\r
+'digger','digger',\r
+'digout4u','digout4u',\r
+'diibot','diibot',\r
+'dipsie\.bot','dipsie.bot',\r
+'direct_hit','direct_hit',\r
+'discobot','discobot',\r
+'dlvr\.it','dlvr.it',\r
+'dnabot','dnabot',\r
+'dnsgroup','dnsgroup',\r
+'doccheckbot','doccheckbot',\r
+'domainappender','domainappender',\r
+'domainchecker','domainchecker',\r
+'domainsdb\.net','domainsdb.net',\r
+'download_express','download_express',\r
+'dragonbot','dragonbot',\r
+'dreamwidth','dreamwidth',\r
+'drupal','drupal',\r
+'dulance','dulance',\r
+'dumbot','dumbot',\r
+'dumm\.de\-bot','dumm.de-bot',\r
+'dwcp','dwcp',\r
+'e\-collector','e-collector',\r
+'earthcom\.info','earthcom.info',\r
+'easydl','easydl',\r
+'ebiness','ebiness',\r
+'eccp','eccp',\r
+'echo!','echo!',\r
+'edgeio\-retriever','edgeio-retriever',\r
+'elfinbot','elfinbot',\r
+'emacs','emacs',\r
+'emcspider','emcspider',\r
+'enteprise','enteprise',\r
+'ernst[:blank:]2\.0','ernst[:blank:]2.0',\r
+'esther','esther',\r
+'ets_v','ets_v',\r
+'eventax','eventax',\r
+'everbeecrawler','everbeecrawler',\r
+'everest\-vulcan','everest-vulcan',\r
+'evliyacelebi','evliyacelebi',\r
+'exactseek','exactseek',\r
+'extreme[_+\s]picture[_+\s]finder','extreme picture finder',\r
+'ezoom','ezoom',\r
+'ezresult','ezresult',\r
+'facebook','facebook',\r
+'facebot','facebot',\r
+'fast\-search\-engine','fast-search-engine',\r
+'matrix_s\.p\.a\._\-_fast_enterprise_crawler','matrix_s.p.a._-_fast_enterprise_crawler',\r
+'fast_enterprise_crawler','fast_enterprise_crawler',\r
+'fastbot','fastbot',\r
+'fastcrawler','fastcrawler',\r
+'favicon','favicon',\r
+'favorg','favorg',\r
+'favorites_sweeper','favorites_sweeper',\r
+'fdse','fdse',\r
+'feedburner','feedburner',\r
+'feedcrawl','feedcrawl',\r
+'feedflow','feedflow',\r
+'feedmyinbox','feedmyinbox',\r
+'feedroll\.com','feedroll.com',\r
+'feedsky','feedsky',\r
+'feedster','feedster',\r
+'feedvalidator','feedvalidator',\r
+'feedzira','feedzira',\r
+'felix','felix',\r
+'ferret','ferret',\r
+'fetchbot','fetchbot',\r
+'fetchrover','fetchrover',\r
+'fever/','fever',\r
+'fido','fido',\r
+'filmkamerabot','filmkamerabot',\r
+'filterdb\.iss\.net','filterdb.iss.net',\r
+'finderlein[_+\s]research[_+\s]crawler','finderlein research crawler',\r
+'findexa_crawler','findexa_crawler',\r
+'finnish','finnish',\r
+'fireball','fireball',\r
+'firmilybot','firmilybot',\r
+'flexum','flexum',\r
+'foaf\-search\.net','foaf-search.net',\r
+'fooky\.com/ScorpionBot','fooky.com/ScorpionBot',\r
+'fouineur','fouineur',\r
+'francoroute','francoroute',\r
+'freecrawl','freecrawl',\r
+'freenews','freenews',\r
+'funnelweb','funnelweb',\r
+'g2crawler','g2crawler',\r
+'gama','gama',\r
+'gazz','gazz',\r
+'gcreep','gcreep',\r
+'geniebot','geniebot',\r
+'genieo','genieo',\r
+'geohasher','geohasher',\r
+'getbot','getbot',\r
+'geturl','geturl',\r
+'gigablastopensource','gigablastopensource',\r
+'global_fetch','global_fetch',\r
+'gnodspider','gnodspider',\r
+'goforit\.com','goforit.com',\r
+'goforitbot','goforitbot',\r
+'golem','golem',\r
+'gonzo','gonzo',\r
+'gougou','gougou',\r
+'gpu_p2p_crawler','gpu_p2p_crawler',\r
+'grabber','grabber',\r
+'grapeshot','grapeshot',\r
+'grapnel','grapnel',\r
+'griffon','griffon',\r
+'gromit','gromit',\r
+'grub','grub',\r
+'gulliver','gulliver',\r
+'gulperbot','gulperbot',\r
+'hambot','hambot',\r
+'hanrss','hanrss',\r
+'harvest','harvest',\r
+'havindex','havindex',\r
+'henrythemiragorobot','henrythemiragorobot',\r
+'heritrix','heritrix',\r
+'hl_ftien_spider','hl_ftien_spider',\r
+'hometown','hometown',\r
+'hoowwwer','hoowwwer',\r
+'hpprint','hpprint',\r
+'htdig','htdig',\r
+'html[_+\s]link[_+\s]validator','html link validator',\r
+'htmlgobble','htmlgobble',\r
+'htmlparser','htmlparser',\r
+'httrack','httrack',\r
+'hundesuche\.com\-bot','hundesuche.com-bot',\r
+'hyperdecontextualizer','hyperdecontextualizer',\r
+'ia_archiver\-web\.archive\.org','ia_archiver-web.archive.org',\r
+'ia_archiver','ia_archiver',\r
+'iajabot','iajabot',\r
+'iaskspider','iaskspider',\r
+'i\-bot','i-bot',\r
+'icarus6j','icarus6j',\r
+'ichiro','ichiro',\r
+'icjobs\.de','icjobs.de',\r
+'ilse','ilse',\r
+'iltrovatore\-setaccio','iltrovatore-setaccio',\r
+'imagelock','imagelock',\r
+'implisensebot','implisensebot',\r
+'inagist','inagist',\r
+'incywincy','incywincy',\r
+'infobot','infobot',\r
+'infociousbot','infociousbot',\r
+'infohelfer','infohelfer',\r
+'infomine','infomine',\r
+'informant','informant',\r
+'infoseeksidewinder','infoseeksidewinder',\r
+'infoseek','infoseek',\r
+'infospider','infospider',\r
+'inspectorwww','inspectorwww',\r
+'insurancobot','insurancobot',\r
+'integromedb\.org','integromedb.org',\r
+'intelliagent','intelliagent',\r
+'internet[_+\s]ninja','internet ninja',\r
+'internetarchive','internetarchive',\r
+'internetseer','internetseer',\r
+'internetsupervision','internetsupervision',\r
+'ips\-agent','ips-agent',\r
+'irobot','irobot',\r
+'iron33','iron33',\r
+'isearch2006','isearch2006',\r
+'israelisearch','israelisearch',\r
+'iupui_research_bot','iupui_research_bot',\r
+'izsearch','izsearch',\r
+'jacobin[\x20]club','jacobin club',\r
+'jakarta','jakarta',\r
+'jbot','jbot',\r
+'jcrawler','jcrawler',\r
+'jeeves','jeeves',\r
+'jennybot','jennybot',\r
+'jobboerse','jobboerse',\r
+'jobot','jobot',\r
+'jobo','jobo',\r
+'joebot','joebot',\r
+'jrtwine[_+\s]software[_+\s]check[_+\s]favorites[_+\s]utility','jrtwine software check favorites utility',\r
+'js\-kit','js-kit',\r
+'jubii','jubii',\r
+'jumpstation','jumpstation',\r
+'justview','justview',\r
+'kalambot','kalambot',\r
+'kamano\.de_newsfeedverzeichnis','kamano.de_newsfeedverzeichnis',\r
+'kapsi','kapsi',\r
+'katipo','katipo',\r
+'kazoombot','kazoombot',\r
+'kevin','kevin',\r
+'keyoshid','keyoshid',\r
+'kilroy','kilroy',\r
+'kinja\-imagebot','kinja-imagebot',\r
+'kinjabot','kinjabot',\r
+'knowitall','knowitall',\r
+'knowledge\.com','knowledge.com',\r
+'ko[_+\s]yappo[_+\s]robot','ko yappo robot',\r
+'kouaa_krawler','kouaa_krawler',\r
+'krugle','krugle',\r
+'ksibot','ksibot',\r
+'kummhttp','kummhttp',\r
+'kurzor','kurzor',\r
+'labelgrabber\.txt','labelgrabber.txt',\r
+'lanshanbot','lanshanbot',\r
+'larbin','larbin',\r
+'largesmall[\x20]crawler','largesmall crawler',\r
+'legs','legs',\r
+'letscrawl\.com','letscrawl.com',\r
+'libcrawl','libcrawl',\r
+'lilina','lilina',\r
+'link_valet_online','link_valet_online',\r
+'linkbot','linkbot',\r
+'linkdex\.com','linkdex.com',\r
+'linkidator','linkidator',\r
+'linkscan','linkscan',\r
+'linkstats[\x20]bot','linkstats bot',\r
+'linkwalker','linkwalker',\r
+'lipperhey','lipperhey',\r
+'livejournal\.com','livejournal.com',\r
+'lmspider','lmspider',\r
+'loadtimebot','loadtimebot',\r
+'lockon','lockon',\r
+'logo_gif','logo_gif',\r
+'longurl','longurl',\r
+'lssrocketcrawler','lssrocketcrawler',\r
+'ltbot','ltbot',\r
+'ltx71','ltx71',\r
+'lwp\-request','lwp-request',\r
+'lwp\-trivial','lwp-trivial',\r
+'lycos[_+\s]','lycos ',\r
+'macworm','macworm',\r
+'madaali\.de','madaali.de',\r
+'magpierss','magpierss',\r
+'magpie','magpie',\r
+'mapoftheinternet\.com','mapoftheinternet.com',\r
+'marvin','marvin',\r
+'mattie','mattie',\r
+'mediabot','mediabot',\r
+'mediafox','mediafox',\r
+'megaindex','megaindex',\r
+'megite','megite',\r
+'memorybot','memorybot',\r
+'mercator','mercator',\r
+'meshexplorer','meshexplorer',\r
+'metager2\-verification\-bot','metager2-verification-bot',\r
+'metajobbot','metajobbot',\r
+'metaspinner','metaspinner',\r
+'metauri','metauri',\r
+'miadev','miadev',\r
+'microsoft[_+\s]url[_+\s]control','microsoft url control',\r
+'microsoft[\x20]bits','microsoft bits',\r
+'microsoft\-webdav\-miniredir','microsoft-webdav-miniredir',\r
+'mindcrawler','mindcrawler',\r
+'mindupbot','mindupbot',\r
+'mini\-reptile','mini-reptile',\r
+'minirank','minirank',\r
+'misterbot','misterbot',\r
+'miva','miva',\r
+'mizzu_labs','mizzu_labs',\r
+'mnogosearch','mnogosearch',\r
+'moget','moget',\r
+'momspider','momspider',\r
+'monster','monster',\r
+'motor','motor',\r
+'movabletype','movabletype',\r
+'ms[_+\s]search[_+\s]6\.0[_+\s]robot','ms search 6.0 robot',\r
+'ms_search_4\.0_robot','ms_search_4.0_robot',\r
+'msnbot\-udiscovery','msnbot-udiscovery',\r
+'msrabot','msrabot',\r
+'msrbot','msrbot',\r
+'mt::telegraph::agent','mt::telegraph::agent',\r
+'muncher','muncher',\r
+'muscatferret','muscatferret',\r
+'mwdsearch','mwdsearch',\r
+'mydoyouhike','mydoyouhike',\r
+'myweb','myweb',\r
+'nagios','nagios',\r
+'nasa_search','nasa_search',\r
+'ndspider','ndspider',\r
+'nederland\.zoek','nederland.zoek',\r
+'netcarta','netcarta',\r
+'netcraft','netcraft',\r
+'netluchs','netluchs',\r
+'netmechanic','netmechanic',\r
+'netnewswire','netnewswire',\r
+'netscoop','netscoop',\r
+'netsprint','netsprint',\r
+'netvibes','netvibes',\r
+'newrelicpinger','newrelicpinger',\r
+'newscan\-online','newscan-online',\r
+'newsfox','newsfox',\r
+'newsgatoronline','newsgatoronline',\r
+'nextgensearchbot','nextgensearchbot',\r
+'nhse','nhse',\r
+'nicebot','nicebot',\r
+'nimblecrawler','nimblecrawler',\r
+'ning','ning',\r
+'nomad','nomad',\r
+'northstar','northstar',\r
+'noxtrumbot','noxtrumbot',\r
+'npbot','npbot',\r
+'nzexplorer','nzexplorer',\r
+'objectssearch','objectssearch',\r
+'occam','occam',\r
+'ocelli','ocelli',\r
+'octopus','octopus',\r
+'octora_beta_bot','octora_beta_bot',\r
+'onet\.pl[_+\s]sa','onet.pl sa',\r
+'onfolio','onfolio',\r
+'openfind','openfind',\r
+'opentaggerbot','opentaggerbot',\r
+'openwebspider','openwebspider',\r
+'optimizer','optimizer',\r
+'oracle_ultra_search','oracle_ultra_search',\r
+'orb_search','orb_search',\r
+'orbiter','orbiter',\r
+'packrat','packrat',\r
+'pageboy','pageboy',\r
+'panscient','panscient',\r
+'parasite','parasite',\r
+'passwordmaker\.org','passwordmaker.org',\r
+'patric','patric',\r
+'pear_http_request_class','pear_http_request_class',\r
+'peerbot','peerbot',\r
+'pegasus','pegasus',\r
+'perignator','perignator',\r
+'perman','perman',\r
+'petersnews','petersnews',\r
+'phantom','phantom',\r
+'php[_+\s]version[_+\s]tracker','php version tracker',\r
+'phpcrawl','phpcrawl',\r
+'phpdig','phpdig',\r
+'picmole','picmole',\r
+'pictureofinternet','pictureofinternet',\r
+'piltdownman','piltdownman',\r
+'pimptrain','pimptrain',\r
+'ping\.blo\.gs','ping.blo.gs',\r
+'pingdom','pingdom',\r
+'pioneer','pioneer',\r
+'pita','pita',\r
+'pitkow','pitkow',\r
+'pjspider','pjspider',\r
+'plinki','plinki',\r
+'pluckfeedcrawler','pluckfeedcrawler',\r
+'plumtreewebaccessor','plumtreewebaccessor',\r
+'pogodak','pogodak',\r
+'pompos','pompos',\r
+'popdexter','popdexter',\r
+'poppi','poppi',\r
+'port_huron_labs','port_huron_labs',\r
+'portalb','portalb',\r
+'postfavorites','postfavorites',\r
+'postpost','postpost',\r
+'postrank','postrank',\r
+'powermarks','powermarks',\r
+'printfulbot','printfulbot',\r
+'proodlebot','proodlebot',\r
+'protopage','protopage',\r
+'publiclibraryarchive','publiclibraryarchive',\r
+'pyquery','pyquery',\r
+'python','python',\r
+'qihoobot','qihoobot',\r
+'quipply','quipply',\r
+'qwantify','qwantify',\r
+'r6\_','r6\_',\r
+'rambler','rambler',\r
+'ratingburner','ratingburner',\r
+'raven','raven',\r
+'rbse','rbse',\r
+'redalert','redalert',\r
+'regator','regator',\r
+'relevantnoise\.com','relevantnoise.com',\r
+'resumerobot','resumerobot',\r
+'rhcs','rhcs',\r
+'riddler','riddler',\r
+'road_runner','road_runner',\r
+'robbie','robbie',\r
+'robi','robi',\r
+'robocrawl','robocrawl',\r
+'robofox','robofox',\r
+'robozilla','robozilla',\r
+'rojo','rojo',\r
+'rome[\x20]client','rome client',\r
+'roverbot','roverbot',\r
+'rpt\-httpclient','rpt-httpclient',\r
+'rssgraffiti','rssgraffiti',\r
+'rssimagesbot','rssimagesbot',\r
+'ruffle','ruffle',\r
+'rufusbot','rufusbot',\r
+'rules','rules',\r
+'safeads\.xyz','safeads.xyz',\r
+'safetynetrobot','safetynetrobot',\r
+'sage\+\+','sage++',\r
+'sandcrawler','sandcrawler',\r
+'savetheworldheritage','savetheworldheritage',\r
+'sbider','sbider',\r
+'schizozilla','schizozilla',\r
+'scooter','scooter',\r
+'scoutjet','scoutjet',\r
+'scumbot','scumbot',\r
+'search\-info','search-info',\r
+'search_au','search_au',\r
+'searchguild[_+\s]dmoz[_+\s]experiment','searchguild dmoz experiment',\r
+'searchmetricsbot','searchmetricsbot',\r
+'searchprocess','searchprocess',\r
+'seekbot','seekbot',\r
+'semalt','semalt',\r
+'senrigan','senrigan',\r
+'sensis_web_crawler','sensis_web_crawler',\r
+'seodiver','seodiver',\r
+'seokicks\.de','seokicks.de',\r
+'seoscanners','seoscanners',\r
+'sgscout','sgscout',\r
+'shaggy','shaggy',\r
+'shaihulud','shaihulud',\r
+'shareaholicbot','shareaholicbot',\r
+'shoutcast','shoutcast',\r
+'sift','sift',\r
+'simbot','simbot',\r
+'simplepie','simplepie',\r
+'sistrix','sistrix',\r
+'site\-valet','site-valet',\r
+'sitebot','sitebot',\r
+'sitedomain\-bot','sitedomain-bot',\r
+'sitetech','sitetech',\r
+'skimbot','skimbot',\r
+'skymob','skymob',\r
+'slcrawler','slcrawler',\r
+'slurp','slurp',\r
+'slysearch','slysearch',\r
+'smartspider','smartspider',\r
+'smtbot','smtbot',\r
+'snap\.com_beta_crawler','snap.com_beta_crawler',\r
+'snappy','snappy',\r
+'snooper','snooper',\r
+'sohu\-search','sohu-search',\r
+'sohu','sohu ( catchall )',\r
+'solbot','solbot',\r
+'speedy','speedy',\r
+'sphere_scout','sphere_scout',\r
+'spider[_+\s]monkey','spider monkey',\r
+'spiderline','spiderline',\r
+'spiderlytics','spiderlytics',\r
+'spiderman','spiderman',\r
+'spiderview','spiderview',\r
+'spip','spip',\r
+'sproose_crawler','sproose_crawler',\r
+'spry','spry',\r
+'sqworm','sqworm',\r
+'ssearcher','ssearcher',\r
+'steeler','steeler',\r
+'steroid__download','steroid__download',\r
+'stq_bot','stq_bot',\r
+'Stratagems[\x20]Kumo','Stratagems Kumo',\r
+'suchfin\-bot','suchfin-bot',\r
+'suke','suke',\r
+'summify\.com','summify.com',\r
+'sunrise','sunrise',\r
+'suntek','suntek',\r
+'superbot','superbot',\r
+'superfeedr','superfeedr',\r
+'susie','susie',\r
+'sven','sven',\r
+'syndic8','syndic8',\r
+'syndicapi','syndicapi',\r
+'synoobot','synoobot',\r
+'synthesio','synthesio',\r
+'t\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e','t-h-u-n-d-e-r-s-t-o-n-e',\r
+'tach_bw','tach_bw',\r
+'tagyu_agent','tagyu_agent',\r
+'tailrank','tailrank',\r
+'tarantula','tarantula',\r
+'tarspider','tarspider',\r
+'tcl_http_client_package','tcl_http_client_package',\r
+'techbot','techbot',\r
+'technoratibot','technoratibot',\r
+'templeton','templeton',\r
+'teoma','teoma',\r
+'teragramcrawlersurf','teragramcrawlersurf',\r
+'test_crawler','test_crawler',\r
+'testbot','testbot',\r
+'thumbsniper','thumbsniper',\r
+'titan','titan',\r
+'titin','titin',\r
+'tkwww','tkwww',\r
+'tlspider','tlspider',\r
+'topblogsinfo','topblogsinfo',\r
+'topicblogs','topicblogs',\r
+'topix\.net','topix.net',\r
+'trapit','trapit',\r
+'trileet','trileet',\r
+'turtlescanner','turtlescanner',\r
+'turtle','turtle',\r
+'tutorgigbot','tutorgigbot',\r
+'tweetedtimes','tweetedtimes',\r
+'twiceler','twiceler',\r
+'twisted[\x20]pagegetter','twisted pagegetter',\r
+'twitterbot','twitterbot',\r
+'twitterfeed','twitterfeed',\r
+'ubicrawler','ubicrawler',\r
+'ucsd','ucsd',\r
+'udmsearch','udmsearch',\r
+'ultraseek','ultraseek',\r
+'um\-IC','ubermetrics-technologies.com',\r
+'um\-LN','ubermetrics-technologies.com',\r
+'unchaos_bot_hybrid_web_search_engine','unchaos_bot_hybrid_web_search_engine',\r
+'unido\-bot','unido-bot',\r
+'unisterbot','unisterbot',\r
+'universalfeedparser','universalfeedparser',\r
+'unlost_web_crawler','unlost_web_crawler',\r
+'unwindfetchor','unwindfetchor',\r
+'updated','updated',\r
+'urlck','urlck',\r
+'ustc\-semantic\-group','ustc-semantic-group',\r
+'vagabondo\-wap','vagabondo-wap',\r
+'vagabondo','vagabondo',\r
+'valkyrie','valkyrie',\r
+'vermut','vermut',\r
+'versus_crawler_from_eda\.baykan@epfl\.ch','versus_crawler_from_eda.baykan@epfl.ch',\r
+'verticrawl','verticrawl',\r
+'vespa_crawler','vespa_crawler',\r
+'victoria','victoria',\r
+'virus[_+\s]detector','virus_detector',\r
+'visionsearch','visionsearch',\r
+'voidbot','voidbot',\r
+'voltron','voltron',\r
+'vse/','vse',\r
+'vwbot','vwbot',\r
+'w3c[_+\s]css[_+\s]validator[_+\s]jfouffa','w3c_css_validator_jfouffa',\r
+'w3index','w3index',\r
+'w3m2','w3m2',\r
+'wallpaper','wallpaper',\r
+'wanderer','wanderer',\r
+'wapspider','wapspider',\r
+'wapspIRLider','wapspIRLider',\r
+'watchmouse','watchmouse',\r
+'wavefire','wavefire',\r
+'waybackarchive\.org','waybackarchive.org',\r
+'wazzup','wazzup',\r
+'web_downloader','web_downloader',\r
+'webbandit','webbandit',\r
+'webbase','webbase',\r
+'webcatcher','webcatcher',\r
+'webclipping\.com','webclipping.com',\r
+'webcollage','webcollage',\r
+'webcompass','webcompass',\r
+'webcopy','webcopy',\r
+'webcrawl\.net','webcrawl.net',\r
+'webdup','webdup',\r
+'webfetcher','webfetcher',\r
+'webfilter','webfilter',\r
+'webfoot','webfoot',\r
+'webinator','webinator',\r
+'webindexer','webindexer',\r
+'weblayers','weblayers',\r
+'weblinker','weblinker',\r
+'webminer','webminer',\r
+'webmirror','webmirror',\r
+'webmoose','webmoose',\r
+'webquest','webquest',\r
+'webreader','webreader',\r
+'webreaper','webreaper',\r
+'website[_+\s]monitoring[_+\s]bot','website monitoring bot',\r
+'websnarf','websnarf',\r
+'webspider','webspider',\r
+'webvac','webvac',\r
+'webvulncrawl','webvulncrawl',\r
+'webwalker','webwalker',\r
+'webwalk','webwalk',\r
+'webwatch','webwatch',\r
+'wells_search','wells_search',\r
+'wer\-liefert\-was','wer-liefert-was',\r
+'wesee:search','wesee:search',\r
+'wevikabot','wevikabot',\r
+'whatuseek','whatuseek',\r
+'whowhere','whowhere',\r
+'windows\-rss\-platform','windows-rss-platform',\r
+'wired\-digital','wired-digital',\r
+'zyborg','zyborg',\r
+'wisenutbot','wisenutbot',\r
+'wiumi','wiumi',\r
+'wmir','wmir',\r
+'wolp','wolp',\r
+'wombat','wombat',\r
+'wonderer','wonderer',\r
+'woozweb','woozweb',\r
+'wordpress','wordpress',\r
+'worm','worm',\r
+'wume_crawler','wume_crawler',\r
+'wwwc','wwwc',\r
+'wwweasel','wwweasel',\r
+'wz101','wz101',\r
+'xget','xget',\r
+'xirq','xirq',\r
+'xydo','xydo',\r
+'y!j','y!j',\r
+'yahoo![\x20]searchmonkey','yahoo! searchmonkey',\r
+'yahoo!_mindset','yahoo!_mindset',\r
+'yahoo\-blogs','yahoo-blogs',\r
+'yahoo\-mmcrawler','yahoo-mmcrawler',\r
+'yahoo\-newscrawler','yahoo-newscrawler',\r
+'yahoo[\x20]pipes','yahoo pipes',\r
+'yahoo\-verticalcrawler','yahoo-verticalcrawler',\r
+'yahoocachesystem','yahoocachesystem',\r
+'yahooexternalcache','yahooexternalcache',\r
+'yahoofeedseeker','yahoofeedseeker',\r
+'yahooseeker\-testing','yahooseeker-testing',\r
+'yahooseeker','yahooseeker',\r
+'yahooysmcm','yahooysmcm',\r
+'yammer','yammer',\r
+'yanga','yanga',\r
+'yet\-another\-spider','yet-another-spider',\r
+'yeti','yeti',\r
+'yie8','yie8',\r
+'yodaobot','yodaobot',\r
+'yooglifetchagent','yooglifetchagent',\r
+'youdao','youdao',\r
+'yourls','yourls',\r
+'z\-add_link_checker','z-add_link_checker',\r
+'zealbot','zealbot',\r
+'zemanta','zemanta',\r
+'zend_http_client','zend_http_client',\r
+'zeus','zeus',\r
+'zhuaxia','zhuaxia',\r
+'[^a]fish','[^a]fish',\r
+'[\x20]netseer[\x20]',' netseer ',\r
+'^[1-3]$','^[1-3]$',\r
+'^finbot','^finbot',\r
+'^motorola$','^motorola$',\r
+'^msie','^msie',\r
+'^voyager/','^voyager',\r
+'^webindex$','webindex',\r
+'1\-more_scanner','1-more_scanner',\r
+# below placed at end to catch some generics\r
+'nbot','nbot',\r
+\r
+# Generic robot\r
+'robot','robot',\r
+'blog','blog',\r
+'checker','checker',\r
+'crawl','crawl',\r
+'discover','discover',\r
+'feed','feed',\r
+'fetcher','fetcher',\r
+'hunter','hunter',\r
+'link','link',\r
+'scanner','scanner',\r
+'seek','seek',\r
+'sitemap','sitemap',\r
+'spider','spider',\r
+'sucker','sucker',\r
+'survey','survey',\r
+'validator','validator',\r
+'bot[\s_+:,\.\;\/\\\-]','Unknown robot identified by bot\*',\r
+'[\s_+:,\.\;\/\\\-]bot','Unknown robot identified by \*bot',\r
+'curl','Curl',\r
+'php','A PHP script',\r
+'ruby/','Ruby script',\r
+'no_user_agent','empty user agent string',\r
+# Moving oBot towards the end so it does not pick up other *obot robots\r
+'oBot/','oBot',\r
+# Unknown robots identified by hit on robots.txt\r
+'unknown','Unknown robot (identified by hit on robots.txt)'\r
+);\r
+\r
+\r
+# RobotsAffiliateLib\r
+# This list is meant to tell which search engine uses a given robot (currently empty).\r
+#-------------------------------------------------------------\r
+%RobotsAffiliateLib = (\r
+);\r
+\r
+1; # a file loaded with Perl's require must end by evaluating to a true value
\ No newline at end of file