# in the user agent string only once or just a few times. Most of the
# time a user agent string ist used that does not contain hints that
# a bot is involved. An example is the iCjobs spider.
+# msnbot-UDiscovery/2.0b seems to show this behaviour too.
#
#
#
'xenu\slink\ssleuth',
'xget',
# Other robots reported by users
+'^finbot', #UA string starts with "finbot", should not match "elfinbot"
+'^webindex$', #UA should not match "webindexer"
'1\-more_scanner',
'360spider',
'a6-indexer',
'alpha_search_agent',
'allrati',
'aport',
+'applebot',
'archive\-de\.com',
'archive\.org_bot',
'argus', # Must be before nutch
'daviesbot',
'daypopbot',
'deepindex',
+'deusu',
'dipsie\.bot',
'dnsgroup',
'doccheckbot',
'domainchecker',
'domainsdb\.net',
'dotbot',
+'duckduckgo-favicons-bot',
'dulance',
'dumbot',
'dumm\.de\-bot',
'g2crawler',
'gaisbot',
'geniebot',
+'genieo',
'gigablastopensource',
'gigabot',
'girafabot',
'ichiro',
'idmarch',
'iltrovatore\-setaccio',
+'implisensebot',
'infobot',
'infociousbot',
'infohelfer',
'iupui_research_bot',
'izsearch',
'james\sbot',
+'jobboerse', #AWStats does not seem to detect this one even though "JobboerseBot" and "jobboerse.com" appear in the UA string; maybe an earlier entry matches first
'jrtwine[_+\s]software[_+\s]check[_+\s]favorites[_+\s]utility',
'justview',
'kalambot',
'meanpathbot',
'mediabot',
'mediapartners\-google',
+'megaindex',
'megite',
'memorybot',
'metager2-verification-bot',
+'metajobbot', #Does not show up in the results of Sep. 2015 despite the fact that the corresponding log file has about 40 entries containing "MetaJobBot" in the UA string - strange.
'metaspinner',
'miadev',
-'microsoft bits',
+'microsoft\sbits',
'microsoft.*discovery', # = 'microsoft (?:office (?:protocol|existence)|data access internet publishing provider protocol) discovery',
'microsoft[_+\s]url[_+\s]control',
+'mindupbot',
'mini\-reptile',
'minirank',
'missigua_locator',
'mydoyouhike',
'nagios',
'nasa_search',
-'netestate ne crawler',
+'netestate\sne\scrawler',
'netluchs',
'netsprint',
'newsgatoronline',
'onfolio',
'opentaggerbot',
'openwebspider',
+'optimizer',
'oracle_ultra_search',
'orangebot',
'orbiter',
'yodaobot',
'qihoobot',
+'qwantify',
'passwordmaker\.org',
'pear_http_request_class',
'peerbot',
'pyquery',
'rambler',
'redalert',
+'riddler',
'rogerbot',
'rojo',
'rssimagesbot',
'ruffle',
'rufusbot',
+'safesearch',
'sandcrawler',
'savetheworldheritage',
'sbider',
'sensis_web_crawler',
'seodiver',
'seokicks\.de',
+'seoscanners',
'seznambot',
'shim\-crawler',
'shoutcast',
'sitedomain-bot',
'siteexplorer\.info',
+'skimbot',
'slysearch',
'smtbot',
'snap\.com_beta_crawler',
'teragramcrawlersurf',
'test_crawler',
'testbot',
+'thumbsniper',
't\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e',
'topicblogs',
'turnitinbot',
'ustc\-semantic\-group',
'vagabondo\-wap',
'vagabondo',
+'vebidoobot',
'vermut',
'versus_crawler_from_eda\.baykan@epfl\.ch',
'vespa_crawler',
+'voltron',
'vortex',
'vse\/',
'w3c\-checklink',
'watchmouse',
'wavefire',
'waybackarchive\.org',
+'wbsearchbot',
'webclipping\.com',
'webcompass',
'webcrawl\.net',
'website[_+\s]monitoring[_+\s]bot',
'webvulncrawl',
'wells_search',
+'wer-liefert-was',
'wesee:search',
'wevikabot',
'wonderer',
'flexum',
'yanga',
'yet-another-spider',
+'yisouspider',
'yooglifetchagent',
'z\-add_link_checker',
'zealbot',
'geohasher',
'hanrss',
'inagist',
-'jacobin club',
+'jacobin\sclub',
'jakarta',
'js\-kit',
-'largesmall crawler',
+'largesmall\scrawler',
'linkedinbot',
'longurl',
'metauri',
'^msie',
# End of hiding bots.
'netnewswire',
-' netseer ',
+'\snetseer\s',
'netvibes',
'newrelicpinger',
'newsfox',
'r6\_',
'ratingburner',
'regator',
-'rome client',
+'rome\sclient',
'rpt\-httpclient',
'rssgraffiti',
'sage\+\+',
'trapit',
'trileet',
'tweetedtimes',
-'twisted pagegetter',
+'twisted\spagegetter',
'twitterbot',
'twitterfeed',
'unwindfetchor',
'windows\-rss\-platform',
'wiumi',
'xydo',
-'yahoo! slurp',
-'yahoo pipes',
+'yahoo!\sslurp',
+'yahoo\spipes',
'yahoo\-newscrawler',
'yahoocachesystem',
'yahooexternalcache',
-'yahoo! searchmonkey',
+'yahoo!\ssearchmonkey',
'yahooysmcm',
'yammer',
# 'yandexbot', #already covered by 'yandex'
'jennybot','JennyBot',
'mercator','Mercator',
'msnbot\-media','<a href="http://search.msn.com/msnbot.htm" title="Bot home page [new window]" target="_blank">MSNBot-media</a>',
-'msnbot-udiscovery', '<a href="http://search.msn.com/msnbot.htm" title="Feb 18, 2015: UA contains indentification during robots.txt access only." target="_blank">msnbot-UDiscovery</a> Note: Most traffic counts as user traffic',
+'msnbot-udiscovery', '<a href="http://search.msn.com/msnbot.htm" title="Feb 18, 2015: UA contains indentification during robots.txt access only." target="_blank">msnbot-UDiscovery</a> Note: AWStats counts most of its traffic as user traffic',
'msnbot','<a href="http://search.msn.com/msnbot.htm" title="Bot home page [new window]" target="_blank">MSNBot</a>',
'netcraft','<a href="http://www.netcraft.com/survey/" title="Bot home page [new window]" target="_blank">Netcraft</a>',
'petersnews','Petersnews',
'xenu\slink\ssleuth', '<a href="http://home.snafu.de/tilman/xenulink.html" rel="nofollow" title="Description, Download, FAQ Page [new window]" target="_blank">Xenu'. "'" . 's Link Sleuth <sup>(TM)</sup></a>, see <a href="http://en.wikipedia.org/wiki/Xenu%27s_Link_Sleuth" rel="nofollow" title="Wikipedia on Xenu'. "'" . 's Link Sleuth [new window]" target="_blank">Wikipedia</a>',
'xget','XGET',
# Other robots reported by users
+'^finbot', '<span title="As on Sep. 10, 2015, the user agent string did not contain a web address.">finbot</span>',
+'^webindex$', '<span title="As on Oct. 28, 2015, the user agent string did not contain a web address.">WebIndex</span>',
'1\-more_scanner','<a href="http://www.myzips.com/software/1-More-Scanner.phtml" title="1-More Scanner home page [new window]" target="_blank">1-More Scanner</a>',
'360spider','<a href="https://www.google.com/search?q=360spider+-Ferrari" title="No home page, using Google search instead [new window]" target="_blank">360spider</a>',
'a6-indexer', '<a href="http://www.a6corp.com/a6-web-scraping-policy/" rel="nofollow" title="A6-Indexer [new window]" target="_blank">A6-Indexer</a>',
'alpha_search_agent','Alpha Search Agent',
'allrati','Allrati',
'aport', 'Aport',
+'applebot', '<a href="http://www.apple.com/go/applebot" rel="nofollow" title="Applebot Home Page [new window]" target="_blank">Applebot</a>',
'archive\-de\.com', '<a href="http://archive-de.com/bot" rel="nofollow" title="Archive-de.com Home Page [new window]" target="_blank">Archive-de.com</a>',
'archive\.org_bot','<a href="http://crawls.archive.org/collections/bncf/crawl.html" title="Bot home page [new window]" target="_blank">archive.org bot</a>',
'argus','<a href="http://www.simpy.com/bot.html" title="feedback@simpy.com Bot home page [new window]" target="_blank">Argus</a>',
'daviesbot', 'DaviesBot',
'daypopbot', 'DayPop',
'deepindex','<a href="http://www.deepindex.net/faq.php" title="Deepindex home page [new window]" target="_blank">Deepindex</a>',
+'deusu', '<a href="https://deusu.de/robot.html" rel="nofollow" title="DeuSu [new window]" target="_blank">DeuSu</a>',
'dipsie\.bot','<a href="http://www.dipsie.com/bot/" title="Bot home page [new window]" target="_blank">Dipsie</a>',
'dnsgroup','<a href="http://www.dnsgroup.com/" title="DNSGroup home page [new window]" target="_blank">DNSGroup</a>',
'doccheckbot', 'doccheckbot/1.0, known to <a href="http://www.projecthoneypot.org/ip_46.229.160.208" rel="nofollow" title="Info to IP 46.229.160.208 [new window]" target="_blank">Project Honey Pot</a>',
'domainchecker','<a href="http://net-promoter.com/" title="DomainChecker home page (not confirmed) [new window]" target="_blank">DomainChecker</a>',
'domainsdb\.net','<a href="http://domainsdb.net/" title="Bot home page [new window]" target="_blank">DomainsDB.net</a>',
'dotbot', '<a href="http://www.opensiteexplorer.org/dotbot" rel="nofollow" title="Home Page [new window]" target="_blank">DotBot, Open Site Explorer</a>',
+'duckduckgo-favicons-bot', '<a href="http://duckduckgo.com" rel="nofollow" title="DuckDuckGo Home Page [new window]" target="_blank">DuckDuckGo-Favicons-Bot</a>',
'dulance','<a href="http://www.dulance.com/bot.jsp" title="Bot home page [new window]" target="_blank">Dulance</a>',
'dumbot','<a href="http://www.dumbfind.com/" title="Dumbot home page [new window]" target="_blank">Dumbot</a>',
'dumm\.de\-bot','<a href="http://www.dumm.de/" title="dumm.de-Bot home page [new window]" target="_blank">dumm.de-Bot</a>',
'g2crawler','<a href="http://crawler.instantnetworks.net/" title="Bot home page (nobody@airmail.net) [new window]" target="_blank">G2Crawler</a>',
'gaisbot','<a href="http://gais.cs.ccu.edu.tw/robot.php" title="Bot home page [new window]" target="_blank">Gaisbot</a>',
'geniebot','<a href="http://www.genieknows.com/" title="Bot home page [new window]" target="_blank">Geniebot</a>',
+'genieo', '<a href="http://www.genieo.com/webfilter.html" rel="nofollow" title="Genieo [new window]" target="_blank">Genieo</a>',
'gigablastopensource', '<a href="http://www.gigablast.com/" rel="nofollow" title="Gigablast Home page [new window]" target="_blank">GigablastOpenSource</a>, an Open Source Search Engine(<a href="https://github.com/gigablast/open-source-search-engine/wiki" rel="nofollow" title="at GitHub [new window]" target="_blank">Wiki</a>)',
'gigabot','<a href="http://www.gigablast.com/spider.html" title="Bot home page [new window]" target="_blank">GigaBot</a>',
'girafabot','<a href="http://www.girafa.com/" title="Bot home page [new window]" target="_blank">Girafabot</a>',
'ichiro','<a href="http://help.goo.ne.jp/door/crawlerE.html" title="Bot home page [new window]" target="_blank">ichiro</a>',
'idmarch', '<a href="http://www.idmarch.org/bot.html" rel="nofollow" title=" Home Page [new window]" target="_blank">IDMARCH</a>',
'iltrovatore\-setaccio','<a href="http://www.iltrovatore.it/aiuto/motore_di_ricerca.html" title="bot@iltrovatore.it IlTrovatore-Setaccio home page [new window]" target="_blank">IlTrovatore-Setaccio</a>',
+'implisensebot', '<span title="As on Sep. 18, 2015, the user agent string did not contain a web address.">ImplisenseBot</span>',
'infobot','<a href="http://www.infobot.org/" title="InfoBot home page [new window]" target="_blank">InfoBot</a>',
'infociousbot','<a href="http://corp.infocious.com/tech_crawler.php" title="InfociousBot home page [new window]" target="_blank">InfociousBot</a>',
'infohelfer','<a href="http://www.infohelfer.de/crawler.php" title="Infohelfer home page [new window]" target="_blank">Infohelfer</a>',
'iupui_research_bot','<a href="http://spamhuntress.com/2005/04/25/a-mail-harvester-visits/" title="IUPUI_Research_Bot home page [new window]" target="_blank">IUPUI_Research_Bot</a>',
'izsearch', '<a href="http://izsearch.com/" rel="nofollow" title="iZSearch Home Page [new window]" target="_blank">iZSearch</a>',
'james\sbot', '<a href="http://cognitiveseo.com/bot.html" rel="nofollow" title="James BOT Home Page [new window]" target="_blank">James BOT</a>',
+'jobboerse', '<a href="http://www.xn--jobbrse-d1a.com" rel="nofollow" title="Jobbörse Home Page [new window]" target="_blank">Jobbörse</a>',
'jrtwine[_+\s]software[_+\s]check[_+\s]favorites[_+\s]utility','<a href="http://www.jrtwine.com/Products/CheckFavs/" title="JRTwine_Software_Check_Favorites_Utility home page [new window]" target="_blank">JRTwine_Software_Check_Favorites_Utility</a>',
'justview', 'JustView',
'kalambot','<a href="http://64.124.122.251/feedback.html" title="KalamBot home page [new window]" target="_blank">KalamBot</a>',
'mediapartners\-google','<a href="https://adwords.google.com/" title="Bot home page [new window]" target="_blank">Google AdSense</a>',
# 'Mediapartners-Google (Feb 12, 2015: no additial information in UA String, seems to use <a href="http://www.gigablast.com/" title="Gigablast Home page [new window]">GigablastOpenSource</a>',
# Uses UA string "Mediapartners-Google" only, and there were accesses using an UA string "GigablastOpenSource/1.0" from the same IP-Address.
-# Therefore this is probably not related to Google 4.3.2015 Albrecht M�ller
+# Therefore this is probably not related to Google 4.3.2015 Albrecht Müller
+'megaindex', '<a href="http://megaindex.com/crawler" rel="nofollow" title="MegaIndex Crawler Page [new window]" target="_blank">MegaIndex Crawler</a>, seems to belong to <a href="https://www.megaindex.ru/" rel="nofollow" title="MegaIndex.ru Home Page [new window]" target="_blank">MegaIndex.ru</a>',
'megite','<a href="http://www.megite.com/" title="Megite home page [new window]" target="_blank">Megite</a>',
'memorybot', '<a href="http://archivethe.net/en/index.php/about/internet_memory1" rel="nofollow" title="Archivethe.net Home Page [new window]" target="_blank">Archivethe.net</a>',
'metager2-verification-bot', '<a href="http://metager2.de/technology.php" rel="nofollow" title="metager2-verification-bot Home Page [new window]" target="_blank">metager2-verification-bot</a>',
'metager\-linkchecker','MetaGer LinkChecker',
+'metajobbot', '<a href="http://www.metajob.de/crawler" rel="nofollow" title="MetaJobBot [new window]" target="_blank">MetaJobBot</a>',
'metaspinner','<a href="http://index.meta-spinner.de/" title="Metaspinner home page [new window]" target="_blank">Metaspinner</a>',
'miadev', '<a href="http://www.mia-marktplatz.de/spider" rel="nofollow" title="MiaDev spider [new window]" target="_blank">MiaDev spider</a>',
-'microsoft bits', '<a href="http://msdn.microsoft.com/en-us/library/bb968799%28v=vs.85%29.aspx" rel="nofollow" title="Microsoft Background Intelligent Transfer Service (BITS)? [new window]" target="_blank">Microsoft Background Intelligent Transfer Service (BITS)?</a>',
+'microsoft\sbits', '<a href="http://msdn.microsoft.com/en-us/library/bb968799%28v=vs.85%29.aspx" rel="nofollow" title="Microsoft Background Intelligent Transfer Service (BITS)? [new window]" target="_blank">Microsoft Background Intelligent Transfer Service (BITS)?</a>',
'microsoft.*discovery', '<a href="http://support.microsoft.com/kb/838028/en-us" title="Microsoft KB838028 [new window]" target="_blank">Microsoft Office Protocol Discovery</a>/<a href="http://blogs.msdn.com/b/vsofficedeveloper/archive/2008/03/11/office-existence-discovery-protocol.aspx" title="Description of the Microsoft Office Existence Discovery [new window]" target="_blank">Microsoft Office Existence Discovery</a>',
'microsoft[_+\s]url[_+\s]control','<a href="http://www.webmasterworld.com/forum11/1005.htm" title="Microsoft URL Control home page [new window]" target="_blank">Microsoft URL Control</a>',
+'mindupbot', '<a href="http://datenbutler.de" rel="nofollow" title="DATENBUTLER home page [new window]" target="_blank">mindUpBot (datenbutler.de)</a>',
'minirank','<a href="http://minirank.com/" title="miniRank home page [new window]" target="_blank">miniRank</a>',
'mini\-reptile','Mini-reptile',
'missigua_locator','<a href="http://www.webmasterworld.com/forum11/2690.htm" title="Missigua_Locator home page [new window]" target="_blank">Missigua_Locator</a>',
'mydoyouhike','<a href="http://www.doyouhike.net/my" title="Mydoyouhike home page [new window]" target="_blank">Mydoyouhike</a>',
'nagios','Nagios',
'nasa_search','<a href="http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_n_s_140506_2\b" title="NASA Search home page [new window]" target="_blank">NASA Search</a>',
-'netestate ne crawler','<a href="http://www.website-datenbank.de/" title="Website-Datenbank home page [new window]" target="_blank">Website-Datenbank</a>',
+'netestate\sne\scrawler','<a href="http://www.website-datenbank.de/" title="Website-Datenbank home page [new window]" target="_blank">Website-Datenbank</a>',
'netluchs','<a href="http://www.netluchs.de/" title="Bot home page. [new window]" target="_blank">Netluchs</a>',
'netsprint','<a href="http://www.netsprint.pl/serwis/" title="NetSprint home page [new window]" target="_blank">NetSprint</a>',
'newsgatoronline', 'NewsGator Online',
'onfolio','<a href="http://www.onfolio.com/" title="Bot home page [new window]">Onfolio</a>',
'opentaggerbot','<a href="http://www.opentagger.com/opentaggerbot.htm" title="Bot home page [new window]">OpenTaggerBot</a>',
'openwebspider','<a href="http://www.openwebspider.org/" title="OpenWebSpider home page [new window]" target="_blank">OpenWebSpider</a>',
+'optimizer', '<span title="As on Oct. 2, 2015, the user agent string did not contain a web address.">Optimizer</span>',
'oracle_ultra_search','<a href="http://www.oracle.com/technology/products/ultrasearch/index.html" title="Oracle Ultra Search home page [new window]" target="_blank">Oracle Ultra Search</a>',
'orangebot', 'OrangeBot, no website, log entry specifies mail address', # support.orangebot@orange.com
'orbiter','<a href="http://www.dailyorbit.com/bot.htm" title="Orbiter home page [new window]" target="_blank">Orbiter</a>',
'yodaobot','<a href="http://www.yodao.com/help/webmaster/spider/" title="YodaoBot">OutfoxBot/YodaoBot</a>',
'qihoobot','<a href="http://www.qihoo.com/" title="QihooBot">QihooBot</a>',
+'qwantify', '<a href="https://www.qwant.com/" rel="nofollow" title="Qwant Home Page [new window]" target="_blank">Qwant</a>',
'passwordmaker\.org','<a href="http://passwordmaker.org/" title="passwordmaker.org home page [new window]" target="_blank">passwordmaker.org</a>',
'pear_http_request_class','<a href="http://pear.php.net/" title="PEAR HTTP Request class home page [new window]" target="_blank">PEAR HTTP Request class</a>',
'peerbot','<a href="http://www.peerbot.com/" title="PEERbot home page [new window]" target="_blank">PEERbot</a>',
#146.0.32.165's User Agent Strings
#Mozilla/5.0 (compatible; publiclibraryarchive.org/1.0; +crawl@publiclibraryarchive.org)
#Mozilla/5.0 (compatible; savetheworldheritage.org/1.0; +crawl@savetheworldheritage.org)
+#Mozilla/5.0 (compatible; seoscanners.net/1; +spider@seoscanners.net)
'pyquery','<a href="http://sourceforge.net/projects/pyquery/" title="PyQuery home page [new window]" target="_blank">PyQuery</a>',
'rambler','<a href="http://www.rambler.ru/doc/faq.shtml" title="Bot home page [new window]">StackRambler</a>',
'redalert','Red Alert',
'relevantnoise\.com', '<a href="http://www.relevantnoise.com/" title="Relevant Noise [new window]" target="_blank">Relevant Noise</a>',
+'riddler', '<a href="http://riddler.io/about" rel="nofollow" title="Riddler [new window]" target="_blank">Riddler</a>',
'rogerbot', '<a href="http://moz.com/help/pro/what-is-rogerbot-" rel="nofollow" title="Rogerbot Home Page [new window]" target="_blank">Rogerbot</a>',
'rojo','<a href="http://rojo.com/" title="Bot home page [new window]" target="_blank">RoJo</a> aggregator',
'rssimagesbot','<a href="http://herbert.groot.jebbink.nl/?app=rssImages" title="Bot home page [new window]" target="_blank">rssImagesBot</a>',
'ruffle','<a href="http://www.unreach.net/" title="Bot home page [new window]" target="_blank">ruffle SemanticWeb crawler</a>',
'rufusbot','<a href="http://64.124.122.252.webaroo.com/feedback.html" title="Bot home page [new window]" target="_blank">RufusBot Rufus Web Miner</a>',
+'safesearch', '<a href="https://safesearch.avira.com" rel="nofollow" title="Avira SafeSearch Home Page [new window]" target="_blank">Avira SafeSearch</a>',
'sandcrawler','<a href="http://www.microsoft.com/" title="Bot home page [new window]" target="_blank">SandCrawler (Microsoft)</a>',
'savetheworldheritage', '<a href="http://savetheworldheritage.org" rel="nofollow" title="On March 4, 2015 a page parked at GoDaddy [new window]" target="_blank">savetheworldheritage.org (related to spiderlytics.com, waybackarchive.org and/or publiclibraryarchive.org?)</a>',
'sbider','<a href="http://www.sitesell.com/sbider.html" title="Bot home page [new window]" target="_blank">SBIder</a>',
'sensis_web_crawler','<a href="http://www.sensis.com.au/" title="Sensis Web Crawler home page [new window]" target="_blank">Sensis Web Crawler</a>',
'seodiver', '<a href="http://www.seodiver.com/bot" rel="nofollow" title="SEO DIVER Bot Home Page [new window]" target="_blank">SEO DIVER</a>',
'seokicks\.de', '<a href="http://www.seokicks.de/robot.html" rel="nofollow" title="SEOkicks Webcrawler home page [new window]" target="_blank">SEOkicks Webcrawler</a>',
+'seoscanners', '<a href="http://seoscanners.net" rel="nofollow" title="On August 4, 2015 a page parked at GoDaddy [new window]" target="_blank">seoscanners.net</a> (related to publiclibraryarchive.org and savetheworldheritage.org?)',
'seznambot','<a href="http://fulltext.seznam.cz/" title="Bot home page [new window]" target="_blank">SeznamBot</a>',
'shim\-crawler','<a href="http://www.logos.ic.i.u-tokyo.ac.jp/crawler/" title="crawl@logos.ic.i.u-tokyo.ac.jp Bot home page [new window]" target="_blank">Shim-Crawler</a>',
'shoutcast','Shoutcast Directory Service',
'sitedomain-bot', '<a href="http://www.sitedomain.de/sitedomain-bot/" rel="nofollow" title="Sitedomain-Bot Home Page [new window]" target="_blank">Sitedomain.de</a>',
'siteexplorer\.info', '<a href="http://siteexplorer.info/" title="Site Explorer home page [new window]" target="_blank">Site Explorer</a>',
+'skimbot', '<a href="http://www.skimlinks.com" rel="nofollow" title="SkimBot [new window]" target="_blank">SkimBot</a>',
'slysearch','SlySearch',
'smtbot', '<a href="http://www.similartech.com/smtbot" rel="nofollow" title="SMTBot Home Page [new window]" target="_blank">SMTBot</a>',
'snap\.com_beta_crawler','<a href="http://www.snap.com/" title="snap.com beta crawler home page [new window]" target="_blank">snap.com beta crawler</a>',
'teragramcrawlersurf','<a href="http://www.teragram.com/" title="TeragramCrawlerSURF home page [new window]" target="_blank">TeragramCrawlerSURF</a>',
'test_crawler','<a href="http://netp.ath.cx/" title="Test Crawler home page [new window]" target="_blank">Test Crawler</a>',
'testbot','<a href="http://www.agbrain.com/" title="TestBot home page [new window]" target="_blank">TestBot</a>',
+'thumbsniper', '<a href="http://thumbsniper.com" rel="nofollow" title="ThumbSniper Home Page [new window]" target="_blank">ThumbSniper</a>',
't\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e','<a href="http://www.thunderstone.com/" title="Bot home page. Used by many. [new window]" target="_blank">T-H-U-N-D-E-R-S-T-O-N-E</a>',
'topicblogs', '<a href="http://www.topicblogs.com/" title="Bot home page [new window]" target="_blank">topicblogs</a>',
'turnitinbot', '<a href="http://www.turnitin.com/robot/crawlerinfo.html" rel="nofollow" title="TurnitinBot Home Page [new window]" target="_blank">Turn It In</a>',
'ustc\-semantic\-group','<a href="http://ai.ustc.edu.cn/mas/en/research/index.php" title="Bot home page [new window]" target="_blank">USTC-Semantic-Group</a>',
'vagabondo\-wap','<a href="http://www.wise-guys.nl/Contact/index.php?botselected=webagents&lang=uk" title="Bot home page [new window]" target="_blank">Vagabondo-WAP</a>',
'vagabondo','<a href="http://www.wise-guys.nl/Contact/index.php?botselected=webagents&lang=uk" title="Bot home page [new window]" target="_blank">Vagabondo</a>',
+'vebidoobot', '<span title="As on Oct. 27, 2015, the user agent string did not contain a web address.">vebidoobot</span>',
'vermut','<a href="http://vermut.aol.com/" title="Bot home page [new window]" target="_blank">Vermut</a>',
'versus_crawler_from_eda\.baykan@epfl\.ch','<a href="http://www.epfl.ch/Eindex.html " title="versus crawler from eda.baykan@epfl.ch home page [new window]" target="_blank">versus crawler from eda.baykan@epfl.ch</a>',
'vespa_crawler','<a href="http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=%5Cbid_t_z_030406_1%5Cb" title="Bot home page [new window]" target="_blank">Vespa Crawler</a>',
+'voltron', '<span title="As on Oct. 21, 2015, the user agent string did not contain a web address.">voltron</span>',
'vortex','<a href="http://marty.anstey.ca/projects/robots/vortex/" title="Bot home page [new window]" target="_blank">VORTEX</a>',
'vse\/','<a href="http://www.vivisimo.com/" title="VSE home page [new window]" target="_blank">VSE</a>',
'w3c\-checklink','<a href="http://validator.w3.org/checklink/" title="Bot home page [new window]" target="_blank">W3C Link Checker</a>',
'waybackarchive\.org', '<span title="Maybe related to spiderlytics.">No website, email: spider(at)waybackarchive.org</span>',
# 2.12.2013 Project Honeypot reports at least one of the IPs used by waybackarchive with a spiderlytics UA string.
# Problably not related to the wayback machine of archive.org.
+'wbsearchbot', '<a href="http://www.warebay.com/bot.html" rel="nofollow" title="WBSearchBot [new window]" target="_blank">WBSearchBot</a>',
'webclipping\.com', 'WebClipping.com',
'webcompass', 'webcompass',
'webcrawl\.net','<a href="http://www.webcrawl.net/" title="webcrawl.net home page [new window]" target="_blank">webcrawl.net</a>',
'website[_+\s]monitoring[_+\s]bot','<a href="http://InternetSupervision.com/UrlMonitor/3/" title="Website_Monitoring_Bot home page [new window]" target="_blank">Website_Monitoring_Bot</a>',
'webvulncrawl', 'WebVulnCrawl',
'wells_search','<a href="http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_t_z_1484\b " title="Wells Search home page [new window]" target="_blank">Wells Search</a>',
+'wer-liefert-was', '<a href="http://www.wlw.de/extern/crawler/Wer-liefert-was-Crawler.html" rel="nofollow" title="Page given in UA string gave a 404 Error on July 2, 2015 [new window]" target="_blank">Wer-liefert-was Crawler</a> Note: AWStats counts most traffic as user traffic',
'wesee:search', '<a href="http://www.wesee.com/en/support/bot/" title="WeSEE Bot Home Page (gave a 404-Error on Nov. 2, 2013) [new window]" target="_blank">WeSEE Bot</a>',
'wevikabot', '<a href="http://www.wevika.de/" rel="nofollow" title="WeViKa Home Page [new window]" target="_blank">WeViKa</a>',
'wonderer', 'Web Wombat Redback Spider',
'flexum', 'Flexum Search Engine',
'yanga', 'Yanga WorldSearch Bot',
'yet-another-spider','<a href="http://188.40.112.195/" title="Yet-Another-Spider home page [new window]" target="_blank">Yet-Another-Spider</a>',
+'yisouspider', 'YisouSpider (no additional information in UA string)',
'yooglifetchagent','<a href="http://www.yoogli.com/" title="yoogliFetchAgent home page [new window]" target="_blank">yoogliFetchAgent</a>',
'z\-add_link_checker','<a href="http://w3.z-add.co.uk/linkcheck/" title="Z-Add Link Checker home page [new window]" target="_blank">Z-Add Link Checker</a>',
'zealbot','ZealBot',
'sucker', 'Unknown robot (identified by \'sucker\')',
'bot[\s_+:,\.\;\/\\\-]', 'Unknown robot (identified by \'bot\' followed by a space or one of the following characters _+:,.;/\-)',
'[\s_+:,\.\;\/\\\-]bot', 'Unknown robot (identified by a space or one of the characters _+:,.;/\- followed by \'bot\')',
-'curl', 'Common *nix tool for automating web document retireval. Most likely a bot.',
+'curl', 'Common *nix tool for automating web document retrieval. Most likely a bot.',
'php', 'A PHP script',
'ruby\/', 'Ruby script',
# Additional bots found by Sussex.
'geohasher', 'geohasher',
'hanrss', 'hanrss',
'inagist', 'inagist',
-'jacobin club', 'jacobin club',
+'jacobin\sclub', 'jacobin club',
'jakarta', 'jakarta',
'js\-kit', 'js-kit',
-'largesmall crawler', 'largesmall crawler',
+'largesmall\scrawler', 'largesmall crawler',
'linkedinbot', 'linkedinbot',
'longurl', 'longurl',
'metauri', 'metauri',
'^mozilla\/5\.0\sfirefox\/3\.0\.5$', 'Suspected bot masqurading as Mozilla',
'^msie', 'Suspected bot masquerading as M$ IE',
'netnewswire', 'netnewswire',
-' netseer ', '<a href="http://www.netseer.com/crawler.html">Net Seer</a>',
+'\snetseer\s', '<a href="http://www.netseer.com/crawler.html">Net Seer</a>',
'netvibes', 'netvibes',
'newrelicpinger', 'newrelicpinger',
'newsfox', 'Fox News',
'r6\_', '<a href="http://www.radian6.com/crawler">Radian 6 Crawler</a>',
'ratingburner', 'ratingburner',
'regator', 'regator',
-'rome client', 'rome client',
+'rome\sclient', 'rome client',
'rpt\-httpclient', 'rpt-httpclient',
'rssgraffiti', 'rssgraffiti',
'sage\+\+', 'sage++',
'trapit', 'trapit',
'trileet', 'trileet',
'tweetedtimes', '<a href="http://tweetedtimes.com">The Tweeted Times</a>',
-'twisted pagegetter', 'twisted pagegetter',
+'twisted\spagegetter', 'twisted pagegetter',
'twitterbot', 'twitterbot',
'twitterfeed', 'twitterfeed',
'unwindfetchor', 'unwindfetchor',
'windows\-rss\-platform', 'windows-rss-platform',
'wiumi', 'wiumi',
'xydo', 'xydo',
-'yahoo! slurp', 'Additional Yahoo bots.',
-'yahoo pipes', 'Additional Yahoo bots.',
+'yahoo!\sslurp', 'Additional Yahoo bots.',
+'yahoo\spipes', 'Additional Yahoo bots.',
'yahoo\-newscrawler', 'Additional Yahoo bots.',
'yahoocachesystem', 'Additional Yahoo bots.',
'yahooexternalcache', 'Additional Yahoo bots.',
-'yahoo! searchmonkey', 'Additional Yahoo bots.',
+'yahoo!\ssearchmonkey', 'Additional Yahoo bots.',
'yahooysmcm', 'Additional Yahoo bots.',
'yammer', 'yammer',
#'yandexbot', 'yandexbot', #already covered by 'yandex'
'bingbot'=>'MSN',
'twitterbot'=>'Twitter',
'twitterfeed'=>'Twitter',
-'yahoo! slurp'=>'Yahoo',
-'yahoo pipes'=>'Yahoo',
+'yahoo!\sslurp'=>'Yahoo',
+'yahoo\spipes'=>'Yahoo',
'yahoo-newscrawler'=>'Yahoo',
'yahoocachesystem'=>'Yahoo',
'yahooexternalcache'=>'Yahoo',
-'yahoo! searchmonkey'=>'Yahoo',
+'yahoo!\ssearchmonkey'=>'Yahoo',
'yahooysmcm'=>'Yahoo'
);
'windowssearch\.com',
'www\.wow\.com',
'searches\.globososo\.com',
+'swisscows\.ch',
+'globososo\.com',
+'preciobarato\.xyz',
# Chello Portals
'chello\.at',
'chello\.be',
# Minor english search engines
'(^|\.)ask\.co\.uk','bbc\.co\.uk/cgi-bin/search','ifind\.freeserve','looksmart\.co\.uk','splut\.','spotjockey\.','ukdirectory\.','ukindex\.co\.uk','ukplus\.','searchy\.co\.uk',
'search\.fbdownloader\.com',
+'search\.fdownloadr\.com',
'search\.babylon\.com',
'my\.allgameshome\.com',
'surfcanyon\.com',
+'uk\.foxstart\.com',
+'yandex\.com',
# Minor finnish search engines
'haku\.www\.fi',
# Minor french search engines
'extern\.peoplecheck\.de',
'www\.oneseek\.de',
'de\.wiki\.gov\.cn',
+'umuwa\.de',
+'suche\.1und1\.de',
+'www\.metasuche\.ch',
# Minor Hungarian search engines
'heureka\.hu','vizsla\.origo\.hu','lapkereso\.hu','goliat\.hu','index\.hu','wahoo\.hu','webmania\.hu','search\.internetto\.hu',
'tango\.hu',
'windowssearch\.com', 'windowssearch_com',
'www\.wow\.com', 'www_wow_com',
'searches\.globososo\.com', 'globososo_com',
+'swisscows\.ch', 'swisscows_ch',
+'globososo\.com', 'globososo_com',
+'preciobarato\.xyz', 'preciobarato_xyz',
# Chello Portals
'chello\.at','chelloat',
'chello\.be','chellobe',
'ukplus\.','ukplus',
'searchy\.co\.uk','searchy',
'search\.fbdownloader\.com','fbdownloader',
+'search\.fdownloadr\.com', 'fdownloadr_com',
'search\.babylon\.com', 'babylon',
'my\.allgameshome\.com', 'allgameshome',
'surfcanyon\.com', 'surfcanyon_com',
+'uk\.foxstart\.com', 'uk_foxstart_com',
+'yandex\.com', 'yandex_com',
# Minor finnish search engines
'haku\.www\.fi','haku',
# Minor french search engines
'extern\.peoplecheck\.de', 'peoplecheck_de',
'www\.oneseek\.de', 'oneseek_de',
'de\.wiki\.gov\.cn', 'de_wiki_gov_cn',
+'umuwa\.de', 'umuwa_de',
+'suche\.1und1\.de', '1und1_de',
+'www\.metasuche\.ch', 'metasuche_ch',
# Minor Hungarian search engines
'heureka\.hu','heureka',
'vizsla\.origo\.hu','origo',
%SearchEnginesWithKeysNotInQuery=(
'a9',1, # www.a9.com/searchkey1%20searchkey2
'iminent',1, #http://start.iminent.com/StartWeb/1031/toolbox/#q=searchkey1%20searchkey2&additional_arguments
-'de_wiki_gov_cn',1 #http://de.wiki.gov.cn/s_searchkey1%20searchkey2
+'de_wiki_gov_cn',1, #http://de.wiki.gov.cn/s_searchkey1%20searchkey2
+'umuwa_de', 1, #http://umuwa.de/searchkey or http://umuwa.de/searchkey/Images
+'amazonsearch', 1 #http://www.amazon.de/gp/bit/apps/web/SERP/search/ref=bit_bds-p24_serp_cr_de?ie=UTF8tagbase=bds-p24&query=deutsch+8.+klasse+gymnasium+protokoll
);
# SearchEnginesKnownUrl
'windowssearch_com', 'q=',
'www_wow_com', 'q=',
'globososo_com', 'q=',
+'swisscows_ch', 'query=',
+'globososo_com', 'q=',
+'preciobarato_xyz', 's=',
# Chello Portals
'chelloat','q1=',
'chellobe','q1=',
'askuk','(ask|q)=', 'bbc','q=', 'freeserve','q=', 'looksmartuk','key=',
'splut','pattern=', 'spotjockey','Search_Keyword=', 'ukindex', 'stext=', 'ukdirectory','k=', 'ukplus','search=', 'searchy', 'search_term=',
'fbdownloader','q=',
+'fdownloadr_com', 'q=',
'babylon','q=',
'allgameshome', 's=',
'surfcanyon_com', 'q=',
+'uk_foxstart_com', 'q=',
+'yandex_com', 'text=',
# Minor finnish search engines
'haku','w=',
# Minor french search engines
'peoplecheck_de', 'q=',
'oneseek_de', 'q=',
'de_wiki_gov_cn', 'de\.wiki\.gov\.cn\/s_',
+'umuwa_de', 'umuwa\.de\/',
+'1und1_de', 'q=',
+'metasuche_ch', 'q=',
# Minor Hungarian search engines
'heureka','heureka=', 'origo','(q|search)=', 'goliat','KERESES=', 'wahoo','q=', 'internetto','searchstr=',
'keresolap_hu','q=',
#------------------------------------------------------------------------------
%SearchEnginesHashLib=(
# Major international search engines
-'alexa','<a href="http://www.alexa.com/" title="Search Engine Home Page [new window]" target="_blank">Alexa</a>',
-'alltheweb','<a href="http://www.alltheweb.com/" title="Search Engine Home Page [new window]" target="_blank">AllTheWeb</a>',
-'altavista','<a href="http://www.altavista.com/" title="Search Engine Home Page [new window]" target="_blank">AltaVista</a>',
-'a9', '<a href="http://www.a9.com/" title="Search Engine Home Page [new window]" target="_blank">A9</a>',
-'dmoz','<a href="http://dmoz.org/" title="Search Engine Home Page [new window]" target="_blank">DMOZ</a>',
-'google_products','<a href="http://www.google.com/products" title="Search Engine Home Page [new window]" target="_blank">Google (Products)</a>',
-'google_base','<a href="http://base.google.com/" title="Search Engine Home Page [new window]" target="_blank">Google (Base)</a>',
-'google_froogle','<a href="http://froogle.google.com/" title="Search Engine Home Page [new window]" target="_blank">Froogle (Google)</a>',
-'google_groups','<a href="http://groups.google.com/" title="Search Engine Home Page [new window]" target="_blank">Google (Groups)</a>',
-'google_image','<a href="http://images.google.com/" title="Search Engine Home Page [new window]" target="_blank">Google (Images)</a>',
-'google_cache','<a href="http://www.google.com/help/features.html#cached" title="Search Engine Home Page [new window]" target="_blank">Google (cache)</a>',
-'google','<a href="http://www.google.com/" title="Search Engine Home Page [new window]" target="_blank">Google</a>',
-'lycos','<a href="http://www.lycos.com/" title="Search Engine Home Page [new window]" target="_blank">Lycos</a>',
-'msn','<a href="http://search.msn.com/" title="Search Engine Home Page [new window]" target="_blank">Microsoft MSN Search</a>',
-'live','<a href="http://www.live.com/" title="Search Engine Home Page [new window]" target="_blank">Microsoft Windows Live</a>',
-'bing','<a href="http://www.bing.com/" title="Search Engine Home Page [new window]" target="_blank">Microsoft Bing</a>',
-'netscape','<a href="http://www.netscape.com/" title="Search Engine Home Page [new window]" target="_blank">Netscape</a>',
-'aol','<a href="http://www.aol.com/" title="Search Engine Home Page [new window]" target="_blank">AOL</a>',
-'terra','<a href="http://www.terra.es/" title="Search Engine Home Page [new window]" target="_blank">Terra</a>',
-'tiscali','<a href="http://search.tiscali.com/" title="Search Engine Home Page [new window]" target="_blank">Tiscali</a>',
-'voila','<a href="http://www.voila.fr/" title="Search Engine Home Page [new window]" target="_blank">Voila</a>',
-'search.com','<a href="http://www.search.com/" title="Search Engine Home Page [new window]" target="_blank">Search.com</a>',
-'yahoo_mindset','<a href="http://mindset.research.yahoo.com/" title="Search Engine Home Page [new window]" target="_blank">Yahoo! Mindset</a>',
-'yahoo','<a href="http://www.yahoo.com/" title="Search Engine Home Page [new window]" target="_blank">Yahoo!</a>',
-'sympatico','<a href="http://sympatico.msn.ca/" title="Search Engine Home Page [new window]" target="_blank">Sympatico</a>',
-'excite','<a href="http://www.excite.com/" title="Search Engine Home Page [new window]" target="_blank">Excite</a>',
+'alexa','<a href="http://www.alexa.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Alexa</a>',
+'alltheweb','<a href="http://www.alltheweb.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">AllTheWeb</a>',
+'altavista','<a href="http://www.altavista.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">AltaVista</a>',
+'a9', '<a href="http://www.a9.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">A9</a>',
+'dmoz','<a href="http://dmoz.org/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">DMOZ</a>',
+'google_products','<a href="http://www.google.com/products" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Google (Products)</a>',
+'google_base','<a href="http://base.google.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Google (Base)</a>',
+'google_froogle','<a href="http://froogle.google.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Froogle (Google)</a>',
+'google_groups','<a href="http://groups.google.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Google (Groups)</a>',
+'google_image','<a href="http://images.google.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Google (Images)</a>',
+'google_cache','<a href="http://www.google.com/help/features.html#cached" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Google (cache)</a>',
+'google','<a href="http://www.google.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Google</a>',
+'lycos','<a href="http://www.lycos.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Lycos</a>',
+'msn','<a href="http://search.msn.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Microsoft MSN Search</a>',
+'live','<a href="http://www.live.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Microsoft Windows Live</a>',
+'bing','<a href="http://www.bing.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Microsoft Bing</a>',
+'netscape','<a href="http://www.netscape.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Netscape</a>',
+'aol','<a href="http://www.aol.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">AOL</a>',
+'terra','<a href="http://www.terra.es/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Terra</a>',
+'tiscali','<a href="http://search.tiscali.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Tiscali</a>',
+'voila','<a href="http://www.voila.fr/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Voila</a>',
+'search.com','<a href="http://www.search.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Search.com</a>',
+'yahoo_mindset','<a href="http://mindset.research.yahoo.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Yahoo! Mindset</a>',
+'yahoo','<a href="http://www.yahoo.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Yahoo!</a>',
+'sympatico','<a href="http://sympatico.msn.ca/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Sympatico</a>',
+'excite','<a href="http://www.excite.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Excite</a>',
# Minor international search engines
-'google4counter','<a href="http://www.4-counter.com/" title="Search Engine Home Page [new window]" target="_blank">4-counter (Google)</a>',
-'att','<a href="http://www.att.net/" title="Search Engine Home Page [new window]" target="_blank">AT&T search (powered by Google)</a>',
-'bungeebonesdotcom','<a href="http://BungeeBones.com/search.php/" title="Search Engine Home Page [new window]" target="_blank">BungeeBones</a>',
+'google4counter','<a href="http://www.4-counter.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">4-counter (Google)</a>',
+'att','<a href="http://www.att.net/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">AT&T search (powered by Google)</a>',
+'bungeebonesdotcom','<a href="http://BungeeBones.com/search.php/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">BungeeBones</a>',
'go','Go.com',
-'askde','<a href="http://de.ask.com/" title="Search Engine Home Page [new window]" target="_blank">Ask Deutschland</a>',
-'askes','<a href="http://es.ask.com/" title="Search Engine Home Page [new window]" target="_blank">Ask España</a>', # break out Ask country specific engines.
-'askfr','<a href="http://fr.ask.com/" title="Search Engine Home Page [new window]" target="_blank">Ask France</a>',
-'askit','<a href="http://it.ask.com/" title="Search Engine Home Page [new window]" target="_blank">Ask Italia</a>',
-'asknl','<a href="http://nl.ask.com/" title="Search Engine Home Page [new window]" target="_blank">Ask Nederland</a>',
-'ask','<a href="http://www.ask.com/" title="Search Engine Home Page [new window]" target="_blank">Ask</a>',
+'askde','<a href="http://de.ask.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Ask Deutschland</a>',
+'askes','<a href="http://es.ask.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Ask España</a>', # break out Ask country specific engines.
+'askfr','<a href="http://fr.ask.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Ask France</a>',
+'askit','<a href="http://it.ask.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Ask Italia</a>',
+'asknl','<a href="http://nl.ask.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Ask Nederland</a>',
+'ask','<a href="http://www.ask.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Ask</a>',
'atomz','Atomz',
'dejanews','DejaNews',
'euroseek','Euroseek',
'sweetpacks', '<a href="http://www.sweetpacks-search.com/" rel="nofollow" title="Sweetpacks [new window]" target="_blank">Sweetpacks</a>',
'searchgol', '<a href="http://www.searchgol.com/" rel="nofollow" title="Search-Gol [new window]" target="_blank">Search-Gol</a>',
'duckduckgo', '<a href="http://r.duckduckgo.com/" rel="nofollow" title="DuckDuckGo Home Page [new window]" target="_blank">DuckDuckGo (Does not provide search keyphrases, using found page instead)</a>',
-'facemoods', '<a href="http://sr.facemoods.com/" title="Facemoods Search [new window]" target="_blank">Facemoods Search</a>',
-'shoppstop', '<a href="http://www.shoppstop.com/" title="ShoppStop [new window]" target="_blank">ShoppStop</a>',
-'searchya', '<a href="http://www.searchya.com/" title="Searchya [new window]" target="_blank">Searchya</a>',
-'picsearch', '<a href="http://www.picsearch.de/" title="picsearch [new window]" target="_blank">picsearch</a>',
-'webssearches', '<a href="http://www.webssearches.com/" title="Web Searches [new window]" target="_blank">Web Searches</a>',
-'inspsearch_com', '<a href="http://airzip.inspsearch.com/" title="airzip.inspsearch.com [new window]" target="_blank">airzip.inspsearch.com</a> (related to <a href="http://www.webssearches.com/" title="www.webssearches.com [new window]" target="_blank">http://www.webssearches.com/</a>?)',
-'zapmeta', '<a href="http://www.zapmeta.de/" title="ZapMeta [new window]" target="_blank">ZapMeta</a>',
-'localmoxie', '<a href="http://www.localmoxie.com/" title="Local Moxie [new window]" target="_blank">Local Moxie</a>',
-'search-results_mobi', '<a href="http://search-results.mobi/" title="search-results.mobi [new window]" target="_blank">search-results.mobi</a>',
-'androidsearch', '<a href="http://www.androidsearch.com/" title="androidsearch.com [new window]" target="_blank">androidsearch.com</a>',
-'isearch_nation_com', '<a href="http://isearch.nation.com/" title="Nation Search [new window]" target="_blank">Nation Search</a>',
-'search_zonealarm_com', '<a href="http://search.zonealarm.com/" title="Zone Alarm Search [new window]" target="_blank">Zone Alarm Search</a>',
-'www_buenosearch_com', '<a href="http://www.buenosearch.com/" title="BuenoSearch [new window]" target="_blank">BuenoSearch</a>',
-'search_foxtab_com', '<a href="http://search.foxtab.com/" title="Foxtab Search [new window]" target="_blank">Foxtab Search</a>',
-'searches_qone8_com', '<a href="http://searches.qone8.com/" title="Omiga-Plus [new window]" target="_blank">Omiga-Plus</a>',
-'startpage_com', '<a href="http://startpage.com/" title="Startpage [new window]" target="_blank">Startpage</a>',
-'qwant_com', '<a href="https://www.qwant.com/" title="qwant.com [new window]" target="_blank">qwant.com</a>',
-'safehomepage_com', '<a href="http://searches.safehomepage.com/" title="safehomepage.com [new window]" target="_blank">safehomepage.com</a>',
-'vi-view_com', '<a href="http://searches.vi-view.com/" title="vi-view.com [new window]" target="_blank">vi-view.com</a>',
-'wow_utop_it', '<a href="http://wow.utop.it/" title="wow.utop.it [new window]" target="_blank">wow.utop.it</a>',
-'windowssearch_com', '<a href="http://www.windowssearch.com/search?q=AWStats" title="Example page [new window]" target="_blank">windowssearch.com</a>',
-'www_wow_com', '<a href="http://www.wow.com/" title="www.wow.com [new window]" target="_blank">WOW.com</a>',
-'globososo_com', '<a href="http://searches.globososo.com/" title="Globososo (Kingtale Technology) [new window]" target="_blank">Globososo</a>',
+'facemoods', '<a href="http://sr.facemoods.com/" rel="nofollow" title="Facemoods Search [new window]" target="_blank">Facemoods Search</a>',
+'shoppstop', '<a href="http://www.shoppstop.com/" rel="nofollow" title="ShoppStop [new window]" target="_blank">ShoppStop</a>',
+'searchya', '<a href="http://www.searchya.com/" rel="nofollow" title="Searchya [new window]" target="_blank">Searchya</a>',
+'picsearch', '<a href="http://www.picsearch.de/" rel="nofollow" title="picsearch [new window]" target="_blank">picsearch</a>',
+'webssearches', '<a href="http://www.webssearches.com/" rel="nofollow" title="Web Searches [new window]" target="_blank">Web Searches</a>',
+'inspsearch_com', '<a href="http://airzip.inspsearch.com/" rel="nofollow" title="airzip.inspsearch.com [new window]" target="_blank">airzip.inspsearch.com</a> (related to <a href="http://www.webssearches.com/" rel="nofollow" title="www.webssearches.com [new window]" target="_blank">http://www.webssearches.com/</a>?)',
+'zapmeta', '<a href="http://www.zapmeta.de/" rel="nofollow" title="ZapMeta [new window]" target="_blank">ZapMeta</a>',
+'localmoxie', '<a href="http://www.localmoxie.com/" rel="nofollow" title="Local Moxie [new window]" target="_blank">Local Moxie</a>',
+'search-results_mobi', '<a href="http://search-results.mobi/" rel="nofollow" title="search-results.mobi [new window]" target="_blank">search-results.mobi</a>',
+'androidsearch', '<a href="http://www.androidsearch.com/" rel="nofollow" title="androidsearch.com [new window]" target="_blank">androidsearch.com</a>',
+'isearch_nation_com', '<a href="http://isearch.nation.com/" rel="nofollow" title="Nation Search [new window]" target="_blank">Nation Search</a>',
+'search_zonealarm_com', '<a href="http://search.zonealarm.com/" rel="nofollow" title="Zone Alarm Search [new window]" target="_blank">Zone Alarm Search</a>',
+'www_buenosearch_com', '<a href="http://www.buenosearch.com/" rel="nofollow" title="BuenoSearch [new window]" target="_blank">BuenoSearch</a>',
+'search_foxtab_com', '<a href="http://search.foxtab.com/" rel="nofollow" title="Foxtab Search [new window]" target="_blank">Foxtab Search</a>',
+'searches_qone8_com', '<a href="http://searches.qone8.com/" rel="nofollow" title="Omiga-Plus [new window]" target="_blank">Omiga-Plus</a>',
+'startpage_com', '<a href="http://startpage.com/" rel="nofollow" title="Startpage [new window]" target="_blank">Startpage</a>',
+'qwant_com', '<a href="https://www.qwant.com/" rel="nofollow" title="qwant.com [new window]" target="_blank">qwant.com</a>',
+'safehomepage_com', '<a href="http://searches.safehomepage.com/" rel="nofollow" title="safehomepage.com [new window]" target="_blank">safehomepage.com</a>',
+'vi-view_com', '<a href="http://searches.vi-view.com/" rel="nofollow" title="vi-view.com [new window]" target="_blank">vi-view.com</a>',
+'wow_utop_it', '<a href="http://wow.utop.it/" rel="nofollow" title="wow.utop.it [new window]" target="_blank">wow.utop.it</a>',
+'windowssearch_com', '<a href="http://www.windowssearch.com/search?q=AWStats" rel="nofollow" title="Example page [new window]" target="_blank">windowssearch.com</a>',
+'www_wow_com', '<a href="http://www.wow.com/" rel="nofollow" title="www.wow.com [new window]" target="_blank">WOW.com</a>',
+'globososo_com', '<a href="http://searches.globososo.com/" rel="nofollow" title="Globososo (Kingtale Technology) [new window]" target="_blank">Globososo</a>',
+'swisscows_ch', '<a href="https://swisscows.ch/" rel="nofollow" title="Swisscows search page [new window]" target="_blank">Swisscows</a>',
+'globososo_com', '<a href="http://searches3.globososo.com/" rel="nofollow" title="Globososo search page [new window]" target="_blank">Globososo</a>',
+'preciobarato_xyz', '<a href="http://preciobarato.xyz/" rel="nofollow" title="preciobarato.xyz offline on Oct 2, 2015 [new window]" target="_blank">Yandex</a>',
# Chello Portals
'chelloat','<a href="http://www.chello.at/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Chello Austria</a>',
'chellobe','<a href="http://www.chello.be/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Chello Belgium</a>',
'askuk','<a href="http://uk.ask.com/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Ask UK</a>',
'bbc','BBC', 'freeserve','Freeserve', 'looksmartuk','Looksmart UK',
'splut','Splut', 'spotjockey','Spotjockey', 'ukdirectory','UK Directory', 'ukindex','UKIndex', 'ukplus','UK Plus', 'searchy','searchy.co.uk',
-'fbdownloader','<a href="http://search.fbdownloader.com/" rel="nofollow" title="FBDownloader Home Page [new window]" target="_blank">FBDownloader</a>',
+'fbdownloader','<a href="http://search.fbdownloader.com/" rel="nofollow" title="FBDownloader Home Page [new window]" target="_blank">FBDownloader (fbdownloader)</a>',
+'fdownloadr_com', '<a href="http://search.fdownloadr.com/" rel="nofollow" title="FBDownloader Home Page [new window]" target="_blank">FBDownloader (fdownloadr)</a>',
'babylon','<a href="http://search.babylon.com/" rel="nofollow" title="Babylon Home Page [new window]" target="_blank">Babylon</a>',
'allgameshome', '<a href="http://my.allgameshome.com/" rel="nofollow" title="AllGamesHome [new window]" target="_blank">AllGamesHome</a>',
-'surfcanyon_com', '<a href="http://search.surfcanyon.com/" title="SurfCanyon [new window]" target="_blank">SurfCanyon</a>',
+'surfcanyon_com', '<a href="http://search.surfcanyon.com/" rel="nofollow" title="SurfCanyon [new window]" target="_blank">SurfCanyon</a>',
+'uk_foxstart_com', '<a href="http://uk.foxstart.com/" rel="nofollow" title="Foxstart search page [new window]" target="_blank">Foxstart.com</a>',
+'yandex_com', '<a href="http://yandex.com/" rel="nofollow" title="Yandex search page [new window]" target="_blank">Yandex</a>',
# Minor finnish search engines
'haku','Ihmemaa',
# Minor french search engines
'wowsearch', '<a href="http://de.wow.com/" rel="nofollow" title="Wow Search Home Page [new window]" target="_blank">Wow Search</a>',
'vlips_de', '<a href="http://www.vlips.de/semags_proxy.php" rel="nofollow" title="vlips Proxy Page (Homepage not yet ready?) [new window]" target="_blank">vlips.de</a>',
'metager', '<a href="http://www.metager.de" rel="nofollow" title="MetaGer Home Page [new window]" target="_blank">MetaGer</a>',
-'search_1und1_de', '<a href="http://search.1und1.de/" rel="nofollow" title="1&1 Suche [new window]" target="_blank">1&1 Suche</a>',
+'search_1und1_de', '<a href="http://search.1und1.de/" rel="nofollow" title="1&1 Suche [new window]" target="_blank">1&1 Suche (subdomain "search")</a>',
'smde', '<a href="http://www.sm.de/" rel="nofollow" title="SM.de - Die SuchMaschine [new window]" target="_blank">SM.de - Die SuchMaschine</a>',
'sumaja', '<a href="http://www.sumaja.de/" rel="nofollow" title="Sumaja [new window]" target="_blank">Sumaja</a>',
'navigationshilfe', '<a href="http://navigationshilfe.t-online.de/" rel="nofollow" title="T-Online Navigationshilfe [new window]" target="_blank">T-Online Navigationshilfe</a>',
-'umfis', '<a href="http://www.umfis.de/" title="UMFIS-Online Das Umweltfirmen-Informationssystem der IHKs in Deutschland [new window]" target="_blank">UMFIS-Online Das Umweltfirmen-Informationssystem der IHKs in Deutschland</a>',
-'fastbot_de', '<a href="http://fastbot.de/" title="Fastbot.de [new window]" target="_blank">Fastbot.de (Does not provide search keyphrases, using found page instead)</a>',
-'tixuma_de', '<a href="http://www.tixuma.de/" title="Tixuma Deutschland [new window]" target="_blank">Tixuma Deutschland</a>',
-'freenet_de', '<a href="http://suche.freenet.de/" title="suche.freenet.de [new window]" target="_blank">suche.freenet.de</a>',
-'izito_de', '<a href="http://www.izito.de/" title="iZito Deutschland [new window]" target="_blank">iZito Deutschland</a>',
-'peoplecheck_de', '<a href="http://peoplecheck.de/" title="PeopleCheck.de [new window]" target="_blank">PeopleCheck.de</a>',
-'oneseek_de', '<a href="http://www.oneseek.de" title="Metasuchmaschine OneSeek.de [new window]" target="_blank">Metasuchmaschine OneSeek.de</a>',
-'de_wiki_gov_cn', '<a href="http://de.wiki.gov.cn/" title="Wiki Sucher [new window]" target="_blank">Wiki Sucher</a>',
+'umfis', '<a href="http://www.umfis.de/" rel="nofollow" title="UMFIS-Online Das Umweltfirmen-Informationssystem der IHKs in Deutschland [new window]" target="_blank">UMFIS-Online Das Umweltfirmen-Informationssystem der IHKs in Deutschland</a>',
+'fastbot_de', '<a href="http://fastbot.de/" rel="nofollow" title="Fastbot.de [new window]" target="_blank">Fastbot.de (Does not provide search keyphrases, using found page instead)</a>',
+'tixuma_de', '<a href="http://www.tixuma.de/" rel="nofollow" title="Tixuma Deutschland [new window]" target="_blank">Tixuma Deutschland</a>',
+'freenet_de', '<a href="http://suche.freenet.de/" rel="nofollow" title="suche.freenet.de [new window]" target="_blank">suche.freenet.de</a>',
+'izito_de', '<a href="http://www.izito.de/" rel="nofollow" title="iZito Deutschland [new window]" target="_blank">iZito Deutschland</a>',
+'peoplecheck_de', '<a href="http://peoplecheck.de/" rel="nofollow" title="PeopleCheck.de [new window]" target="_blank">PeopleCheck.de</a>',
+'oneseek_de', '<a href="http://www.oneseek.de" rel="nofollow" title="Metasuchmaschine OneSeek.de [new window]" target="_blank">Metasuchmaschine OneSeek.de</a>',
+'de_wiki_gov_cn', '<a href="http://de.wiki.gov.cn/" rel="nofollow" title="Wiki Sucher [new window]" target="_blank">Wiki Sucher</a>',
+'umuwa_de', '<a href="http://umuwa.de/" rel="nofollow" title="Umuwa Deutschland [new window]" target="_blank">Umuwa Deutschland</a>',
+'1und1_de', '<a href="http://suche.1und1.de/" rel="nofollow" title="1&1 Suche [new window]" target="_blank">1&1 Suche (subdomain "suche")</a>',
+'metasuche_ch', '<a href="http://www.metasuche.ch/" rel="nofollow" title="Metasuche search page [new window]" target="_blank">Metasuche.ch</a>',
# Minor hungarian search engines
'heureka','Heureka', 'origo','Origo-Vizsla', 'lapkereso','Startlapkereso', 'goliat','Goliat', 'indexhu','Index', 'wahoo','Wahoo', 'webmania','webmania.hu', 'internetto','Internetto Kereso',
'tango_hu','<a href="http://tango.hu/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Tango</a>',
# Minor Japanese search engines
'askjp','<a href="http://www.ask.jp/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Ask Japan</a>',
'sagool','<a href="http://sagool.jp/" rel="nofollow" title="Sagool home page [new window]" target="_blank">Sagool</a>',
-'rakuten', '<a href="http://websearch.rakuten.co.jp" title="websearch.rakuten.co.jp [new window]" target="_blank">websearch.rakuten.co.jp</a>',
+'rakuten', '<a href="http://websearch.rakuten.co.jp" rel="nofollow" title="websearch.rakuten.co.jp [new window]" target="_blank">websearch.rakuten.co.jp</a>',
# Minor Norwegian search engines
'start','start.no', 'eniro','<a href="http://www.eniro.no/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Eniro</a>',
# Minor polish search engines
'sapo','<a href="http://www.sapo.pt/" rel="nofollow" title="Sapo search engine home page [new window]" target="_blank">Sapo</a>',
# Minor Swiss search engines
'searchch', 'search.ch', 'bluewin', 'search.bluewin.ch',
-'zapmeta_ch', '<a href="http://www.zapmeta.ch/" title="ZapMeta.ch [new window]" target="_blank">ZapMeta.ch</a>',
-'etools_ch', '<a href="http://www.etools.ch/" title="eTools.ch [new window]" target="_blank">eTools.ch</a>',
+'zapmeta_ch', '<a href="http://www.zapmeta.ch/" rel="nofollow" title="ZapMeta.ch [new window]" target="_blank">ZapMeta.ch</a>',
+'etools_ch', '<a href="http://www.etools.ch/" rel="nofollow" title="eTools.ch [new window]" target="_blank">eTools.ch</a>',
# Minor Croatian, Serbian, Macedonian, Bosnian and Herzegovinian search engines
'pogodak','<a href="http://www.pogodak.com" rel="nofollow" title="Pogodak search engine home page [new window]" target="_blank">Pogodak.com</a>',
# Generic search engines
#}
#print @SearchEnginesSearchIDOrder_list1." ".@SearchEnginesSearchIDOrder_list2." ".@SearchEnginesSearchIDOrder_listgen;
-1;
\ No newline at end of file
+1;