From 775d2bcb286c3a0533d58bc3b3a7b7f5178d83b3 Mon Sep 17 00:00:00 2001 From: visualperception Date: Fri, 2 Sep 2016 04:58:49 +0100 Subject: [PATCH] Alternative robots.pm and search_engines.pm both files have many additions and corrections. Especially search_engines.pm which has had all google countries added to it and split into: google country name google country name images google country name catchall (for any subdomains we don't know about yet) --- wwwroot/cgi-bin/lib/robots.pm | 4792 ++++++++++--------- wwwroot/cgi-bin/lib/search_engines.pm | 6069 ++++++++++++++++++------- 2 files changed, 7064 insertions(+), 3797 deletions(-) diff --git a/wwwroot/cgi-bin/lib/robots.pm b/wwwroot/cgi-bin/lib/robots.pm index f6124146..a77760a9 100644 --- a/wwwroot/cgi-bin/lib/robots.pm +++ b/wwwroot/cgi-bin/lib/robots.pm @@ -1,2219 +1,2573 @@ -# AWSTATS ROBOTS DATABASE -#------------------------------------------------------- -# If you want to add robots to extend AWStats database detection capabilities, -# you must add an entry in RobotsSearchIDOrder_listx and RobotsHashIDLib. - -# The entry in RobotsSearchIDOrder_listx is a Perl regular expression -# (see http://perldoc.perl.org/perlreref.html). AWSTats applies these -# expressions to the user agent string in the order given by the lists. The -# first match specifies the robot. -# -# Note: This regular expression must not contain any whitespace. -# Otherwise AWStats will produce lines in the database that -# will be misinterpreted and as a consequence the corresponding data in the -# generated HTML reports will be wrong. If you want to match whitespace in -# the user agent string, use other constructs like '\s', '[:blank:]', -# '\p{IsSpace}', '\x20' etc. -# -# The corresponding entry in RobotsHashIDLib contains the regular expression -# as key, followed by a string containing HTML-text. AWStats inserts this -# text into reports to describe the bot. If possible the text should contain -# a link to the bot home page. 
This makes it easier for sysadmins to find -# the information necessary e.g. to adapt the robots.txt file. -# -# An entry in the RobotsAffiliateLib is not necessary. An entry in this list -# contains as first part the regular expression specifying the bot. The -# second part is a string that gives the Company or product managing the bot. -# This information is not used yet. -# -# There are several sorts of bots that AWStats is not able to detect and -# therefore a considerable amount of bot generated traffic counts -# as user traffic: -# -# a) A crawler that identifies itself in the referrer string, but not in -# the user agent string. An example is the crawler from semalt.semalt.com. -# -# b) Crawlers that correctly access robots.txt but identify themselves in -# in the user agent string only once or just a few times. Most of the -# time a user agent string ist used that does not contain hints that -# a bot is involved. An example is the iCjobs spider. -# msnbot-UDiscovery/2.0b seems to show this behaviour too. -# -# -# -#------------------------------------------------------- - -# 2005-08-19 Sean Carlos http://www.antezeta.com/awstats.html -# added dipsie (not tested with real data). -# added DomainsDB.net http://domainsdb.net/ -# added ia_archiver-web.archive.org (was inadvertently grouped with Alexa traffic) -# added Nutch (used by looksmart (furl?)) -# added rssImagesBot -# added Sqworm -# added t\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e -# added w3c css-validator -# added documentation link to bot home pages for above and selected major bots. -# In the case of international bots, choose .com page. -# Included tool tip (html "title"). -# To do: parameterize to match both AWStats language and tooltips settings. -# To do: add html links for all bots based on current documentation in source -# files referenced below. 
-# changed '\wbot[\/\-]', to '\wbot[\/\-]' (removed comma) -# made minor grammar corrections to notes below -# 2005-08-24 added YahooSeeker-Testing -# added w3c-checklink -# updated url for ask.com -# 2005-08-24 added Girafabot http://www.girafa.com/ -# 2005-08-30 added PluckFeedCrawler http://www.pluck.com/ -# added Gaisbot/3.0 (robot05@gais.cs.ccu.edu.tw; ) -# dded geniebot (wgao@genieknows.com) -# added BecomeBot link http://www.become.com/site_owners.html -# added topicblogs http://www.topicblogs.com/ -# added Powermarks; seen used by referrer spam -# added YahooSeeker -# added NG/2. http://www.exabot.com/ -# 2005-09-15 added link for Walhello appie -# added bender focused_crawler -# updated YahooSeeker description (blog crawler) -# 2005-09-16 added link for http://linkchecker.sourceforge.net -# added ConveraCrawler/0.9d ( http://www.authoritativeweb.com/crawl) -# added Blogslive info@blogslive.com intelliseek.com -# added BlogPulse (ISSpider-3.0) intelliseek.com -# 2005-09-26 added Feedfetcher-Google (http://www.google.com/feedfetcher.html) -# added EverbeeCrawler -# added Yahoo-Blogs http://help.yahoo.com/help/us/ysearch/crawling/crawling-02.html -# added link for Bloglines http://www.bloglines.com -# 2005-10-19 fixed Feedfetcher-Google (http://www.google.com/feedfetcher.html) -# added Blogshares Spiders (Synchronized V1.5.1) -# added yacy -# 2005-11-21 added Argus www.simpy.com -# added BlogsSay :: RSS Search Crawler (http://www.blogssay.com/) -# added MJ12bot http://majestic12.co.uk/bot.php -# added OpenTaggerBot (http://www.opentagger.com/opentaggerbot.htm) -# added OutfoxBot/0.3 (For internet experiments; outfox.agent@gmail.com) -# added RufusBot Rufus Web Miner http://64.124.122.252.webaroo.com/feedback.html -# added Seekbot (http://www.seekbot.net/bot.html) -# added Yahoo-MMCrawler/3.x (mms-mmcrawler-support@yahoo-inc.com) -# added link for BaiDuSpider -# added link for Blogshares Spider -# added link for StackRambler http://www.rambler.ru/doc/faq.shtml 
-# added link for WISENutbot -# added link for ZyBorg/1.0 (wn-14.zyborg@looksmart.net; http://www.WISEnutbot.com. Moved location to above wisenut to avoid classification as wisenut -# 2005-12-15 -# added FAST Enteprise Crawler/6 (www dot fastsearch dot com). Note spelling Enteprise not Enterprise. -# added findlinks http://wortschatz.uni-leipzig.de/findlinks/ -# added IBM Almaden Research Center WebFountain™ http://www.almaden.ibm.com/cs/crawler [hc3] -# added INFOMINE/8.0 VLCrawler (http://infomine.ucr.edu/useragents) -# added lmspider (lmspider@scansoft.com) http://www.nuance.com/ -# added noxtrumbot http://www.noxtrum.com/ -# added SandCrawler (Microsoft) -# added SBIder http://www.sitesell.com/sbider.html -# added SeznamBot http://fulltext.seznam.cz/ -# added sohu-search http://corp.sohu.com/ (looked for //robots.txt not /robots.txt) -# added the ruffle SemanticWeb crawler v0.5 - http://www.unreach.net -# added WebVulnCrawl/1.0 libwww-perl/5.803 (looked for //robots.txt not /robots.txt) -# added Yahoo! Japan keyoshid http://www.yahoo.co.jp/ -# added Y!J http://help.yahoo.co.jp/help/jp/search/indexing/indexing-15.html -# added link for GigaBot -# added link for MagpieRSS -# added link for MSIECrawler -# 2005-12-21 -# added aipbot http://www.aipbot.com aipbot@aipbot.com [matthys70 users.sourceforge.net] -# added Everest-Vulcan Inc./0.1 (R&D project; http://everest.vulcan.com/crawlerhelp) -# added Fast-Search-Engine http://www.fast-search-engine.com/ [matthys70 users.sourceforge.net] -# added g2Crawler (nobody@airmail.net) http://crawler.instantnetworks.net/ -# added Jakarta commons-httpclient http://jakarta.apache.org/commons/httpclient/ (hit robots.txt). May be used as robot or browser - a site may want to remove this entry. -# added OmniExplorer_Bot http://www.omni-explorer.com/ [matthys70 users.sourceforge.net] -# added USTC-Semantic-Group ai.ustc.edu.cn/mas/en/research/index.php ? 
-# 2005-12-22 -# added EARTHCOM.info www.earthcom.info -# added HTTrack off-line browser 'httrack','HTTrack', http://www.httrack.com/ [Moizes Gabor] -# added KummHttp http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_g_l_301105_2\b [Moizes Gabor] -# 2006-01-01 -# added Dulance http://www.dulance.com/bot.jsp -# added MojeekBot http://www.mojeek.com/bot.html -# added nicebot http://www.egghelp.org/setup.htm ? -# added Snappy http://www.urltrends.com/faq.php -# added sohu agent -# added VORTEX http://marty.anstey.ca/robots/vortex/ [matthys70 users.sourceforge.net] -# added zspider http://feedback.redkolibri.com/ -# 2006-01-13 -# added boitho.com-dc http://www.boitho.com/dcbot.html -# added IRLbot http://irl.cs.tamu.edu/crawler -# added virus_detector virus_harvester@securecomputing.com -# added Wavefire http://www.wavefire.com; info@wavefire.com -# added WebFilter Robot -# 2006-01-24 -# added Shim-Crawler http://www.logos.ic.i.u-tokyo.ac.jp/crawler/; crawl@logos.ic.i.u-tokyo.ac.jp -# added Exabot exabot.com -# added LetsCrawl.com http://letscrawl.com -# added ichiro http://help.goo.ne.jp/door/crawlerE.html -# 2006-01-27 additional 22 robots from a list provided by Moizes Gabor -# added ALeadSoftbot http://www.aleadsoft.com/bot.htm -# added CipinetBot http://www.cipinet.com/bot.html -# added Cuasarbot http://www.cuasar.com/ -# added Dumbot http://www.dumbfind.com/ -# added Extreme_Picture_Finder http://www.exisoftware.com/ -# added Fooky.com/ScorpionBot/ScoutOut http://www.fooky.com/scorpionbots -# added IlTrovatore-Setaccio http://www.iltrovatore.it/aiuto/motore_di_ricerca.html bot@iltrovatore.it -# added InsurancoBot http://www.fastspywareremoval.com/ -# added InternetArchive http://lucene.apache.org/nutch/bot.html nutch-agent@lucene.apache.org -# added KazoomBot http://www.kazoom.ca/bot.html kazoombot@kazoom.ca -# added Kurzor http://www.easymail.hu/ cursor@easymail.hu -# added NutchCVS 
http://lucene.apache.org/nutch/bot.html nutch-agent@lucene.apache.org -# added NutchOSU-VLIB http://lucene.apache.org/nutch/bot.html nutch-agent@lucene.apache.org -# added Orbiter http://www.dailyorbit.com/bot.htm -# added PHP_version_tracker http://www.nexen.net/phpversion/bot.php -# added SuperBot http://www.sparkleware.com/superbot/ -# added SynooBot http://www.synoo.de/bot.html webmaster@synoo.com -# added TestBot http://www.agbrain.com/ -# added TutorGigBot http://www.tutorgig.info/ -# added WebIndexer mailto://webindexerv1@yahoo.com -# added WebMiner http://64.124.122.252/feedback.html -# 2006-02-01 -# added heritrix https://sourceforge.net/forum/message.php?msg_id=3550202 -# added Zeus Webster Pro https://sourceforge.net/forum/message.php?msg_id=3141164 -# additional robots from a list provided by Moizes Gabor [ mojzi -a-t- free mail hu ] -# added Candlelight_Favorites_Inspector -# added DomainChecker -# added EasyDL -# added FavOrg -# added Favorites_Sweeper -# added Html_Link_Validator -# added Internet_Ninja -# added JRTwine_Software_Check_Favorites_Utility -# fixed Microsoft_URL_Control -# added miniRank -# added Missigua_Locator -# added NPBot -# added Ocelli -# added Onet.pl_SA -# added proodleBot -# added SearchGuild_DMOZ_Experiment -# added Susie -# added Website_Monitoring_Bot -# added Xenu_Link_Sleuth -# 2006-05-15 -# added ASPseek http://www.aspseek.org/ -# added AdamM Bot http://home.blic.net/adamm/ -# added archive.org_bot http://crawls.archive.org/collections/bncf/crawl.html -# added arianna.libero.it (Italian Portal/search engine) -# added Biz360 spider http://www.biz360.com -# added BlogBridge Service http://www.blogbridge.com/ -# added BlogSearch http://www.icerocket.com/ -# added libcrawl -# added edgeio-relanshanbottriever http://www.edgeio.com -# added FeedFlow http://feedflow.com/about -# added Biblioteca Nazionale Centrale di Firenze (Italian National Archive) http://www.bncf.firenze.sbn.it/raccolta.txt -# added Java catchall - used by 
many spam bots -# added lanshanbot http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=%5Cbid_g_l_140406_1%5Cb -# added msnbot-media http://search.msn.com/msnbot.htm -# added MT::Telegraph::Agent -# added Netluchs http://www.netluchs.de/ (German SE bot) -# added oBot http://www.webmasterworld.com/forum11/1616.htm -# added Onfolio http://www.onfolio.com/ (IE Toolbar plugin) - hit rss feeds. -# added ping.blo.gs http://blo.gs/ping.php blog bot -# added Sphere Scout http://www.sphere.com/ -# added sproose crawler http://www.sproose.com/bot.html -# added SyndicAPI http://syndicapi.com/bot.html -# added Yahoo! Mindset http://mindset.research.yahoo.com/ -# added msrabot -# added Vagabondo & Vagabondo-WAP http://www.wise-guys.nl/Contact/index.php?botselected=webagents&lang=uk -# fixed Missigua Locator detection (Missigua_Locator -> Missigua Locator) -# changed echo to echo! to avoid conflict with the bonecho (Firefox 2.0) browser. -# This requires you to reprocess historic logs if you want EchO! to be recognized for older reports. -# 2006-05-17 -# added Alpha Search Agent # 62.152.125.60 Eurologon Srl -# added Krugle http://www.krugle.com/crawler/info.html the search engine for developers -# added Octora Beta Bot http://www.octora.com/ # Blog and Rss Search Engine -# added UbiCrawler http://law.dsi.unimi.it/ubicrawler/ -# added Yahoo! Slurp China http://misc.yahoo.com.cn/help.html -# You must reprocess old logs for the Yahoo! 
Slurp China bot to be detected in old reports -# 2006-05-20 -# added 1-More Scanner http://www.myzips.com/software/1-More-Scanner.phtml -# added Accoona-AI-Agent http://www.accoona.com/ -# added ActiveBookmark http://www.libmaster.com/active_bookmark.php -# added BIGLOTRON http://www.biglotron.com/robot.html -# added Bookmark-Manager http://bkm.sourceforge.net/ -# added cbn00glebot -# added Cerberian Drtrs http://www.pgts.com.au/cgi-bin/psql?robot_info=25240 -# added CFNetwork http://www.cocoadev.com/index.pl?CFNetwork -# added CheckWeb link validator http://p.duby.free.fr/chkweb.htm -# added Computer and Automation Research Institute Crawler http://www.ilab.sztaki.hu/~stamas/publications/p184-benczur.html -# added ConveraCrawler http://www.authoritativeweb.com/crawl/ -# added ConveraMultiMediaCrawler http://www.authoritativeweb.com/crawl/ -# added CSE HTML Validator Lite Online http://online.htmlvalidator.com/php/onlinevallite.php -# added Cursor http://adcenter.hu/docs/en/bot.html -# added Custo http://www.netwu.com/custo/ -# added DataFountains/DMOZ Downloader http://infomine.ucr.edu/ -# added Deepindex http://www.deepindex.net/faq.php -# added DNSGroup http://www.dnsgroup.com/ -# added DoCoMo http://www.nttdocomo.co.jp/ -# added dumm.de-Bot http://www.dumm.de/ -# added ETS v http://www.freetranslation.com/help/ -# added eventax http://www.eventax.de/ -# added FAST Enterprise Crawler * crawleradmin.t-info@telekom.de http://www.telekom.de/ -# added FAST Enterprise Crawler http://www.fast.no/ -# added FAST Enterprise Crawler * T-Info_BI_cluster crawleradmin.t-info@telekom.de http://www.telekom.de/ -# added FeedValidator http://feedvalidator.org/ -# added FilmkameraBot http://www.filmkamera.at/bot.html -# added Findexa Crawler http://www.findexa.no/gulesider/article26548.ece -# added Global Fetch http://www.wesonet.com/ -# added GOFORITBOT http://www.goforit.com/about/ -# added GoForIt.com http://www.goforit.com/about/ -# added GPU p2p crawler 
http://gpu.sourceforge.net/search_engine.php -# added HooWWWer http://cosco.hiit.fi/search/hoowwwer/ -# added HPPrint -# added HTMLParser http://htmlparser.sourceforge.net/ -# added Hundesuche.com-Bot http://www.hundesuche.com/ -# added InfoBot http://www.infobot.org/ -# added InfociousBot http://corp.infocious.com/tech_crawler.php -# added InternetSupervision http://internetsupervision.com/ -# added isearch2006 http://www.yahoo.com.cn/ -# added IUPUI_Research_Bot http://spamhuntress.com/2005/04/25/a-mail-harvester-visits/ -# added KalamBot http://64.124.122.251/feedback.html -# added kamano.de NewsFeedVerzeichnis http://www.kamano.de/ -# added Kevin http://dznet.com/kevin/ -# added KnowItAll http://www.cs.washington.edu/research/knowitall/ -# added Knowledge.com http://www.knowledge.com/ -# added Kouaa Krawler http://www.kouaa.com/ -# added ksibot http://ego.ms.mff.cuni.cz/ -# added Link Valet Online http://www.htmlhelp.com/tools/valet/ -# added lwp-request http://search.cpan.org/~gaas/libwww-perl-5.69/bin/lwp-request -# added lwp-trivial http://search.cpan.org/src/GAAS/libwww-perl-5.805/lib/LWP/Simple.pm -# added MapoftheInternet.com http://MapoftheInternet.com/ -# added Matrix S.p.A. 
- FAST Enterprise Crawler http://tin.virgilio.it/ -# added Megite http://www.megite.com/ -# added Metaspinner http://index.meta-spinner.de/ -# added Mini-reptile -# added Misterbot http://www.misterbot.fr/ -# added Miva http://www.miva.com/ -# added Mizzu Labs http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_m_141105_2\b -# added MSRBOT http://research.microsoft.com/research/sv/msrbot/ -# added MS SharePoint Portal Server - MS Search 4.0 Robot http://support.microsoft.com/default.aspx?scid=kb;en-us;284022 -# added Mydoyouhike http://www.doyouhike.net/my -# added NASA Search http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_n_s_140506_2\b -# added NetSprint http://www.netsprint.pl/serwis/ -# added NimbleCrawler http://www.healthline.com/ -# added OpenWebSpider http://www.openwebspider.org/ -# added Oracle Ultra Search http://www.oracle.com/technology/products/ultrasearch/index.html -# added OSSProxy http://www.marketscore.com/FAQ.Aspx -# added passwordmaker.org http://passwordmaker.org/ -# added PEAR HTTP Request class http://pear.php.net/ -# added PEERbot http://www.peerbot.com/ -# added PHP version tracker http://www.nexen.net/phpversion/bot.php -# added PictureOfInternet http://malfunction.org/poi/ -# added plinki http://www.plinki.com/ -# added Port Huron Labs http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_n_s_1133\b -# added PostFavorites http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_n_s_1135\b -# added ProjectWF-java-test-crawler -# added PyQuery http://sourceforge.net/projects/pyquery/ -# added Schizozilla http://spamhuntress.com/2005/03/18/gizmo/ -# added Scumbot -# added Sensis Web Crawler http://www.sensis.com.au/ -# added snap.com beta crawler http://www.snap.com/ -# added Steeler http://www.tkl.iis.u-tokyo.ac.jp/~crawler/ -# added STEROID Download 
http://faqs.org.ru/progr/pascal/delphi_internet2.htm -# added Suchfin-Bot http://www.suchfin.de/ -# added Sunrise http://www.sunrisexp.com/ -# added Tagyu Agent http://www.tagyu.com/ -# added Tcl http client package http://www.tcl.tk/man/tcl8.4/TclCmd/http.htm -# added TeragramCrawlerSURF http://www.teragram.com/ -# added Test Crawler http://netp.ath.cx/ -# added UnChaos Bot Hybrid Web Search Engine http://www.unchaos.com/ -# added unido-bot http://www.unchina.org/unido/unido/our_projects/3_3.html -# added UniversalFeedParser http://feedparser.org/ (seen from md301000.inktomisearch.com) -# added updated http://www.updated.com/ -# added Vermut http://vermut.aol.com -# added versus crawler from eda.baykan@epfl.ch http://www.epfl.ch/Eindex.html -# added Vespa Crawler (Yahoo Norway?) http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=%5Cbid_t_z_030406_1%5Cb -# added VSE http://www.vivisimo.com/ -# added webcrawl.net http://www.webcrawl.net/ -# added Web Downloader http://www.krasu.ru/soft/chuchelo/ -# added Webdup http://www.webdup.com/en/index.html -# added Wells Search http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_t_z_1484\b -# added WordPress http://wordpress.org/ -# added wume crawler http://wume.cse.lehigh.edu/~xiq204/crawler/ -# added Xenu's Link Sleuth (with ') -# added xirq http://www.xirq.com/ -# added yoogliFetchAgent http://www.yoogli.com/ -# added Z-Add Link Checker http://w3.z-add.co.uk/linkcheck/ -# -- fix - some robots were reported with _ where _ should have been a space. -# changed Xenu Link Sleuth -# changed microsoft[_+\s]url[_+\s]control -> microsoft_url_control -# changed favorites_sweeper -> favorites_sweeper -# -- updates -# updated AskJeeves to Ask -# 2012-06-05 Albrecht Mueller -# added Grabber from SDSC (San Diego Supercomputer Center). 
-# 2013-09-30 Albrecht Mueller -# AWStats probably cannot detect this bot as it identifies itself in -# the referrer field and not in the user agent string. -#92.113.100.35 - - [29/Sep/2013:17:22:46 +0200] "GET /robots.txt HTTP/1.1" 200 516 "-" "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0" "-" -#92.113.100.35 - - [29/Sep/2013:17:22:49 +0200] "GET /tghome.htm HTTP/1.1" 200 4445 "http://extrabot.com/help/frytygativyheku.htm" "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0" "-" -#92.113.100.35 - - [29/Sep/2013:17:22:51 +0200] "GET / HTTP/1.1" 200 5467 "http://extrabot.com/help/frytygativyheku.htm" "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0" "-" - -# to do MS Search 4.0 Robot - -#package AWSROB; - - -# Robots list was found at http://www.robotstxt.org/wc/active/all.txt -# Other robots can be found at http://www.jafsoft.com/searchengines/webbots.html -# Rem: To avoid bad detection, some robot's ids were removed from this list: -# - Robots with ID of 3 letters only -# - Robots called 'webs' and 'tcl' -# Rem: directhit changed into direct_hit (its real id) -# Rem: calif changed into calif[^r] to avoid confusion between Tiscalifreenet browser -# Rem: fish changed into [^a]fish to avoid confusion between Madsafish browser -# Rem: roadrunner changed into road_runner -# Rem: lycos changed to lycos_ to avoid confusion with lycos-online browser -# Rem: voyager changed into ^voyager\/ to avoid to exclude voyager and amigavoyager browser - -# RobotsSearchIDOrder -# It contains all matching criteria to search for in log fields. This list is -# used to know in which order to search Robot IDs. -# Most frequent ones are in list1, used when LevelForRobotsDetection is 1 or more -# Minor robots are in list2, used when LevelForRobotsDetection is 2 or more -# Note: Robots IDs are in lower case, '_', ' ' and '+' are changed into '[_+\s]' and are quoted. 
-#------------------------------------------------------- -@RobotsSearchIDOrder_list1 = ( -# Common robots (In robot file) -'appie', -'architext', -'bingpreview', -'bjaaland', -'contentmatch', -'ferret', -'googlebot\-image', -'googlebot', -'google\-sitemaps', -'google[_+\s]web[_+\s]preview', -'grabber', -'gulliver', -'virus[_+\s]detector', # Must be before harvest -'harvest', -'htdig', -'jeeves', -'linkwalker', -'lilina', -'lycos[_+\s]', -'moget', -'muscatferret', -'myweb', -'nomad', -'scooter', -'slurp', -'^voyager\/', -'weblayers', -# Common robots (Not in robot file) -'antibot', -'bruinbot', -'digout4u', -'echo!', -'fast\-webcrawler', -'ia_archiver\-web\.archive\.org', # Must be before ia_archiver to avoid confusion with alexa -'ia_archiver', -'jennybot', -'mercator', -'netcraft', -'msnbot\-media', -'msnbot-udiscovery', -'msnbot', -'petersnews', -'relevantnoise\.com', -'unlost_web_crawler', -'voila', -'webbase', -'webcollage', -'cfetch', -'zyborg', # Must be before wisenut -'wisenutbot' -); -@RobotsSearchIDOrder_list2 = ( -# Less common robots (In robot file) -'007ac9', -'[^a]fish', -'abcdatos', -'abonti\.com', -'acme\.spider', -'ahoythehomepagefinder', -'ahrefsbot', -'alkaline', -'anthill', -'arachnophilia', -'arale', -'araneo', -'aretha', -'ariadne', -'powermarks', -'arks', -'aspider', -'atn\.txt', -'atomz', -'auresys', -'backrub', -'bbot', -'bigbrother', -'blackwidow', -'blindekuh', -'bloodhound', -'borg\-bot', -'brightnet', -'bspider', -'cactvschemistryspider', -'calif[^r]', -'cassandra', -'cgireader', -'checkbot', -'christcrawler', -'churl', -'cienciaficcion', -'cms\scrawler', -'collective', -'combine', -'conceptbot', -'coolbot', -'core', -'cosmos', -'crazywebcrawler', -'cruiser', -'cusco', -'cyberspyder', -'desertrealm', -'deweb', -'dienstspider', -'digger', -'diibot', -'direct_hit', -'dnabot', -'domainappender', -'download_express', -'dragonbot', -'dwcp', -'e\-collector', -'ebiness', -'elfinbot', -'emacs', -'emcspider', -'esther', -'evliyacelebi', 
-'fastcrawler', -'feedcrawl', -'fdse', -'felix', -'fetchrover', -'fido', -'finnish', -'fireball', -'fouineur', -'francoroute', -'freecrawl', -'funnelweb', -'gama', -'gazz', -'gcreep', -'getbot', -'geturl', -'golem', -'gougou', -'grapnel', -'griffon', -'gromit', -'gulperbot', -'hambot', -'havindex', -'hometown', -'htmlgobble', -'hyperdecontextualizer', -'iajabot', -'iaskspider', -'hl_ftien_spider', -'sogou', -'icjobs\.de', -'iconoclast', -'ilse', -'imagelock', -'incywincy', -'informant', -'infoseek', -'infoseeksidewinder', -'infospider', -'inspectorwww', -'intelliagent', -'irobot', -'iron33', -'israelisearch', -'javabee', -'jbot', -'jcrawler', -'jobo', -'jobot', -'joebot', -'jubii', -'jumpstation', -'kapsi', -'katipo', -'kilroy', -'ko[_+\s]yappo[_+\s]robot', -'kummhttp', -'labelgrabber\.txt', -'larbin', -'legs', -'linkidator', -'linkscan', -'lockon', -'logo_gif', -'macworm', -'magpie', -'marvin', -'mattie', -'mediafox', -'merzscope', -'meshexplorer', -'mindcrawler', -'mnogosearch', -'momspider', -'monster', -'motor', -'muncher', -'mwdsearch', -'ndspider', -'nederland\.zoek', -'netcarta', -'netmechanic', -'netscoop', -'newscan\-online', -'nhse', -'northstar', -'nzexplorer', -'objectssearch', -'occam', -'octopus', -'openfind', -'orb_search', -'packrat', -'pageboy', -'parasite', -'patric', -'pegasus', -'perignator', -'perlcrawler', -'phantom', -'phpdig', -'piltdownman', -'pimptrain', -'pioneer', -'pitkow', -'pjspider', -'plumtreewebaccessor', -'poppi', -'portalb', -'psbot', -'python', -'raven', -'rbse', -'resumerobot', -'rhcs', -'road_runner', -'robbie', -'robi', -'robocrawl', -'robofox', -'robozilla', -'roverbot', -'rules', -'safetynetrobot', -'semalt', #Note: This entry will not work as this crawler identifies itself -# in the referrer string and not in the user agent string -'search\-info', -'search_au', -'searchprocess', -'senrigan', -'sgscout', -'shaggy', -'shaihulud', -'sift', -'simbot', -'sistrix', #Virus/trojan-infection? fr-crawler, ca-crawler? 
See https://www.projecthoneypot.org/ip_37.59.55.128, https://www.projecthoneypot.org/ip_198.27.80.144 -'site\-valet', -'sitetech', -'skymob', -'slcrawler', -'smartspider', -'snooper', -'solbot', -'speedy', -'spider[_+\s]monkey', -'spiderbot', -'spiderline', -'spiderman', -'spiderview', -'spry', -'sqworm', -'ssearcher', -'suke', -'sunrise', -'suntek', -'sven', -'tach_bw', -'tagyu_agent', -'tailrank', -'tarantula', -'tarspider', -'techbot', -'templeton', -'titan', -'titin', -'tkwww', -'tlspider', -'ucsd', -'udmsearch', -'universalfeedparser', -'urlck', -'valkyrie', -'verticrawl', -'victoria', -'visionsearch', -'voidbot', -'vwbot', -'w3index', -'w3m2', -'wallpaper', -'wanderer', -'wapspIRLider', -'webbandit', -'webcatcher', -'webcopy', -'webfetcher', -'webfoot', -'webinator', -'weblinker', -'webmirror', -'webmoose', -'webquest', -'webreader', -'webreaper', -'websnarf', -'webspider', -'webvac', -'webwalk', -'webwalker', -'webwatch', -'whatuseek', -'whowhere', -'wired\-digital', -'wmir', -'wolp', -'wombat', -'wordpress', -'worm', -'woozweb', -'wwwc', -'wz101', -'xenu\slink\ssleuth', -'xget', -# Other robots reported by users -'^finbot', #UA string starts with "finbot", should not match "elfinbot" -'^webindex$', #UA should not match "webindexer" -'1\-more_scanner', -'360spider', -'a6-indexer', -'accoona\-ai\-agent', -'activebookmark', -'adamm_bot', -'adsbot-google', -'advbot', -'affectv\.co\.uk', -'almaden', -'aipbot', -'aleadsoftbot', -'alpha_search_agent', -'allrati', -'aport', -'applebot', -'archive\-de\.com', -'archive\.org_bot', -'argus', # Must be before nutch -'arianna\.libero\.it', -'aspseek', -'asterias', -'awbot', -'backlinktest\.com', -'baiduspider', -'becomebot', -'bender', -'betabot', -'biglotron', -'bittorrent_bot', -'biz360[_+\s]spider', -'blexbot', -'blogbridge[_+\s]service', -'bloglines', -'blogpulse', -'blogsearch', -'blogshares', -'blogslive', -'blogssay', -'bncf\.firenze\.sbn\.it\/raccolta\.txt', -'bobby', -'boitho\.com\-dc', -'bookmark\-manager', 
-'boris', -'bubing', -'bumblebee', -'candlelight[_+\s]favorites[_+\s]inspector', -'careerbot', -'cbn00glebot', -'ccbot', -'cerberian_drtrs', -'cfnetwork', -'cipinetbot', -'checkweb_link_validator', -'cliqzbot', -'commons\-httpclient', -'computer_and_automation_research_institute_crawler', -'converamultimediacrawler', -'converacrawler', -'copubbot', -'cscrawler', -'cse_html_validator_lite_online', -'cuasarbot', -'cursor', -'custo', -'datafountains\/dmoz_downloader', -'dataprovider\.com', -'daumoa', -'daviesbot', -'daypopbot', -'deepindex', -'deusu', -'dipsie\.bot', -'dnsgroup', -'doccheckbot', -'domainchecker', -'domainsdb\.net', -'dotbot', -'duckduckgo-favicons-bot', -'dulance', -'dumbot', -'dumm\.de\-bot', -'earthcom\.info', -'easydl', -'eccp', -'edgeio\-retriever', -'ernst[:blank:]2\.0', -'ets_v', -'exactseek', -'extreme[_+\s]picture[_+\s]finder', -'eventax', -'everbeecrawler', -'everest\-vulcan', -'ezresult', -'enteprise', -'facebook', -'facebot', -'fast_enterprise_crawler.*crawleradmin\.t\-info@telekom\.de', -'fast_enterprise_crawler.*t\-info_bi_cluster_crawleradmin\.t\-info@telekom\.de', -'finderlein[_+\s]research[_+\s]crawler', -'matrix_s\.p\.a\._\-_fast_enterprise_crawler', # must come before fast enterprise crawler -'fast_enterprise_crawler', -'fast\-search\-engine', -'fastbot', -'favicon', -'favorg', -'favorites_sweeper', -'feedburner', -'feedfetcher\-google', -'feedflow', -'feedster', -'feedsky', -'feedvalidator', -'fetchbot', -'filmkamerabot', -'filterdb\.iss\.net', -'findlinks', -'findexa_crawler', -'firmilybot', -'foaf-search\.net', -'fooky\.com\/ScorpionBot', -'g2crawler', -'gaisbot', -'geniebot', -'genieo', -'gigablastopensource', -'gigabot', -'girafabot', -'global_fetch', -'gnodspider', -'goforit\.com', -'goforitbot', -'gonzo', -'grapeshot', -'grub', -'gpu_p2p_crawler', -'henrythemiragorobot', -'heritrix', -'holmes', -'hoowwwer', -'hpprint', -'htmlparser', -'html[_+\s]link[_+\s]validator', -'httrack', -'hundesuche\.com\-bot', -'i-bot', -'icarus6j', 
-'ichiro', -'idmarch', -'iltrovatore\-setaccio', -'implisensebot', -'infobot', -'infociousbot', -'infohelfer', -'infomine', -'insurancobot', -'integromedb\.org', -'internet[_+\s]ninja', -'internetarchive', -'internetseer', -'internetsupervision', -'ips\-agent', -'irlbot', -'isearch2006', -'istellabot', -'iupui_research_bot', -'izsearch', -'james\sbot', -'jobboerse', #AWStats seems not to find this one despite the fact that "JobboerseBot" and "jobboerse.com" appear in the UA-string, maybe some previous entry matches -'jrtwine[_+\s]software[_+\s]check[_+\s]favorites[_+\s]utility', -'justview', -'kalambot', -'kamano\.de_newsfeedverzeichnis', -'kazoombot', -'kevin', -'keyoshid', # Must come before Y!J -'kinjabot', -'kinja\-imagebot', -'knowitall', -'knowledge\.com', -'kouaa_krawler', -'krugle', -'ksibot', -'kurzor', -'lanshanbot', -'letscrawl\.com', -'libcrawl', -'linkbot', -'linkdex\.com', -'link_valet_online', -'metager\-linkchecker', # Must be before linkchecker -'linkchecker', -'linkstats\sbot', -'lipperhey', -'livejournal\.com', -'lmspider', -'loadtimebot', -'lssrocketcrawler', -'ltbot', -'ltx71', -'lwp\-request', -'lwp\-trivial', -'madaali\.de', -'magpierss', -'mail\.ru', -'mapoftheinternet\.com', -'meanpathbot', -'mediabot', -'mediapartners\-google', -'megaindex', -'megite', -'memorybot', -'metager2-verification-bot', -'metajobbot', #Does not show up in the results of Sep. 2015 despite the fact that the corresponing log file has about 40 entries containing "MetaJobBot" in the UA string - strange. 
-'metaspinner', -'miadev', -'microsoft\sbits', -'microsoft.*discovery', # = 'microsoft (?:office (?:protocol|existence)|data access internet publishing provider protocol) discovery', -'microsoft[_+\s]url[_+\s]control', -'mindupbot', -'mini\-reptile', -'minirank', -'missigua_locator', -'misterbot', -'miva', -'mizzu_labs', -'mj12bot', -'mojeekbot', -'msiecrawler', -'ms[_+\s]search[_+\s]6\.0[_+\s]robot', -'ms_search_4\.0_robot', -'msrabot', -'msrbot', -'mt::telegraph::agent', -'mydoyouhike', -'nagios', -'nasa_search', -'netestate\sne\scrawler', -'netluchs', -'netsprint', -'newsgatoronline', -'nicebot', -'nimblecrawler', -'noxtrumbot', -'npbot', -'loocalcrawler/nutch', -'nutchcvs', -'nutchosu\-vlib', -'nutch', # Must come after other nutch versions -'ocelli', -'octora_beta_bot', -'omniexplorer[_+\s]bot', -'onet\.pl[_+\s]sa', -'onfolio', -'opentaggerbot', -'openwebspider', -'optimizer', -'oracle_ultra_search', -'orangebot', -'orbiter', -'yodaobot', -'qihoobot', -'qwantify', -'passwordmaker\.org', -'pear_http_request_class', -'peerbot', -'perman', -'php[_+\s]version[_+\s]tracker', -'phpcrawl', -'picmole', -'pictureofinternet', -'ping\.blo\.gs', -'plinki', -'pluckfeedcrawler', -'plukkie', -'pogodak', -'pompos', -'popdexter', -'port_huron_labs', -'postfavorites', -'projectwf\-java\-test\-crawler', -'proodlebot', -'publiclibraryarchive', -'pyquery', -'rambler', -'redalert', -'riddler', -'rogerbot', -'rojo', -'rssimagesbot', -'ruffle', -'rufusbot', -'safeads\.xyz', -'safesearch', -'sandcrawler', -'savetheworldheritage', -'sbider', -'schizozilla', -'scumbot', -'searchguild[_+\s]dmoz[_+\s]experiment', -'searchmetricsbot', -'seekbot', -'semrushbot', -'sensis_web_crawler', -'seodiver', -'seokicks\.de', -'seoscanners', -'seznambot', -'shim\-crawler', -'shoutcast', -'sitedomain-bot', -'siteexplorer\.info', -'skimbot', -'slysearch', -'smtbot', -'snap\.com_beta_crawler', -'sohu\-search', -'sohu', # "sohu agent" -'snappy', -'spbot', -'sphere_scout', -'spiderlytics', -'spip', 
-'sproose_crawler', -'ssearch_bot', -'steeler', -'steroid__download', -'stq_bot', -'suchfin\-bot', -'superbot', -'surveybot', -'susie', -'syndic8', -'syndicapi', -'synoobot', -'tcl_http_client_package', -'technoratibot', -'teragramcrawlersurf', -'test_crawler', -'testbot', -'thumbsniper', -'t\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e', -'topicblogs', -'turnitinbot', -'turtlescanner', # Must be before turtle -'turtle', -'tutorgigbot', -'twiceler', -'ubicrawler', -'ultraseek', -'unchaos_bot_hybrid_web_search_engine', -'unido\-bot', -'unisterbot', -'updated', -'ustc\-semantic\-group', -'vagabondo\-wap', -'vagabondo', -'vebidoobot', -'vermut', -'versus_crawler_from_eda\.baykan@epfl\.ch', -'vespa_crawler', -'voltron', -'vortex', -'vse\/', -'w3c\-checklink', -'w3c[_+\s]css[_+\s]validator[_+\s]jfouffa', -'w3c_validator', -'watchmouse', -'wavefire', -'waybackarchive\.org', -'wbsearchbot', -'webclipping\.com', -'webcompass', -'webcrawl\.net', -'web_downloader', -'webdup', -'webfilter', -'webindexer', -'webminer', -'website[_+\s]monitoring[_+\s]bot', -'webvulncrawl', -'wells_search', -'wer-liefert-was', -'wesee:search', -'wevikabot', -'wonderer', -'wotbox', -'wume_crawler', -'wwweasel', -'xenu\'s_link_sleuth', -'xenu_link_sleuth', -'xirq', -'xovibot', -'y!j', # Must come after keyoshid Y!J -'yacy', -'yahoo\-blogs', -'yahoo\-verticalcrawler', -'yahoofeedseeker', -'yahooseeker\-testing', -'yahooseeker', -'yahoo\-mmcrawler', -'yahoo!_mindset', -'yandex', -'flexum', -'yanga', -'yet-another-spider', -'yisouspider', -'yooglifetchagent', -'z\-add_link_checker', -'zealbot', -'zhuaxia', -'zspider', -'zeus', -'ng\/1\.', # put at end to avoid false positive -'ng\/2\.', # put at end to avoid false positive -'exabot', # put at end to avoid false positive -# Additional bots found by Sussex. -'^[1-3]$', # Hiding bots. Doesn't appear to be a valid user agent. 
-'alltop', -'applesyndication', -'asynchttpclient', -'bingbot', -'blogged_crawl', -'bloglovin', -'butterfly', -'buzztracker', -'carpathia', -'catbot', -'chattertrap', -'check_http', #(nagios) a monitoring tool -'coldfusion', -'covario', -'daylifefeedfetcher', -'discobot', -'dlvr\.it', -'dreamwidth', -'drupal', -'ezoom', -'feedmyinbox', -'feedroll\.com', -'feedzira', -'fever\/', -'freenews', -'geohasher', -'hanrss', -'inagist', -'jacobin\sclub', -'jakarta', -'js\-kit', -'largesmall\scrawler', -'linkedinbot', -'longurl', -'metauri', -'microsoft\-webdav\-miniredir', -'^motorola$', -'movabletype', -# These appear to be bots trying to hide. All of the usual architecture data is missing. -'^mozilla\/3\.0\s\(compatible$', -'^mozilla\/4\.0$', -'^mozilla\/4\.0\s\(compatible;\)$', -'^mozilla\/5\.0$', -'^mozilla\/5\.0\s\(compatible;$', -'^mozilla\/5\.0\s\(en\-us\)$', -'^mozilla\/5\.0\sfirefox\/3\.0\.5$', -'^msie', -# End of hiding bots. -'netnewswire', -'\snetseer\s', -'netvibes', -'newrelicpinger', -'newsfox', -'nextgensearchbot', -'ning', -'pingdom', -'pita', -'postpost', -'postrank', -'printfulbot', -'protopage', -'proximic', -'quipply', -'r6\_', -'ratingburner', -'regator', -'rome\sclient', -'rpt\-httpclient', -'rssgraffiti', -'sage\+\+', -'scoutjet', -'simplepie', -'sitebot', -'summify\.com', -'superfeedr', -'synthesio', -'teoma', -'topblogsinfo', -'topix\.net', -'trapit', -'trileet', -'tweetedtimes', -'twisted\spagegetter', -'twitterbot', -'twitterfeed', -'unwindfetchor', -'wazzup', -'windows\-rss\-platform', -'wiumi', -'xydo', -'yahoo!\sslurp', -'yahoo\spipes', -'yahoo\-newscrawler', -'yahoocachesystem', -'yahooexternalcache', -'yahoo!\ssearchmonkey', -'yahooysmcm', -'yammer', -# 'yandexbot', #already covered by 'yandex' -'yeti', -'yie8', -'youdao', -'yourls', -'zemanta', -'zend_http_client', -'zumbot', -# Other id that are 99% of robots -'wget', -'libwww', -'^java\/[0-9]' # put at end to avoid false positive -); -@RobotsSearchIDOrder_listgen = ( -# Generic robot 
-'robot', -'checker', -'crawl', -'discovery', -'hunter', -'scanner', -'spider', -'sucker', -'bot[\s_+:,\.\;\/\\\-]', -# Identifies -#"Mozilla/5.0 (Linux; U; Android 4.2.2; de-de; CUBOT P9 Build/JDQ39) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30" -#as a but. There is a Android mobile phone called "CUBOT P9", so this is probably not a bot. -'[\s_+:,\.\;\/\\\-]bot', -'curl', -'php', -'ruby\/', -'no_user_agent' -); - - - -# RobotsHashIDLib -# List of robots names ('robot id','robot clear text') -#------------------------------------------------------- -%RobotsHashIDLib = ( -# Common robots (In robot file) -'appie','Walhello appie', -'architext','ArchitextSpider', -'bingpreview','Bing Preview bot', -'bjaaland','Bjaaland', -'ferret','Wild Ferret Web Hopper #1, #2, #3', -'contentmatch','Yahoo!China ContentMatch Crawler', -'googlebot\-image','Googlebot-Image', -'googlebot','Googlebot', -'google\-sitemaps', 'Google Sitemaps', -'grabber', 'Grabber (SDSC)', -'google[_+\s]web[_+\s]preview', 'Google Web Preview', -'gulliver','Northern Light Gulliver', -'virus[_+\s]detector','virus_detector', -'harvest','Harvest', -'htdig','ht://Dig', -'jeeves','Ask', -'linkwalker','LinkWalker', -'lilina','Lilina', -'lycos[_+\s]','Lycos', -'moget','moget', -'muscatferret','Muscat Ferret', -'myweb','Internet Shinchakubin', -'nomad','Nomad', -'scooter','Scooter', -'slurp','Yahoo Slurp', -'^voyager\/','Voyager', -'weblayers','Weblayers', -# Common robots (Not in robot file) -'antibot','Antibot', -'bruinbot','The web archive', -'digout4u','Digout4u', -'echo!','EchO!', -'fast\-webcrawler','Fast-Webcrawler', -'ia_archiver\-web\.archive\.org','The web archive (IA Archiver)', -'ia_archiver','Alexa (IA Archiver)', -'jennybot','JennyBot', -'mercator','Mercator', -'msnbot\-media','MSNBot-media', -'msnbot-udiscovery', 'msnbot-UDiscovery Note: AWStats counts most of its traffic as user traffic', -'msnbot','MSNBot', -'netcraft','Netcraft', -'petersnews','Petersnews', 
-'unlost_web_crawler','Unlost Web Crawler', -'voila','Voila', -'webbase', 'WebBase', -'zyborg','ZyBorg', -'wisenutbot','WISENutbot', -'webcollage','WebCollage', -'cfetch','Cfetch', -# Less common robots (In robot file) -'007ac9', '007ac9 Crawler, seems to belong to SISTRIX', -'[^a]fish','Fish search', -'abcdatos','ABCdatos BotLink', -'abonti\.com','Abonti WebSearch', -'acme\.spider','Acme.Spider', -'ahoythehomepagefinder','Ahoy! The Homepage Finder', -'ahrefsbot', 'AhrefsBot', -'alkaline','Alkaline', -'anthill','Anthill', -'arachnophilia','Arachnophilia', -'arale','Arale', -'araneo','Araneo', -'aretha','Aretha', -'ariadne','ARIADNE', -'powermarks','Powermarks', # must come before Arks; seen used by referrer spam -'arks','arks', -'aspider','ASpider (Associative Spider)', -'atn\.txt','ATN Worldwide', -'atomz','Atomz.com Search Robot', -'auresys','AURESYS', -'backrub','BackRub', -'bbot','BBot', -'bigbrother','Big Brother', -'blackwidow','BlackWidow', -'blindekuh','Die Blinde Kuh', -'bloodhound','Bloodhound', -'borg\-bot','Borg-Bot', -'brightnet','bright.net caching robot', -'bspider','BSpider', -'cactvschemistryspider','CACTVS Chemistry Spider', -'calif[^r]','Calif', -'cassandra','Cassandra', -'cgireader','Digimarc Marcspider/CGI', -'checkbot','Checkbot', -'christcrawler','ChristCrawler.com', -'churl','churl', -'cienciaficcion','cIeNcIaFiCcIoN.nEt', -'cms\scrawler', 'CMS Crawler', -'collective','Collective', -'combine','Combine System', -'conceptbot','Conceptbot', -'coolbot','CoolBot', -'core','Web Core / Roots', -'cosmos','XYLEME Robot', -'crazywebcrawler', 'CrazyWeb Crawler', -'cruiser','Internet Cruiser Robot', -'cusco','Cusco', -'cyberspyder','CyberSpyder Link Test', -'desertrealm','Desert Realm Spider', -'deweb','DeWeb(c) Katalog/Index', -'dienstspider','DienstSpider', -'digger','Digger', -'diibot','Digital Integrity Robot', -'direct_hit','Direct Hit Grabber', -'dnabot','DNAbot', -'domainappender', 'DomainAppender', -'download_express','DownLoad Express', 
-'dragonbot','DragonBot', -'dwcp','DWCP (Dridus\' Web Cataloging Project)', -'e\-collector','e-collector', -'ebiness','EbiNess', -'elfinbot','ELFINBOT', -'emacs','Emacs-w3 Search Engine', -'emcspider','ananzi', -'esther','Esther', -'evliyacelebi','Evliya Celebi', -'fastcrawler','FastCrawler', -'feedcrawl','FeedCrawl by feed@aobo.com', -'fdse','Fluid Dynamics Search Engine robot', -'felix','Felix IDE', -'fetchrover','FetchRover', -'fido','fido', -'finnish','Finnish', -'fireball','KIT-Fireball', -'fouineur','Fouineur', -'francoroute','Robot Francoroute', -'freecrawl','Freecrawl', -'funnelweb','FunnelWeb', -'gama','gammaSpider, FocusedCrawler', -'gazz','gazz', -'gcreep','GCreep', -'getbot','GetBot', -'geturl','GetURL', -'golem','Golem', -'gougou','GouGou', -'grapnel','Grapnel/0.01 Experiment', -'griffon','Griffon', -'gromit','Gromit', -'gulperbot','Gulper Bot', -'hambot','HamBot', -'havindex','havIndex', -'hometown','Hometown Spider Pro', -'htmlgobble','HTMLgobble', -'hyperdecontextualizer','Hyper-Decontextualizer', -'iajabot','iajaBot', -'iaskspider','Sina Iask Spider', -'hl_ftien_spider','Hylanda', -'sogou','Sogou Spider', -'icjobs\.de', 'iCjobs Spider Note: Most traffic counts as user traffic', -#20130805 The user agent string of the icjobs-spider contained the -#identifying string only when it accessed the robots.txt file. -#When it accessed the actual content it did not identify itself as -#a spider. Thus traffic of this spider was counted as user traffic. -#The behavious seems to have changed now - the spider identifies itself -#when it accesses content pages. -#20141401 Behavior as before: Does identify itself when it accesses -# robots.txt and the root page. The following traffic does not contain -# the identification string and is therefore counted as user traffic. 
-'iconoclast','Popular Iconoclast', -'ilse','Ingrid', -'imagelock','Imagelock', -'incywincy','IncyWincy', -'informant','Informant', -'infoseek','InfoSeek Robot 1.0', -'infoseeksidewinder','Infoseek Sidewinder', -'infospider','InfoSpiders', -'inspectorwww','Inspector Web', -'intelliagent','IntelliAgent', -'ips\-agent', 'ips-agent Verisign(?) - no reliable information found.', -'irobot','I, Robot', -'iron33','Iron33', -'israelisearch','Israeli-search', -'javabee','JavaBee', -'jbot','JBot Java Web Robot', -'jcrawler','JCrawler', -'jobo','JoBo Java Web Robot', -'jobot','Jobot', -'joebot','JoeBot', -'jubii','The Jubii Indexing Robot', -'jumpstation','JumpStation', -'kapsi','image.kapsi.net', -'katipo','Katipo', -'kilroy','Kilroy', -'ko[_+\s]yappo[_+\s]robot','KO_Yappo_Robot', -'kummhttp','KummHttp', -'labelgrabber\.txt','LabelGrabber', -'larbin','larbin', -'legs','legs', -'linkidator','Link Validator', -'linkscan','LinkScan', -'lockon','Lockon', -'logo_gif','logo.gif Crawler', -'macworm','Mac WWWWorm', -'lmspider','lmspider', -'lwp\-request','lwp-request', -'lwp\-trivial','lwp-trivial', -'magpie','MagpieRSS', -'marvin','marvin/infoseek', -'mattie','Mattie', -'mediafox','MediaFox', -'merzscope','MerzScope', -'meshexplorer','NEC-MeshExplorer', -'mindcrawler','MindCrawler', -'mnogosearch','mnoGoSearch search engine software', -'momspider','MOMspider', -'monster','Monster', -'motor','Motor', -'muncher','Muncher', -'mwdsearch','Mwd.Search', -'ndspider','NDSpider', -'nederland\.zoek','Nederland.zoek', -'netcarta','NetCarta WebMap Engine', -'netmechanic','NetMechanic', -'netscoop','NetScoop', -'newscan\-online','newscan-online', -'nhse','NHSE Web Forager', -'northstar','The NorthStar Robot', -'nzexplorer','nzexplorer', -'objectssearch','ObjectsSearch', -'occam','Occam', -'octopus','HKU WWW Octopus', -'openfind','Openfind data gatherer', -'orb_search','Orb Search', -'packrat','Pack Rat', -'pageboy','PageBoy', -'parasite','ParaSite', -'patric','Patric', -'pegasus','pegasus', 
-'perignator','The Peregrinator', -'perlcrawler','PerlCrawler 1.0', -'phantom','Phantom', -'phpdig','PhpDig', -'piltdownman','PiltdownMan', -'pimptrain','Pimptrain.com\'s robot', -'pioneer','Pioneer', -'pitkow','html_analyzer', -'pjspider','Portal Juice Spider', -'plumtreewebaccessor','PlumtreeWebAccessor', -'poppi','Poppi', -'portalb','PortalB Spider', -'psbot','psbot', -'python','Python-urllib', -'raven','Raven Search', -'rbse','RBSE Spider', -'resumerobot','Resume Robot', -'rhcs','RoadHouse Crawling System', -'road_runner','Road Runner: The ImageScape Robot', -'robbie','Robbie the Robot', -'robi','ComputingSite Robi/1.0', -'robocrawl','RoboCrawl Spider', -'robofox','RoboFox', -'robozilla','Robozilla', -'roverbot','Roverbot', -'rules','RuLeS', -'safetynetrobot','SafetyNet Robot', -'semalt', 'seamalt.com', -'search\-info','Sleek', -'search_au','Search.Aus-AU.COM', -'searchprocess','SearchProcess', -'senrigan','Senrigan', -'sgscout','SG-Scout', -'shaggy','ShagSeeker', -'shaihulud','Shai\'Hulud', -'sift','Sift', -'simbot','Simmany Robot Ver1.0', -'sistrix', 'SISTRIX Crawler', -'site\-valet','Site Valet', -'sitetech','SiteTech-Rover', -'skymob','Skymob.com', -'slcrawler','SLCrawler', -'smartspider','Smart Spider', -'snooper','Snooper', -'solbot','Solbot', -'speedy','Speedy Spider', -'spider[_+\s]monkey','Spider monkey', -'spiderbot','SpiderBot', -'spiderline','Spiderline Crawler', -'spiderlytics', 'Spiderlytics: No homepage, e-mail only: spider (at) spiderlytics.com', -'spiderman','Spiderman', -'spiderview','SpiderView(tm)', -'spry','Spry Wizard Robot', -'ssearcher','Site Searcher', -'sqworm','Sqworm', -'suke','Suke', -'sunrise','Sunrise', -'suntek','suntek search engine', -'sven','Sven', -'tach_bw','TACH Black Widow', -'tagyu_agent','Tagyu Agent', -'tarantula','Tarantula', -'tarspider','tarspider', -'tailrank','TailRank', -'techbot','TechBOT', -'templeton','Templeton', -'titan','TITAN', -'titin','TitIn', -'tkwww','The TkWWW Robot', -'tlspider','TLSpider', 
-'ucsd','UCSD Crawl', -'udmsearch','UdmSearch', -'universalfeedparser','UniversalFeedParser', -'urlck','URL Check', -'valkyrie','Valkyrie', -'verticrawl','Verticrawl', -'victoria','Victoria', -'visionsearch','vision-search', -'voidbot','void-bot', -'vwbot','VWbot', -'w3index','The NWI Robot', -'w3m2','W3M2', -'wallpaper','WallPaper (alias crawlpaper)', -'wanderer','the World Wide Web Wanderer', -'wapspider','w@pSpider by wap4.com', -'webbandit','WebBandit Web Spider', -'webcatcher','WebCatcher', -'webcopy','WebCopy', -'webfetcher','webfetcher', -'webfoot','The Webfoot Robot', -'webinator','Webinator', -'weblinker','WebLinker', -'webmirror','WebMirror', -'webmoose','The Web Moose', -'webquest','WebQuest', -'webreader','Digimarc MarcSpider', -'webreaper','WebReaper', -'websnarf','Websnarf', -'webspider','WebSpider', -'webvac','WebVac', -'webwalk','webwalk', -'webwalker','WebWalker', -'webwatch','WebWatch', -'whatuseek','whatUseek Winona', -'whowhere','WhoWhere Robot', -'wired\-digital','Wired Digital', -'wmir','w3mir', -'wolp','WebStolperer', -'wombat','The Web Wombat', -'wordpress','WordPress', -'worm','The World Wide Web Worm', -'woozweb','Woozweb Monitoring', -'wwwc','WWWC Ver 0.2.5', -'wz101','WebZinger', -'xenu\slink\ssleuth', 'Xenu'. "'" . 
's Link Sleuth (TM), see Wikipedia', -'xget','XGET', -# Other robots reported by users -'^finbot', 'finbot', -'^webindex$', 'WebIndex', -'1\-more_scanner','1-More Scanner', -'360spider','360spider', -'a6-indexer', 'A6-Indexer', -'accoona\-ai\-agent','Accoona-AI-Agent', -'activebookmark','ActiveBookmark', -'adamm_bot','AdamM Bot', -'adsbot-google', 'AdsBot-Google', -'advbot', 'AdvBot', -'affectv\.co\.uk', 'affectv.co.uk', -'almaden','IBM Almaden Research Center WebFountain™', -'aipbot','aipbot', -'aleadsoftbot','ALeadSoftbot', -'alpha_search_agent','Alpha Search Agent', -'allrati','Allrati', -'aport', 'Aport', -'applebot', 'Applebot', -'archive\-de\.com', 'Archive-de.com', -'archive\.org_bot','archive.org bot', -'argus','Argus', -'arianna\.libero\.it','arianna.libero.it', -'aspseek','ASPseek', -'asterias', 'Asterias', -'awbot', 'AWBot', -'backlinktest\.com', 'BacklinkCrawler', -'baiduspider','BaiDuSpider', -'becomebot', 'BecomeBot', -'bender','bender focused_crawler', -'betabot','BetaBot', -'biglotron','Biglotron', -'bittorrent_bot','BitTorrent Bot', -'biz360[_+\s]spider','Biz360 spider', -'blexbot', 'BLEXBot, seems to belong to the WebMeUp backlink tool', -'blogbridge[_+\s]service','BlogBridge Service', -'bloglines','Bloglines', -'blogpulse','BlogPulse ISSpider intelliseek.com', -'blogsearch','BlogSearch', -'blogshares','Blogshares Spiders', -'blogslive','Blogslive', -'blogssay','BlogsSay :: RSS Search Crawler', -'bncf\.firenze\.sbn\.it\/raccolta\.txt','Biblioteca Nazionale Centrale di Firenze', -'bobby', 'Bobby', -'boitho\.com\-dc','boitho.com-dc', -'bookmark\-manager','Bookmark-Manager', -'boris', 'Boris', -'bubing', 'BUbiNG', -'bumblebee', 'Bumblebee (relevare.com)', -'candlelight[_+\s]favorites[_+\s]inspector','Candlelight_Favorites_Inspector', -'careerbot', 'CareerBot', -'cbn00glebot','cbn00glebot', -'ccbot', 'Common Crawl', -'cerberian_drtrs','Cerberian Drtrs', -'cfnetwork','CFNetwork', -'cipinetbot','CipinetBot', -'checkweb_link_validator','CheckWeb link 
validator', -'cliqzbot', 'Cliqzbot', -'commons\-httpclient','Jakarta commons-httpclient', -'computer_and_automation_research_institute_crawler','Computer and Automation Research Institute Crawler', -'converamultimediacrawler','ConveraMultiMediaCrawler', -'converacrawler','ConveraCrawler', -'copubbot', 'CoPubbot', -'cscrawler','CsCrawler', -'cse_html_validator_lite_online','CSE HTML Validator Lite Online','cuasarbot','Cuasarbot', -'cursor','Cursor', -'custo','Custo', -'datafountains\/dmoz_downloader','DataFountains/DMOZ Downloader', -'dataprovider\.com', 'Dataprovider Site Explorer', -'daumoa', 'Daum', -'daviesbot', 'DaviesBot', -'daypopbot', 'DayPop', -'deepindex','Deepindex', -'deusu', 'DeuSu', -'dipsie\.bot','Dipsie', -'dnsgroup','DNSGroup', -'doccheckbot', 'doccheckbot/1.0, known to Project Honey Pot', -'domainchecker','DomainChecker', -'domainsdb\.net','DomainsDB.net', -'dotbot', 'DotBot, Open Site Explorer', -'duckduckgo-favicons-bot', 'DuckDuckGo-Favicons-Bot', -'dulance','Dulance', -'dumbot','Dumbot', -'dumm\.de\-bot','dumm.de-Bot', -'earthcom\.info','EARTHCOM.info', -'easydl','EasyDL', -'eccp', 'Eniro Sverige, email: search (at) eniro.com', -'edgeio\-retriever','edgeio-retriever', -'ernst[:blank:]2\.0', 'Ernst 2.0 (does not provide any further information)', -'ets_v','ETS Enterprise Translation Server', -'exactseek','ExactSeek Crawler', -'extreme[_+\s]picture[_+\s]finder','Extreme_Picture_Finder', -'eventax','eventax', -'everbeecrawler','EverbeeCrawler', -'everest\-vulcan','Everest-Vulcan', -'ezresult', 'Ezresult', -'enteprise','Fast Enteprise Crawler', -'facebook','FaceBook bot', -'facebot', 'Facebot (Facebook bot?)', -'fast\-search\-engine','Fast-Search-Engine (not fastsearch.com)', -'fast_enterprise_crawler','FAST Enterprise Crawler', -'fast_enterprise_crawler.*scrawleradmin\.t\-info@telekom\.de','FAST Enterprise Crawler * crawleradmin.t-info@telekom.de', -'finderlein[_+\s]research[_+\s]crawler', 'Finderlein Research Crawler 1.0 (no contact information 
given)', -'matrix_s\.p\.a\._\-_fast_enterprise_crawler','Matrix S.p.A. - FAST Enterprise Crawler', -'fast_enterprise_crawler.*t\-info_bi_cluster_crawleradmin\.t\-info@telekom\.de','FAST Enterprise Crawler * T-Info_BI_cluster crawleradmin.t-info@telekom.de', -'fastbot', 'fastbot', -'favicon','FavIconizer', -'favorg','FavOrg', -'favorites_sweeper','Favorites Sweeper', -'feedburner', 'Feedburner', -'feedfetcher\-google','Feedfetcher-Google', -'feedflow','FeedFlow', -'feedster','Feedster', -'feedsky','FeedSky', -'feedvalidator','FeedValidator', -'fetchbot', 'Fetchbot', -'filmkamerabot','FilmkameraBot', -'filterdb\.iss\.net', 'oBot', -'findexa_crawler','Findexa Crawler', -'firmilybot', 'Firmily Bot Home page (Website was hacked on Oct. 19, 2013)', -'findlinks','Findlinks', -'foaf-search\.net', 'Friend of a friend (FOAF) search engine', -'fooky\.com\/ScorpionBot','Fooky.com/ScorpionBot/ScoutOut', -'g2crawler','G2Crawler', -'gaisbot','Gaisbot', -'geniebot','Geniebot', -'genieo', 'Genieo', -'gigablastopensource', 'GigablastOpenSource, an Open Source Search Engine(Wiki)', -'gigabot','GigaBot', -'girafabot','Girafabot', -'global_fetch','Global Fetch', -'gnodspider','GNOD Spider', -'goforit\.com','GoForIt.com', -'goforitbot','GOFORITBOT', -'gonzo','suchen.de', -'gpu_p2p_crawler','GPU p2p crawler', -'grapeshot', 'Grapeshot Crawler', -'grub','Grub.org', -'henrythemiragorobot', 'Mirago', -'heritrix','Heritrix', -'holmes', 'Holmes', -'hoowwwer','HooWWWer', -'hpprint','HPPrint', -'htmlparser','HTMLParser', -'html[_+\s]link[_+\s]validator','Html_Link_Validator', -'httrack','HTTrack off-line browser', -'hundesuche\.com\-bot','Hundesuche.com-Bot', -'i-bot','i-bot', -'icarus6j', 'Icarus6j, email address in UA string, no website', -'ichiro','ichiro', -'idmarch', 'IDMARCH', -'iltrovatore\-setaccio','IlTrovatore-Setaccio', -'implisensebot', 'ImplisenseBot', -'infobot','InfoBot', -'infociousbot','InfociousBot', -'infohelfer','Infohelfer', -'infomine','INFOMINE VLCrawler', 
-'insurancobot','InsurancoBot', -'integromedb\.org','IntegromeDB', -'internet[_+\s]ninja','Internet_Ninja ', -'internetarchive','InternetArchive', -'internetseer', 'InternetSeer', -'internetsupervision','InternetSupervision', -'irlbot','IRLbot', -'isearch2006','isearch2006', -'istellabot', 'IstellaBot', -'iupui_research_bot','IUPUI_Research_Bot', -'izsearch', 'iZSearch', -'james\sbot', 'James BOT', -'jobboerse', 'Jobbörse', -'jrtwine[_+\s]software[_+\s]check[_+\s]favorites[_+\s]utility','JRTwine_Software_Check_Favorites_Utility', -'justview', 'JustView', -'kalambot','KalamBot', -'kamano\.de_newsfeedverzeichnis','kamano.de NewsFeedVerzeichnis', -'kazoombot','KazoomBot', -'kevin','Kevin', -'keyoshid','Yahoo! Japan keyoshid robot study', -'kinjabot', 'Kinjabot', -'kinja\-imagebot', 'Kinja Imagebot', -'knowitall','KnowItAll', -'knowledge\.com','Knowledge.com', -'kouaa_krawler','Kouaa Krawler', -'krugle','Krugle', -'ksibot','ksibot', -'kurzor','Kurzor', -'lanshanbot','lanshanbot', -'letscrawl\.com','LetsCrawl.com', -'libcrawl','Crawl libcrawl', -'link_valet_online','Link Valet Online', -'linkbot','LinkBot', -'linkdex\.com', 'Linkdex', -'linkchecker','LinkChecker', -'linkstats\sbot', 'LinkStats Bot', -'lipperhey', 'Lipperhey SEO Service', -'livejournal\.com', 'LiveJournal.com', -'loadtimebot', 'LoadTimeBot', -'lssrocketcrawler', 'LSSRocketCrawler (no contact information)', -'ltbot', 'Language Tools Bot (ltbot)', -'ltx71', 'ltx71', -'madaali\.de', 'www.madaali.de', -'magpierss', 'MagpieRSS', -'mail\.ru', 'Mail.ru bot', -'mapoftheinternet\.com','MapoftheInternet.com', -'meanpathbot', 'Meanpathbot', -'mediabot', 'MediaBot', -'mediapartners\-google','Google AdSense', -# 'Mediapartners-Google (Feb 12, 2015: no additial information in UA String, seems to use GigablastOpenSource', -# Uses UA string "Mediapartners-Google" only, and there were accesses using an UA string "GigablastOpenSource/1.0" from the same IP-Address. 
-# Therefore this is probably not related to Google 4.3.2015 Albrecht Müller -'megaindex', 'MegaIndex Crawler, seems to belong to MegaIndex.ru', -'megite','Megite', -'memorybot', 'Archivethe.net', -'metager2-verification-bot', 'metager2-verification-bot', -'metager\-linkchecker','MetaGer LinkChecker', -'metajobbot', 'MetaJobBot', -'metaspinner','Metaspinner', -'miadev', 'MiaDev spider', -'microsoft\sbits', 'Microsoft Background Intelligent Transfer Service (BITS)?', -'microsoft.*discovery', 'Microsoft Office Protocol Discovery/Microsoft Office Existence Discovery', -'microsoft[_+\s]url[_+\s]control','Microsoft URL Control', -'mindupbot', 'mindUpBot (datenbutler.de)', -'minirank','miniRank', -'mini\-reptile','Mini-reptile', -'missigua_locator','Missigua_Locator', -'misterbot','Misterbot', -'miva','Miva', -'mizzu_labs','Mizzu Labs', -'mj12bot','MJ12bot', -'mojeekbot','MojeekBot', -'msiecrawler','MSIECrawler', -'ms[_+\s]search[_+\s]6\.0[_+\s]robot','MS Search 6.0 Robot (MS SharePoint Portal Server?)', -'ms_search_4\.0_robot','MS SharePoint Portal Server - MS Search 4.0 Robot', -'msrabot','msrabot', -'msrbot','MSRBOT', -'mt::telegraph::agent','MT::Telegraph::Agent', -'mydoyouhike','Mydoyouhike', -'nagios','Nagios', -'nasa_search','NASA Search', -'netestate\sne\scrawler','Website-Datenbank', -'netluchs','Netluchs', -'netsprint','NetSprint', -'newsgatoronline', 'NewsGator Online', -'nicebot','nicebot', -'nimblecrawler','NimbleCrawler', -'noxtrumbot','noxtrumbot', -'npbot','NPBot', -'loocalcrawler/nutch', 'LoocalCrawler/Nutch', -'nutchcvs','NutchCVS', -'nutchosu\-vlib','NutchOSU-VLIB', -'nutch','Nutch', -'ocelli','Ocelli', -'octora_beta_bot','Octora Beta Bot', -'omniexplorer[_+\s]bot','OmniExplorer Bot', -'onet\.pl[_+\s]sa','Onet.pl_SA', -'onfolio','Onfolio', -'opentaggerbot','OpenTaggerBot', -'openwebspider','OpenWebSpider', -'optimizer', 'Optimizer', -'oracle_ultra_search','Oracle Ultra Search', -'orangebot', 'OrangeBot, no website, log entry specifies mail address', # 
support.orangebot@orange.com -'orbiter','Orbiter', -'yodaobot','OutfoxBot/YodaoBot', -'qihoobot','QihooBot', -'qwantify', 'Qwant', -'passwordmaker\.org','passwordmaker.org', -'pear_http_request_class','PEAR HTTP Request class', -'peerbot','PEERbot', -'perman', 'Perman surfer', -'php[_+\s]version[_+\s]tracker','PHP version tracker', -'phpcrawl', 'PHPCrawl', -'picmole', 'Specified address www.picmole.com was not reachable on April 21, 2014', -'pictureofinternet','PictureOfInternet', -'ping\.blo\.gs','ping.blo.gs', -'plinki','plinki', -'pluckfeedcrawler','PluckFeedCrawler', -'plukkie', 'Plukkie', -'pogodak','Pogodak.com', -'pompos','Pompos', -'popdexter','Popdexter', -'port_huron_labs','Port Huron Labs', -'postfavorites','PostFavorites', -'projectwf\-java\-test\-crawler','ProjectWF-java-test-crawler', -'proodlebot','proodleBot', -'publiclibraryarchive', 'publiclibraryarchive.org (related to spiderlytics.com and/or waybackarchive.org?)', -#Observations 2014-06-23 -#Domain publiclibraryarchive.org is parked at GoDaddy.com -#from https://www.projecthoneypot.org/ -#81.30.151.220's User Agent Strings (honeypot classified this ip as an mail server, active about 6 years ago) -#Mozilla/5.0 (compatible; publiclibraryarchive.org/1.0; +crawl@publiclibraryarchive.org) -#176.9.138.27's User Agent Strings -#Mozilla/5.0 (compatible; publiclibraryarchive.org/1.0; +crawl@publiclibraryarchive.org) -#Mozilla/5.0 (compatible; Spiderlytics/1.0; +spider@spiderlytics.com) -#Mozilla/5.0 (compatible; waybackarchive.org/1.0; +spider@waybackarchive.org) -#146.0.32.165's User Agent Strings -#Mozilla/5.0 (compatible; publiclibraryarchive.org/1.0; +crawl@publiclibraryarchive.org) -#Mozilla/5.0 (compatible; savetheworldheritage.org/1.0; +crawl@savetheworldheritage.org) -#Mozilla/5.0 (compatible; seoscanners.net/1; +spider@seoscanners.net) -'pyquery','PyQuery', -'rambler','StackRambler', -'redalert','Red Alert', -'relevantnoise\.com', 'Relevant Noise', -'riddler', 'Riddler', -'rogerbot', 'Rogerbot', 
-'rojo','RoJo aggregator', -'rssimagesbot','rssImagesBot', -'ruffle','ruffle SemanticWeb crawler', -'rufusbot','RufusBot Rufus Web Miner', -'safeads\.xyz', 'SafeAds.xyz', -'safesearch', 'Avira SafeSearch', -'sandcrawler','SandCrawler (Microsoft)', -'savetheworldheritage', 'savetheworldheritage.org (related to spiderlytics.com, waybackarchive.org and/or publiclibraryarchive.org?)', -'sbider','SBIder', -'schizozilla','Schizozilla', -'scumbot','Scumbot', -'searchguild[_+\s]dmoz[_+\s]experiment','SearchGuild_DMOZ_Experiment', -'searchmetricsbot','SearchmetricsBot', -'seekbot','Seekbot', -'semrushbot', 'SemrushBot', -'sensis_web_crawler','Sensis Web Crawler', -'seodiver', 'SEO DIVER', -'seokicks\.de', 'SEOkicks Webcrawler', -'seoscanners', 'seoscanners.net (related to publiclibraryarchive.org and savetheworldheritage.org?)', -'seznambot','SeznamBot', -'shim\-crawler','Shim-Crawler', -'shoutcast','Shoutcast Directory Service', -'sitedomain-bot', 'Sitedomain.de', -'siteexplorer\.info', 'Site Explorer', -'skimbot', 'SkimBot', -'slysearch','SlySearch', -'smtbot', 'SMTBot', -'snap\.com_beta_crawler','snap.com beta crawler', -'sohu\-search','sohu-search', -'sohu','sohu agent', -'snappy','Snappy', -'spbot', 'SEOprofiler Bot', -'sphere_scout','Sphere Scout', -'spip','SPIP', -'sproose_crawler','sproose crawler', -'ssearch_bot', 'sSearch Crawler', -'steroid__download','STEROID Download', -'steeler','Steeler', -'stq_bot', 'SEARCHTEQ', -'suchfin\-bot','Suchfin-Bot', -'superbot','SuperBot', -'surveybot','SurveyBot', -'susie','Susie', -'syndic8','Syndic8', -'syndicapi','SyndicAPI', -'synoobot','SynooBot', -'tcl_http_client_package','Tcl http client package', -'technoratibot', 'Technoratibot', -'teragramcrawlersurf','TeragramCrawlerSURF', -'test_crawler','Test Crawler', -'testbot','TestBot', -'thumbsniper', 'ThumbSniper', -'t\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e','T-H-U-N-D-E-R-S-T-O-N-E', -'topicblogs', 'topicblogs', -'turnitinbot', 'Turn It In', -'turtle', 'Turtle', -'turtlescanner', 
'Turtle', -'tutorgigbot','TutorGigBot', -'twiceler','twiceler', -'ubicrawler','UbiCrawler', -'ultraseek', 'Ultraseek', -'unchaos_bot_hybrid_web_search_engine','UnChaos Bot Hybrid Web Search Engine', -'unido\-bot','unido-bot', -'unisterbot', 'UnisterBot; E-Mail only: crawler (at) unister.de', -'updated','updated', -'ustc\-semantic\-group','USTC-Semantic-Group', -'vagabondo\-wap','Vagabondo-WAP', -'vagabondo','Vagabondo', -'vebidoobot', 'vebidoobot', -'vermut','Vermut', -'versus_crawler_from_eda\.baykan@epfl\.ch','versus crawler from eda.baykan@epfl.ch', -'vespa_crawler','Vespa Crawler', -'voltron', 'voltron', -'vortex','VORTEX', -'vse\/','VSE', -'w3c\-checklink','W3C Link Checker', -'w3c[_+\s]css[_+\s]validator[_+\s]jfouffa', 'W3C jigsaw CSS Validator', -'w3c_validator','W3C Validator', -'watchmouse', 'WatchMouse Website Monitor', -'wavefire','Wavefire', -'waybackarchive\.org', 'No website, email: spider(at)waybackarchive.org', -# 2.12.2013 Project Honeypot reports at least one of the IPs used by waybackarchive with a spiderlytics UA string. -# Problably not related to the wayback machine of archive.org. 
-'wbsearchbot', 'WBSearchBot', -'webclipping\.com', 'WebClipping.com', -'webcompass', 'webcompass', -'webcrawl\.net','webcrawl.net', -'web_downloader','Web Downloader', -'webdup','Webdup', -'webfilter','WebFilter', -'webindexer','WebIndexer', -'webminer','WebMiner', -'website[_+\s]monitoring[_+\s]bot','Website_Monitoring_Bot', -'webvulncrawl', 'WebVulnCrawl', -'wells_search','Wells Search', -'wer-liefert-was', 'Wer-liefert-was Crawler Note: AWStats counts most traffic as user traffic', -'wesee:search', 'WeSEE Bot', -'wevikabot', 'WeViKa', -'wonderer', 'Web Wombat Redback Spider', -'wotbox', 'Wotbox', -'wume_crawler','wume crawler', -'wwweasel',,'WWWeasel', -'xenu\'s_link_sleuth','Xenu Link Sleuth', -'xenu_link_sleuth','Xenu Link Sleuth', -'xirq','xirq', -'xovibot', 'XoviBot', -'y!j', 'Y!J Yahoo Japan', -'yacy', 'YaCy', -'yahoo\-blogs','Yahoo-Blogs', -'yahoo\-verticalcrawler', 'Yahoo Vertical Crawler', -'yahoofeedseeker', 'Yahoo Feed Seeker', -'yahooseeker\-testing', 'YahooSeeker-Testing', -'yahooseeker', 'YahooSeeker Yahoo! Blog crawler', -'yahoo\-mmcrawler', 'Yahoo-MMCrawler', -'yahoo!_mindset','Yahoo! 
Mindset', -'yandex', 'Yandex Bot', -'flexum', 'Flexum Search Engine', -'yanga', 'Yanga WorldSearch Bot', -'yet-another-spider','Yet-Another-Spider', -'yisouspider', 'YisouSpider (no additional information in UA string)', -'yooglifetchagent','yoogliFetchAgent', -'z\-add_link_checker','Z-Add Link Checker', -'zealbot','ZealBot', -'zhuaxia','ZhuaXia', -'zspider','zspider', -'zeus','Zeus Webster Pro', -'zumbot','ZumBot', -'ng\/1\.','NG 1.x (Exalead)', # put at end to avoid false positive -'ng\/2\.','NG 2.x (Exalead)', # put at end to avoid false positive -'exabot','Exabot', # put at end to avoid false positive -# Other id that are 99% of robots -'wget','WGet tools', -'libwww','Perl tool', -'^java\/[0-9]','Java (Often spam bot)', # put at end to avoid false positive -# Generic robot -'robot', 'Unknown robot (identified by \'robot\')', -'checker', 'Unknown robot (identified by \'checker\')', -'crawl', 'Unknown robot (identified by \'crawl\')', -'discovery', 'Unknown robot (identified by \'discovery\')', -'hunter', 'Unknown robot (identified by \'hunter\')', -'scanner', 'Unknown robot (identified by \'scanner\')', -'spider', 'Unknown robot (identified by \'spider\')', -'sucker', 'Unknown robot (identified by \'sucker\')', -'bot[\s_+:,\.\;\/\\\-]', 'Unknown robot (identified by \'bot\' followed by a space or one of the following characters _+:,.;/\-)', -'[\s_+:,\.\;\/\\\-]bot', 'Unknown robot (identified by a space or one of the characters _+:,.;/\- followed by \'bot\')', -'curl', 'Common *nix tool for automating web document retrieval. Most likely a bot.', -'php', 'A PHP script', -'ruby\/', 'Ruby script', -# Additional bots found by Sussex. 
-'^[1-3]$', 'Generic bot identified as "1", "2" or "3"', -'alltop', 'alltop', -'applesyndication', 'applesyndication', -'asynchttpclient', 'asynchttpclient', -'bingbot', 'Bingbot', -'blogged_crawl', 'blogged_crawl', -'bloglovin', 'bloglovin', -'butterfly', 'butterfly', -'buzztracker', 'buzztracker', -'carpathia', 'carpathia', -'catbot', 'catbot', -'chattertrap', 'chattertrap', -'check_http', 'check_http (nagios)', -'coldfusion', 'coldfusion', -'covario', 'covario', -'daylifefeedfetcher', 'daylifefeedfetcher', -'discobot', 'discobot', -'dlvr\.it', 'dlvr.it', -'dreamwidth', 'dreamwidth', -'drupal', 'Drupal Site', -'ezoom', 'ezoom', -'feedmyinbox', 'feedmyinbox', -'feedroll\.com', 'feedroll.com', -'feedzira', 'feedzira', -'fever\/', 'Feed a Fever', -'freenews', 'freenews', -'geohasher', 'geohasher', -'hanrss', 'hanrss', -'inagist', 'inagist', -'jacobin\sclub', 'jacobin club', -'jakarta', 'jakarta', -'js\-kit', 'js-kit', -'largesmall\scrawler', 'largesmall crawler', -'linkedinbot', 'linkedinbot', -'longurl', 'longurl', -'metauri', 'metauri', -'microsoft\-webdav\-miniredir', 'microsoft-webdav-miniredir', -'^motorola$', 'Suspected Bot masquerading as "Motorola"', -'movabletype', 'movabletype', -'^mozilla\/3\.0\s\(compatible$', 'Suspected bot masqurading as Mozilla', -'^mozilla\/4\.0$', 'Suspected bot masqurading as Mozilla', -'^mozilla\/4\.0\s\(compatible;\)$', 'Suspected bot masqurading as Mozilla', -'^mozilla\/5\.0$', 'Suspected bot masqurading as Mozilla', -'^mozilla\/5\.0\s\(compatible;$', 'Suspected bot masqurading as Mozilla', -'^mozilla\/5\.0\s\(en\-us\)$', 'Suspected bot masqurading as Mozilla', -'^mozilla\/5\.0\sfirefox\/3\.0\.5$', 'Suspected bot masqurading as Mozilla', -'^msie', 'Suspected bot masquerading as M$ IE', -'netnewswire', 'netnewswire', -'\snetseer\s', 'Net Seer', -'netvibes', 'netvibes', -'newrelicpinger', 'newrelicpinger', -'newsfox', 'Fox News', -'nextgensearchbot', 'nextgensearchbot', -'ning', 'ning', -'pingdom', 'pingdom', -'pita', 'pita (pain 
in the ass?)', -'postpost', 'postpost', -'postrank', 'postrank', -'printfulbot', 'printfulbot', -'protopage', 'protopage', -'proximic', 'Proximic Spider', -'quipply', 'quipply', -'r6\_', 'Radian 6 Crawler', -'ratingburner', 'ratingburner', -'regator', 'regator', -'rome\sclient', 'rome client', -'rpt\-httpclient', 'rpt-httpclient', -'rssgraffiti', 'rssgraffiti', -'sage\+\+', 'sage++', -'scoutjet', 'ScoutJet crawler for Blekko.', -'simplepie', 'simplepie', -'sitebot', 'sitebot', -'summify\.com', 'summify.com', -'superfeedr', 'superfeedr', -'synthesio', 'synthesio', -'teoma', 'teoma', -'topblogsinfo', 'topblogsinfo', -'topix\.net', 'topix.net', -'trapit', 'trapit', -'trileet', 'trileet', -'tweetedtimes', 'The Tweeted Times', -'twisted\spagegetter', 'twisted pagegetter', -'twitterbot', 'Twitterbot', -'twitterfeed', 'twitterfeed', -'unwindfetchor', 'unwindfetchor', -'wazzup', 'wazzup', -'windows\-rss\-platform', 'windows-rss-platform', -'wiumi', 'wiumi', -'xydo', 'xydo', -'yahoo!\sslurp', 'Additional Yahoo bots.', -'yahoo\spipes', 'Additional Yahoo bots.', -'yahoo\-newscrawler', 'Additional Yahoo bots.', -'yahoocachesystem', 'Additional Yahoo bots.', -'yahooexternalcache', 'Additional Yahoo bots.', -'yahoo!\ssearchmonkey', 'Additional Yahoo bots.', -'yahooysmcm', 'Additional Yahoo bots.', -'yammer', 'yammer', -#'yandexbot', 'yandexbot', #already covered by 'yandex' -'yeti', 'yeti', -'yie8', 'yie8', -'youdao', 'youdao', -'yourls', 'yourls', -'zemanta', 'zemanta', -'zend_http_client', 'Zend Http Client', -'no_user_agent','Unknown robot (identified by empty user agent string)', -# Unknown robots identified by hit on robots.txt -'unknown', 'Unknown robot (identified by hit on \'robots.txt\')' -); - - -# RobotsAffiliateLib -# This list try to tell by which Search Engine a robot is used -#------------------------------------------------------------- -%RobotsAffiliateLib = ( -'bingpreview'=>'Bing', -'fast\-webcrawler'=>'AllTheWeb', -'googlebot'=>'Google', 
-'google\-sitemap'=>'Google', -'google[_+\s]web[_+\s]preview'=>'Google', -'msnbot'=>'MSN', -'nutch'=>'Looksmart', -'scooter'=>'AltaVista', -'wisenutbot'=>'Looksmart', -'yahoo\-blogs'=>'Yahoo', -'yahoo\-verticalcrawler'=>'Yahoo', -'yahoofeedseeker'=>'Yahoo', -'yahooseeker\-testing'=>'Yahoo', -'yahooseeker'=>'Yahoo', -'yahoo\-mmcrawler'=>'Yahoo', -'yahoo!_mindset'=>'Yahoo', -'zyborg'=>'Looksmart', -'cfetch'=>'Kosmix', -'^voyager\/'=>'Kosmix', -# Additional bots found by Sussex. -'feedfetcher\-google'=>'Google', -'bingbot'=>'MSN', -'twitterbot'=>'Twitter', -'twitterfeed'=>'Twitter', -'yahoo!\sslurp'=>'Yahoo', -'yahoo\spipes'=>'Yahoo', -'yahoo-newscrawler'=>'Yahoo', -'yahoocachesystem'=>'Yahoo', -'yahooexternalcache'=>'Yahoo', -'yahoo!\ssearchmonkey'=>'Yahoo', -'yahooysmcm'=>'Yahoo' -); - -1; +# AWSTATS ROBOTS DATABASE +#------------------------------------------------------- +# If you want to add robots to extend AWStats database detection capabilities, +# you must add an entry in RobotsSearchIDOrder_listx and RobotsHashIDLib. + +# The entry in RobotsSearchIDOrder_listx is a Perl regular expression +# (see http://perldoc.perl.org/perlreref.html). AWStats applies these +# expressions to the user agent string in the order given by the lists. The +# first match specifies the robot. +# +# Note: This regular expression must not contain any whitespace. +# Otherwise AWStats will produce lines in the database that +# will be misinterpreted and as a consequence the corresponding data in the +# generated HTML reports will be wrong. If you want to match whitespace in +# the user agent string, use other constructs like '\s', '[[:blank:]]', +# '\p{IsSpace}', '\x20' etc. +# +# The corresponding entry in RobotsHashIDLib contains the regular expression +# as key, followed by a string containing HTML-text. AWStats inserts this +# text into reports to describe the bot. If possible the text should contain +# a link to the bot home page. 
This makes it easier for sysadmins to find +# the information necessary e.g. to adapt the robots.txt file. +# +# An entry in the RobotsAffiliateLib is not necessary. An entry in this list +# contains as first part the regular expression specifying the bot. The +# second part is a string that gives the Company or product managing the bot. +# This information is not used yet. +# +# There are several sorts of bots that AWStats is not able to detect and +# therefore a considerable amount of bot generated traffic counts +# as user traffic: +# +# a) A crawler that identifies itself in the referrer string, but not in +# the user agent string. An example is the crawler from semalt.semalt.com. +# +# b) Crawlers that correctly access robots.txt but identify themselves in +# the user agent string only once or just a few times. Most of the +# time a user agent string is used that does not contain hints that +# a bot is involved. An example is the iCjobs spider. +# msnbot-UDiscovery/2.0b seems to show this behaviour too. +# +# +# +#------------------------------------------------------- + +# 2016-09-02 RobC +# Fixed a few errors and added a few missing bots from awstats 7.5 release. +# +# 2016-08-28 RobC +# Complete re-build of this file almost from scratch. +# dropped many old bots, added many new bots and reordered file. +# edited and added regex expressions to stop spaces causing problems. +# You should tune file by placing the most common robots crawling your site at top +# in List1. +# +# +# N.B. many bots need to be in correct order so don't change order without checking if +# change will cause counts to be allocated to wrong bot. Not always simple. +# +# +# 2005-08-19 Sean Carlos http://www.antezeta.com/awstats.html +# added dipsie (not tested with real data). 
+# added DomainsDB.net http://domainsdb.net/ +# added ia_archiver-web.archive.org (was inadvertently grouped with Alexa traffic) +# added Nutch (used by looksmart (furl?)) +# added rssImagesBot +# added Sqworm +# added t\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e +# added w3c css-validator +# added documentation link to bot home pages for above and selected major bots. +# In the case of international bots, choose .com page. +# Included tool tip (html "title"). +# To do: parameterize to match both AWStats language and tooltips settings. +# To do: add html links for all bots based on current documentation in source +# files referenced below. +# changed '\wbot[\/\-]', to '\wbot[\/\-]' (removed comma) +# made minor grammar corrections to notes below +# 2005-08-24 added YahooSeeker-Testing +# added w3c-checklink +# updated url for ask.com +# 2005-08-24 added Girafabot http://www.girafa.com/ +# 2005-08-30 added PluckFeedCrawler http://www.pluck.com/ +# added Gaisbot/3.0 (robot05@gais.cs.ccu.edu.tw; ) +# added geniebot (wgao@genieknows.com) +# added BecomeBot link http://www.become.com/site_owners.html +# added topicblogs http://www.topicblogs.com/ +# added Powermarks; seen used by referrer spam +# added YahooSeeker +# added NG/2. 
http://www.exabot.com/ +# 2005-09-15 added link for Walhello appie +# added bender focused_crawler +# updated YahooSeeker description (blog crawler) +# 2005-09-16 added link for http://linkchecker.sourceforge.net +# added ConveraCrawler/0.9d ( http://www.authoritativeweb.com/crawl) +# added Blogslive info@blogslive.com intelliseek.com +# added BlogPulse (ISSpider-3.0) intelliseek.com +# 2005-09-26 added Feedfetcher-Google (http://www.google.com/feedfetcher.html) +# added EverbeeCrawler +# added Yahoo-Blogs http://help.yahoo.com/help/us/ysearch/crawling/crawling-02.html +# added link for Bloglines http://www.bloglines.com +# 2005-10-19 fixed Feedfetcher-Google (http://www.google.com/feedfetcher.html) +# added Blogshares Spiders (Synchronized V1.5.1) +# added yacy +# 2005-11-21 added Argus www.simpy.com +# added BlogsSay :: RSS Search Crawler (http://www.blogssay.com/) +# added MJ12bot http://majestic12.co.uk/bot.php +# added OpenTaggerBot (http://www.opentagger.com/opentaggerbot.htm) +# added OutfoxBot/0.3 (For internet experiments; outfox.agent@gmail.com) +# added RufusBot Rufus Web Miner http://64.124.122.252.webaroo.com/feedback.html +# added Seekbot (http://www.seekbot.net/bot.html) +# added Yahoo-MMCrawler/3.x (mms-mmcrawler-support@yahoo-inc.com) +# added link for BaiDuSpider +# added link for Blogshares Spider +# added link for StackRambler http://www.rambler.ru/doc/faq.shtml +# added link for WISENutbot +# added link for ZyBorg/1.0 (wn-14.zyborg@looksmart.net; http://www.WISEnutbot.com. Moved location to above wisenut to avoid classification as wisenut +# 2005-12-15 +# added FAST Enteprise Crawler/6 (www dot fastsearch dot com). Note spelling Enteprise not Enterprise. 
+# added findlinks http://wortschatz.uni-leipzig.de/findlinks/ +# added IBM Almaden Research Center WebFountain™ http://www.almaden.ibm.com/cs/crawler [hc3] +# added INFOMINE/8.0 VLCrawler (http://infomine.ucr.edu/useragents) +# added lmspider (lmspider@scansoft.com) http://www.nuance.com/ +# added noxtrumbot http://www.noxtrum.com/ +# added SandCrawler (Microsoft) +# added SBIder http://www.sitesell.com/sbider.html +# added SeznamBot http://fulltext.seznam.cz/ +# added sohu-search http://corp.sohu.com/ (looked for //robots.txt not /robots.txt) +# added the ruffle SemanticWeb crawler v0.5 - http://www.unreach.net +# added WebVulnCrawl/1.0 libwww-perl/5.803 (looked for //robots.txt not /robots.txt) +# added Yahoo! Japan keyoshid http://www.yahoo.co.jp/ +# added Y!J http://help.yahoo.co.jp/help/jp/search/indexing/indexing-15.html +# added link for GigaBot +# added link for MagpieRSS +# added link for MSIECrawler +# 2005-12-21 +# added aipbot http://www.aipbot.com aipbot@aipbot.com [matthys70 users.sourceforge.net] +# added Everest-Vulcan Inc./0.1 (R&D project; http://everest.vulcan.com/crawlerhelp) +# added Fast-Search-Engine http://www.fast-search-engine.com/ [matthys70 users.sourceforge.net] +# added g2Crawler (nobody@airmail.net) http://crawler.instantnetworks.net/ +# added Jakarta commons-httpclient http://jakarta.apache.org/commons/httpclient/ (hit robots.txt). May be used as robot or browser - a site may want to remove this entry. +# added OmniExplorer_Bot http://www.omni-explorer.com/ [matthys70 users.sourceforge.net] +# added USTC-Semantic-Group ai.ustc.edu.cn/mas/en/research/index.php ? 
+# 2005-12-22 +# added EARTHCOM.info www.earthcom.info +# added HTTrack off-line browser 'httrack','HTTrack', http://www.httrack.com/ [Moizes Gabor] +# added KummHttp http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_g_l_301105_2\b [Moizes Gabor] +# 2006-01-01 +# added Dulance http://www.dulance.com/bot.jsp +# added MojeekBot http://www.mojeek.com/bot.html +# added nicebot http://www.egghelp.org/setup.htm ? +# added Snappy http://www.urltrends.com/faq.php +# added sohu agent +# added VORTEX http://marty.anstey.ca/robots/vortex/ [matthys70 users.sourceforge.net] +# added zspider http://feedback.redkolibri.com/ +# 2006-01-13 +# added boitho.com-dc http://www.boitho.com/dcbot.html +# added IRLbot http://irl.cs.tamu.edu/crawler +# added virus_detector virus_harvester@securecomputing.com +# added Wavefire http://www.wavefire.com; info@wavefire.com + +# added WebFilter Robot +# 2006-01-24 +# added Shim-Crawler http://www.logos.ic.i.u-tokyo.ac.jp/crawler/; crawl@logos.ic.i.u-tokyo.ac.jp +# added Exabot exabot.com +# added LetsCrawl.com http://letscrawl.com +# added ichiro http://help.goo.ne.jp/door/crawlerE.html +# 2006-01-27 additional 22 robots from a list provided by Moizes Gabor +# added ALeadSoftbot http://www.aleadsoft.com/bot.htm +# added CipinetBot http://www.cipinet.com/bot.html +# added Cuasarbot http://www.cuasar.com/ +# added Dumbot http://www.dumbfind.com/ +# added Extreme_Picture_Finder http://www.exisoftware.com/ +# added Fooky.com/ScorpionBot/ScoutOut http://www.fooky.com/scorpionbots +# added IlTrovatore-Setaccio http://www.iltrovatore.it/aiuto/motore_di_ricerca.html bot@iltrovatore.it +# added InsurancoBot http://www.fastspywareremoval.com/ +# added InternetArchive http://lucene.apache.org/nutch/bot.html nutch-agent@lucene.apache.org +# added KazoomBot http://www.kazoom.ca/bot.html kazoombot@kazoom.ca +# added Kurzor http://www.easymail.hu/ cursor@easymail.hu +# added NutchCVS 
http://lucene.apache.org/nutch/bot.html nutch-agent@lucene.apache.org +# added NutchOSU-VLIB http://lucene.apache.org/nutch/bot.html nutch-agent@lucene.apache.org +# added Orbiter http://www.dailyorbit.com/bot.htm +# added PHP_version_tracker http://www.nexen.net/phpversion/bot.php +# added SuperBot http://www.sparkleware.com/superbot/ +# added SynooBot http://www.synoo.de/bot.html webmaster@synoo.com +# added TestBot http://www.agbrain.com/ +# added TutorGigBot http://www.tutorgig.info/ +# added WebIndexer mailto://webindexerv1@yahoo.com +# added WebMiner http://64.124.122.252/feedback.html +# 2006-02-01 +# added heritrix https://sourceforge.net/forum/message.php?msg_id=3550202 +# added Zeus Webster Pro https://sourceforge.net/forum/message.php?msg_id=3141164 +# additional robots from a list provided by Moizes Gabor [ mojzi -a-t- free mail hu ] +# added Candlelight_Favorites_Inspector +# added DomainChecker +# added EasyDL +# added FavOrg +# added Favorites_Sweeper +# added Html_Link_Validator +# added Internet_Ninja +# added JRTwine_Software_Check_Favorites_Utility +# fixed Microsoft_URL_Control +# added miniRank +# added Missigua_Locator +# added NPBot +# added Ocelli +# added Onet.pl_SA +# added proodleBot +# added SearchGuild_DMOZ_Experiment +# added Susie +# added Website_Monitoring_Bot +# added Xenu_Link_Sleuth +# 2006-05-15 +# added ASPseek http://www.aspseek.org/ +# added AdamM Bot http://home.blic.net/adamm/ +# added archive.org_bot http://crawls.archive.org/collections/bncf/crawl.html +# added arianna.libero.it (Italian Portal/search engine) +# added Biz360 spider http://www.biz360.com +# added BlogBridge Service http://www.blogbridge.com/ +# added BlogSearch http://www.icerocket.com/ +# added libcrawl +# added edgeio-relanshanbottriever http://www.edgeio.com +# added FeedFlow http://feedflow.com/about +# added Biblioteca Nazionale Centrale di Firenze (Italian National Archive) http://www.bncf.firenze.sbn.it/raccolta.txt +# added Java catchall - used by 
many spam bots +# added lanshanbot http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=%5Cbid_g_l_140406_1%5Cb +# added msnbot-media http://search.msn.com/msnbot.htm +# added MT::Telegraph::Agent +# added Netluchs http://www.netluchs.de/ (German SE bot) +# added oBot http://www.webmasterworld.com/forum11/1616.htm +# added Onfolio http://www.onfolio.com/ (IE Toolbar plugin) - hit rss feeds. +# added ping.blo.gs http://blo.gs/ping.php blog bot +# added Sphere Scout http://www.sphere.com/ +# added sproose crawler http://www.sproose.com/bot.html +# added SyndicAPI http://syndicapi.com/bot.html +# added Yahoo! Mindset http://mindset.research.yahoo.com/ +# added msrabot +# added Vagabondo & Vagabondo-WAP http://www.wise-guys.nl/Contact/index.php?botselected=webagents)#=uk +# fixed Missigua Locator detection (Missigua_Locator -> Missigua Locator) +# changed echo to echo! to avoid conflict with the bonecho (Firefox 2.0) browser. +# This requires you to reprocess historic logs if you want EchO! to be recognized for older reports. +# 2006-05-17 +# added Alpha Search Agent # 62.152.125.60 Eurologon Srl +# added Krugle http://www.krugle.com/crawler/info.html the search engine for developers +# added Octora Beta Bot http://www.octora.com/ # Blog and Rss Search Engine +# added UbiCrawler http://law.dsi.unimi.it/ubicrawler/ +# added Yahoo! Slurp China http://misc.yahoo.com.cn/help.html +# You must reprocess old logs for the Yahoo! 
Slurp China bot to be detected in old reports +# 2006-05-20 +# added 1-More Scanner http://www.myzips.com/software/1-More-Scanner.phtml +# added Accoona-AI-Agent http://www.accoona.com/ +# added ActiveBookmark http://www.libmaster.com/active_bookmark.php +# added BIGLOTRON http://www.biglotron.com/robot.html +# added Bookmark-Manager http://bkm.sourceforge.net/ +# added cbn00glebot +# added Cerberian Drtrs http://www.pgts.com.au/cgi-bin/psql?robot_info=25240 +# added CFNetwork http://www.cocoadev.com/index.pl?CFNetwork +# added CheckWeb link validator http://p.duby.free.fr/chkweb.htm +# added Computer and Automation Research Institute Crawler http://www.ilab.sztaki.hu/~stamas/publications/p184-benczur.html +# added ConveraCrawler http://www.authoritativeweb.com/crawl/ +# added ConveraMultiMediaCrawler http://www.authoritativeweb.com/crawl/ +# added CSE HTML Validator Lite Online http://online.htmlvalidator.com/php/onlinevallite.php +# added Cursor http://adcenter.hu/docs/en/bot.html +# added Custo http://www.netwu.com/custo/ +# added DataFountains/DMOZ Downloader http://infomine.ucr.edu/ +# added Deepindex http://www.deepindex.net/faq.php +# added DNSGroup http://www.dnsgroup.com/ +# added DoCoMo http://www.nttdocomo.co.jp/ +# added dumm.de-Bot http://www.dumm.de/ +# added ETS v http://www.freetranslation.com/help/ +# added eventax http://www.eventax.de/ +# added FAST Enterprise Crawler * crawleradmin.t-info@telekom.de http://www.telekom.de/ +# added FAST Enterprise Crawler http://www.fast.no/ +# added FAST Enterprise Crawler * T-Info_BI_cluster crawleradmin.t-info@telekom.de http://www.telekom.de/ +# added FeedValidator http://feedvalidator.org/ +# added FilmkameraBot http://www.filmkamera.at/bot.html +# added Findexa Crawler http://www.findexa.no/gulesider/article26548.ece +# added Global Fetch http://www.wesonet.com/ +# added GOFORITBOT http://www.goforit.com/about/ +# added GoForIt.com http://www.goforit.com/about/ +# added GPU p2p crawler 
http://gpu.sourceforge.net/search_engine.php +# added HooWWWer http://cosco.hiit.fi/search/hoowwwer/ +# added HPPrint +# added HTMLParser http://htmlparser.sourceforge.net/ +# added Hundesuche.com-Bot http://www.hundesuche.com/ +# added InfoBot http://www.infobot.org/ +# added InfociousBot http://corp.infocious.com/tech_crawler.php +# added InternetSupervision http://internetsupervision.com/ +# added isearch2006 http://www.yahoo.com.cn/ +# added IUPUI_Research_Bot http://spamhuntress.com/2005/04/25/a-mail-harvester-visits/ +# added KalamBot http://64.124.122.251/feedback.html +# added kamano.de NewsFeedVerzeichnis http://www.kamano.de/ +# added Kevin http://dznet.com/kevin/ +# added KnowItAll http://www.cs.washington.edu/research/knowitall/ +# added Knowledge.com http://www.knowledge.com/ +# added Kouaa Krawler http://www.kouaa.com/ +# added ksibot http://ego.ms.mff.cuni.cz/ +# added Link Valet Online http://www.htmlhelp.com/tools/valet/ +# added lwp-request http://search.cpan.org/~gaas/libwww-perl-5.69/bin/lwp-request +# added lwp-trivial http://search.cpan.org/src/GAAS/libwww-perl-5.805/lib/LWP/Simple.pm +# added MapoftheInternet.com http://MapoftheInternet.com/ +# added Matrix S.p.A. 
- FAST Enterprise Crawler http://tin.virgilio.it/ +# added Megite http://www.megite.com/ +# added Metaspinner http://index.meta-spinner.de/ +# added Mini-reptile +# added Misterbot http://www.misterbot.fr/ +# added Miva http://www.miva.com/ +# added Mizzu Labs http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_m_141105_2\b +# added MSRBOT http://research.microsoft.com/research/sv/msrbot/ +# added MS SharePoint Portal Server - MS Search 4.0 Robot http://support.microsoft.com/default.aspx?scid=kb;en-us;284022 +# added Mydoyouhike http://www.doyouhike.net/my +# added NASA Search http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_n_s_140506_2\b +# added NetSprint http://www.netsprint.pl/serwis/ +# added NimbleCrawler http://www.healthline.com/ +# added OpenWebSpider http://www.openwebspider.org/ +# added Oracle Ultra Search http://www.oracle.com/technology/products/ultrasearch/index.html +# added OSSProxy http://www.marketscore.com/FAQ.Aspx +# added passwordmaker.org http://passwordmaker.org/ +# added PEAR HTTP Request class http://pear.php.net/ +# added PEERbot http://www.peerbot.com/ +# added PHP version tracker http://www.nexen.net/phpversion/bot.php +# added PictureOfInternet http://malfunction.org/poi/ +# added plinki http://www.plinki.com/ +# added Port Huron Labs http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_n_s_1133\b +# added PostFavorites http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_n_s_1135\b +# added ProjectWF-java-test-crawler +# added PyQuery http://sourceforge.net/projects/pyquery/ +# added Schizozilla http://spamhuntress.com/2005/03/18/gizmo/ +# added Scumbot +# added Sensis Web Crawler http://www.sensis.com.au/ +# added snap.com beta crawler http://www.snap.com/ +# added Steeler http://www.tkl.iis.u-tokyo.ac.jp/~crawler/ +# added STEROID Download 
http://faqs.org.ru/progr/pascal/delphi_internet2.htm +# added Suchfin-Bot http://www.suchfin.de/ +# added Sunrise http://www.sunrisexp.com/ +# added Tagyu Agent http://www.tagyu.com/ +# added Tcl http client package http://www.tcl.tk/man/tcl8.4/TclCmd/http.htm +# added TeragramCrawlerSURF http://www.teragram.com/ +# added Test Crawler http://netp.ath.cx/ +# added UnChaos Bot Hybrid Web Search Engine http://www.unchaos.com/ +# added unido-bot http://www.unchina.org/unido/unido/our_projects/3_3.html +# added UniversalFeedParser http://feedparser.org/ (seen from md301000.inktomisearch.com) +# added updated http://www.updated.com/ +# added Vermut http://vermut.aol.com +# added versus crawler from eda.baykan@epfl.ch http://www.epfl.ch/Eindex.html +# added Vespa Crawler (Yahoo Norway?) http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=%5Cbid_t_z_030406_1%5Cb +# added VSE http://www.vivisimo.com/ +# added webcrawl.net http://www.webcrawl.net/ +# added Web Downloader http://www.krasu.ru/soft/chuchelo/ +# added Webdup http://www.webdup.com/en/index.html +# added Wells Search http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_t_z_1484\b +# added WordPress http://wordpress.org/ +# added wume crawler http://wume.cse.lehigh.edu/~xiq204/crawler/ +# added Xenu's Link Sleuth (with ') +# added xirq http://www.xirq.com/ +# added yoogliFetchAgent http://www.yoogli.com/ +# added Z-Add Link Checker http://w3.z-add.co.uk/linkcheck/ +# -- fix - some robots were reported with _ where _ should have been a space. +# changed Xenu Link Sleuth +# changed microsoft[_+\s]url[_+\s]control -> microsoft_url_control +# changed favorites_sweeper -> favorites_sweeper +# -- updates +# updated AskJeeves to Ask +# 2012-06-05 Albrecht Mueller +# added Grabber from SDSC (San Diego Supercomputer Center). 
+# 2013-09-30 Albrecht Mueller +# AWStats probably cannot detect this bot as it identifies itself in +# the referrer field and not in the user agent string. +#92.113.100.35 - - [29/Sep/2013:17:22:46 +0200] "GET /robots.txt HTTP/1.1" 200 516 "-" "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0" "-" +#92.113.100.35 - - [29/Sep/2013:17:22:49 +0200] "GET /tghome.htm HTTP/1.1" 200 4445 "http://extrabot.com/help/frytygativyheku.htm" "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0" "-" +#92.113.100.35 - - [29/Sep/2013:17:22:51 +0200] "GET / HTTP/1.1" 200 5467 "http://extrabot.com/help/frytygativyheku.htm" "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0" "-" + +# to do MS Search 4.0 Robot + +#package AWSROB; + + +# Robots list was found at http://www.robotstxt.org/wc/active/all.txt +# Other robots can be found at http://www.jafsoft.com/searchengines/webbots.html +# Rem: To avoid bad detection, some robot's ids were removed from this list: +# - Robots with ID of 3 letters only +# - Robots called 'webs' and 'tcl' +# Rem: directhit changed into direct_hit (its real id) +# Rem: calif changed into calif[^r] to avoid confusion between Tiscalifreenet browser +# Rem: fish changed into [^a]fish to avoid confusion between Madsafish browser +# Rem: roadrunner changed into road_runner +# Rem: lycos changed to lycos_ to avoid confusion with lycos-online browser +# Rem: voyager changed into ^voyager\/ to avoid to exclude voyager and amigavoyager browser + +# RobotsSearchIDOrder +# It contains all matching criteria to search for in log fields. This list is +# used to know in which order to search Robot IDs. +# Most frequent ones are in list1, used when LevelForRobotsDetection is 1 or more +# Minor robots are in list2, used when LevelForRobotsDetection is 2 or more +# Note: Robots IDs are in lower case, '_', ' ' and '+' are changed into '[_+\s]' and are quoted. 
+#-------------------------------------------------------
+
+@RobotsSearchIDOrder_list1 = (
+# Common robots (In robot file). Patterns are tried top to bottom and the first match wins, so order matters.
+'bingbot/',
+'bingpreview',
+'MSIECrawler',
+'msnbot/',
+'msnbot\-media/',
+'AdIdxBot/',
+'NOT[\x20]Googlebot/',
+'Googlebot/',
+'Google[\x20]Web[\x20]Preview',
+'Googlebot\-Image/',
+'Googlebot\-Mobile/',
+'google\-sitemaps',
+'Googlebot\-News',
+'Googlebot\-Video/',
+'AdsBot\-Google[\x20]\(',
+'AdsBot\-Google\-Mobile\-Apps',
+'Mediapartners-Google',
+'Feedfetcher\-Google',
+'Google\-Adwords\-Instant',
+'Firefox/1\.5',
+'Yahoo![\x20]Slurp[\x20]China',
+'Yahoo![\x20]Slurp',
+'Baiduspider/',
+'Baiduspider\-image',
+'baidu',
+'YandexBot/',
+'YandexImages/',
+'YandexMetrika/',
+'YandexMobileBot/',
+'yandex',
+'electricmonk/',
+'spbot/',
+'SeznamBot/',
+'msie8',
+'AhrefsBot/',
+'007ac9[\x20]Crawler',
+'2345Explorer/',
+'360Spider',
+'A[\x20]Simple[\x20]Crawler',
+'Abrave',
+'acapbot/',
+'Accoona\-AI\-Agent/',
+'AdnormCrawlerCatchBot/',
+'aiHitBot/',
+'aipbot/',
+'Apache\-HttpClient/',
+'Apexoo[\x20]Spider',
+'Applebot/',
+'archive\.org_bot',
+'Babya[\x20]Discoverer',
+'BDCbot/',
+'BinGet/',
+'bl\.uk_lddc_bot/',
+'BLEXBot/',
+'boitho\.com\-dc/',
+'BusinessBot:',
+'CatchBot/',
+'CB/Nutch',
+'CCBot/',
+'Cliqzbot/',
+'CMS[\x20]Crawler',
+'ConveraCrawler/',
+'cosmos/',
+'crawl/Nutch',
+'crawler4j',
+'CRAZYWEBCRAWLER',
+'CSE[\x20]HTML[\x20]Validator',
+'C\-T[\x20]bot',
+'Curl/PHP',
+'Dalvik/',
+'DataCrawler/',
+'Deepnet[\x20]Explorer',
+'DeuSu/',
+'Digincore',
+'Discordbot/',
+'DoCoMo/',
+'Domain[\x20]Re\-Animator[\x20]Bot',
+'DomainCrawler/',
+'DomainMacroCrawler/',
+'DomainSONOCrawler/',
+'DomainStatsBot/',
+'DotBot/',
+'DuckDuckGo\-Favicons\-Bot/',
+'ELinks/',
+'ELinks[\x20]\(',
+'EmailMarketingRobot/',
+'EmeraldShield\.com[\x20]WebBot',
+'envolk\[ITS\]spider/',
+'EsperanzaBot',
+'Exabot/',
+'facebookexternalhit/',
+'fast_enterprise_crawler.*scrawleradmin\.t\-info@telekom\.de',
+'fast_enterprise_crawler.*t\-info_bi_cluster_crawleradmin\.t\-info@telekom\.de',
+'FAST\-WebCrawler/',
+'Feosey[\x20]Mohk[\x20]Crawler',
+'findlinks/',
+'Findxbot/',
+'FirePHP/',
+'FlippyBearBot/',
+'FreeWebMonitoring[\x20]SiteChecker/',
+'fujilabol',
+'FurlBot/',
+'Gaisbot/',
+'Gallent[\x20]Spider',
+'GarlikCrawler/',
+'GetintentCrawler[\x20]getintent\.com',
+'Gigabot/',
+'gipo\-crawler/Nutch',
+'Girafabot',
+'Gluten[\x20]Free[\x20]Crawler/',
+'gocrawl',
+'GrapeshotCrawler/',
+'GSiteCrawler/',
+'GurujiBot/',
+'HaosouSpider',
+'holmes/',
+'HTTP_Request2/',
+'HubSpot[\x20]Webcrawler',
+'HyperCrawl/',
+'ICC\-Crawler/',
+'iconoclast',
+'IDGCrawler/Nutch',
+'idmarch[\x20]Automatic\.beta/',
+'Incutio[\x20]XML',
+'InfluenceBot',
+'IRLbot/',
+'IssueCrawler',
+'istellabot/',
+'James[\x20]BOT',
+'Jigsaw/',
+'JobFeed',
+'KomodiaBot/',
+'Konqueror/',
+'linkapediabot',
+'metager\-linkchecker',
+'linkchecker',
+'LinkCheck',
+'linkdexbot/',
+'LinkedInBot/',
+'LinkpadBot/',
+'Links[\x20]\(',
+'LinksManager\.com_bot',
+'LWP::Simple/',
+'Mail\.RU_Bot/',
+'meanpathbot/',
+'Mediatoolkitbot',
+'MegaIndex\.ru/',
+'merzscope',
+'mfibot/',
+'microsoft.*discovery',
+'missigua_locator',
+'MixrankBot',
+'MJ12bot/',
+'MojeekBot/',
+'Mojolicious',
+'MXT/Nutch',
+'My[\x20]Nutch[\x20]Spider/',
+'myse/Nutch',
+'NerdyBot',
+'netEstate[\x20]NE[\x20]Crawler',
+'NetResearchServer/',
+'NRLCorpusBuilder/Nutch',
+'nutch\-1\.4/',
+'nutch\-1\.8/',
+'NutchCVS/',
+'o\.uk[\x20]robot',
+'oBot/',
+'ocrawler;',
+'ODP[\x20]link[\x20]checker',
+'Offline[\x20]Explorer/',
+'OmniExplorer_Bot/',
+'OrangeBot/',
+'PageBitesHyperBot/',
+'pdffillerbot/',
+'PhantomJS',
+'PHP/5\.2\.8',
+'Ploetz[\x20]\+[\x20]Zeller',
+'Plukkie/',
+'Princetonbot/',
+'PrivacyAwareBot/',
+'proximic',
+'psbot/',
+'psbot\-image',
+'python_wk_crawler',
+'Python\-urllib/',
+'QCrawl',
+'ResearchBot',
+'roboto',
+'rogerbot/',
+'RSSingBot',
+'RukiCrawler/',
+'SafeDNS[\x20]search[\x20]bot/',
+'SafeDNSBot',
+'SafeSearch[\x20]microdata[\x20]crawler',
+'safesearch',
+'SBL\-BOT',
+'Screaming[\x20]Frog[\x20]SEO[\x20]Spider/',
+'ScreenerBot[\x20]Crawler[\x20]Beta',
+'Searchie/',
+'Seekmo',
+'semanticbot',
+'SemrushBot/',
+'seo\-audit\-check\-bot/',
+'Seobility',
+'SEOkicks\-Robot',
+'SEOlyticsCrawler/',
+'SEOstats',
+'Seosys/Nutch',
+'Seoterritory\.com[\x20]bot',
+'Shim\-Crawler',
+'SiteExplorer/',
+'siteexplorer\.info',
+'Slackbot\-LinkExpanding',
+'SmabblerBot/',
+'Sogou[\x20]web[\x20]spider/',
+'special_archiver/',
+'Spiderbot/',
+'ssearch_bot',
+'SurdotlyBot/',
+'SurveyBot/',
+'taiil/Nutch',
+'tbot\-nutch',
+'TeeRaidBot',
+'TelegramBot',
+'Test/Nutch',
+'Test[\x20]Spider',
+'TestCrawler',
+'TurnitinBot/',
+'TurnitinBot',
+'TweetmemeBot/',
+'UCY/Nutch',
+'Uptimebot/',
+'URL[\x20]Checker',
+'UXCrawlerBot',
+'Validator\.nu/',
+'vBSEO',
+'vBulletin[\x20]via[\x20]PHP',
+'vebidoobot',
+'viz/Nutch',
+'VoilaBot',
+'VORTEX/',
+'voyager/',
+'W3C_Validator/',
+'W3C\-checklink/',
+'WBSearchBot/',
+'WeSEE:Ads/PageBot',
+'WeSEE:Ads/PictureBot',
+'WeSEE_Bot',
+'Wget/',
+'Who\.is[\x20]Bot',
+'wonderbot/',
+'woobot/',
+'Wotbox/',
+'Xaldon[\x20]WebSpider',
+'Xenu[\x20]Link[\x20]Sleuth',
+'xenu_link_sleuth',
+'XML[\x20]Sitemaps[\x20]Generator',
+'XoviBot/',
+'yacybot',
+'Yahoo[\x20]Link[\x20]Preview',
+'YisouSpider',
+'yoozBot',
+'zspider/',
+'ZumBot/',
+# Generic catch-alls: kept at the end so that the specific ids above are tried first.
+'nbot',
+'ng/1\.',
+'ng/2\.',
+'libwww\-perl',
+'CFNetwork',
+'urllib',
+'javabee',
+'projectwf\-java\-test\-crawler',
+'java',
+'loocalcrawler/nutch',
+'nutchosu\-vlib',
+'nutch',
+'perlcrawler',
+'Crawler/', # generic: must stay after every specific '...Crawler/' id, otherwise it matches first and shadows them
+'perl',
+# old robots using firefox < version 11 not identifying themselves as a robot.
+'(firefox/)([0-9]\.|[0-1][0]\.)'
+);
+
+@RobotsSearchIDOrder_list2 = (
+# Less common robots (In robot file)
+'^Mozilla$',
+'^mozilla\/3\.0\s\(compatible$',
+'^mozilla\/4\.0$',
+'^mozilla\/4\.0\s\(compatible;\)$',
+'^mozilla\/5\.0$',
+'^mozilla\/5\.0\s\(compatible;$',
+'^mozilla\/5\.0\s\(en\-us\)$',
+'^mozilla\/5\.0\sfirefox\/3\.0\.5$',
+'^Mozilla/6\.0[\x20]\(compatible\)$',
+'^Mozilla/(.*)Beta[\x20]\(Windows\)',
+'MSIE[\x20]2',
+'MSIE[\x20]3',
+'MSIE[\x20]4',
+'MSIE[\x20]5',
+'MSIE[\x20]6',
+'Windows[\x20]95',
+'Windows[\x20]98',
+
+# these could be removed to speed up processing as they are rarely seen
+'a6\-indexer',
+'abcdatos',
+'abonti\.com',
+'acme\.spider',
+'activebookmark',
+'adamm_bot',
+'advbot',
+'affectv\.co\.uk',
+'ahoythehomepagefinder',
+'aleadsoftbot',
+'alkaline',
+'allrati',
+'alltop',
+'almaden',
+'alpha_search_agent',
+'anthill',
+'antibot',
+'aport',
+'appie',
+'applesyndication',
+'arachnophilia',
+'arale',
+'araneo',
+'architext',
+'archive\-de\.com',
+'aretha',
+'argus',
+'ariadne',
+'arianna\.libero\.it',
+'arks',
+'aspider',
+'aspseek',
+'asterias',
+'asynchttpclient',
+'atn\.txt',
+'atomz',
+'auresys',
+'awbot',
+'backlinktest\.com',
+'backrub',
+'bbot',
+'becomebot',
+'bender',
+'betabot',
+'bigbrother',
+'biglotron',
+'BingLocalSearch',
+'bittorrent_bot',
+'biz360[_+\s]spider',
+'bjaaland',
+'blackwidow',
+'blindekuh',
+'blogbridge[_+\s]service',
+'blogged_crawl',
+'bloglines',
+'bloglovin',
+'blogpulse',
+'blogsearch',
+'blogshares',
+'blogslive',
+'blogssay',
+'bloodhound',
+'bncf\.firenze\.sbn\.it/raccolta\.txt',
+'bobby',
+'bookmark\-manager',
+'borg\-bot',
+'boris',
+'brightnet',
+'bruinbot',
+'bspider',
+'bubing',
+'bumblebee',
+'butterfly',
+'buzztracker',
+'cactvschemistryspider',
+'calif[^r]',
+'candlelight[_+\s]favorites[_+\s]inspector',
+'careerbot',
+'carpathia',
+'cassandra',
+'catbot',
+'cbn00glebot',
+'cerberian_drtrs',
+'cfetch',
+'cgireader',
+'chattertrap',
+'check_http',
+'checkbot',
+'checkweb_link_validator',
+'christcrawler', +'churl', +'cienciaficcion', +'cipinetbot', +'imagecoccoc', +'coccoc', +'coldfusion', +'collective', +'combine', +'commons\-httpclient', +'computer_and_automation_research_institute_crawler', +'conceptbot', +'contentmatch', +'converamultimediacrawler', +'coolbot', +'copubbot', +'core', +'covario', +'cruiser', +'cscrawler', +'cuasarbot', +'cursor', +'cusco', +'custo', +'cyberspyder', +'datafountains/dmoz_downloader', +'dataprovider\.com', +'daumoa', +'daviesbot', +'daylifefeedfetcher', +'daypopbot', +'deepindex', +'desertrealm', +'deweb', +'dienstspider', +'digger', +'digout4u', +'diibot', +'dipsie\.bot', +'direct_hit', +'discobot', +'dlvr\.it', +'dnabot', +'dnsgroup', +'doccheckbot', +'domainappender', +'domainchecker', +'domainsdb\.net', +'download_express', +'dragonbot', +'dreamwidth', +'drupal', +'dulance', +'dumbot', +'dumm\.de\-bot', +'dwcp', +'e\-collector', +'earthcom\.info', +'easydl', +'ebiness', +'eccp', +'echo!', +'edgeio\-retriever', +'elfinbot', +'emacs', +'emcspider', +'enteprise', +'ernst[:blank:]2\.0', +'esther', +'ets_v', +'eventax', +'everbeecrawler', +'everest\-vulcan', +'evliyacelebi', +'exactseek', +'extreme[_+\s]picture[_+\s]finder', +'ezoom', +'ezresult', +'facebook', +'facebot', +'fast\-search\-engine', +'matrix_s\.p\.a\._\-_fast_enterprise_crawler', +'fast_enterprise_crawler', +'fastbot', +'fastcrawler', +'favicon', +'favorg', +'favorites_sweeper', +'fdse', +'feedburner', +'feedcrawl', +'feedflow', +'feedmyinbox', +'feedroll\.com', +'feedsky', +'feedster', +'feedvalidator', +'feedzira', +'felix', +'ferret', +'fetchbot', +'fetchrover', +'fever/', +'fido', +'filmkamerabot', +'filterdb\.iss\.net', +'finderlein[_+\s]research[_+\s]crawler', +'findexa_crawler', +'finnish', +'fireball', +'firmilybot', +'flexum', +'foaf\-search\.net', +'fooky\.com/ScorpionBot', +'fouineur', +'francoroute', +'freecrawl', +'freenews', +'funnelweb', +'g2crawler', +'gama', +'gazz', +'gcreep', +'geniebot', +'genieo', +'geohasher', +'getbot', +'geturl', 
+'gigablastopensource', +'global_fetch', +'gnodspider', +'goforit\.com', +'goforitbot', +'golem', +'gonzo', +'gougou', +'gpu_p2p_crawler', +'grabber', +'grapeshot', +'grapnel', +'griffon', +'gromit', +'grub', +'gulliver', +'gulperbot', +'hambot', +'hanrss', +'harvest', +'havindex', +'henrythemiragorobot', +'heritrix', +'hl_ftien_spider', +'hometown', +'hoowwwer', +'hpprint', +'htdig', +'html[_+\s]link[_+\s]validator', +'htmlgobble', +'htmlparser', +'httrack', +'hundesuche\.com\-bot', +'hyperdecontextualizer', +'ia_archiver\-web\.archive\.org', +'ia_archiver', +'iajabot', +'iaskspider', +'i\-bot', +'icarus6j', +'ichiro', +'icjobs\.de', +'ilse', +'iltrovatore\-setaccio', +'imagelock', +'implisensebot', +'inagist', +'incywincy', +'infobot', +'infociousbot', +'infohelfer', +'infomine', +'informant', +'infoseeksidewinder', +'infoseek', +'infospider', +'inspectorwww', +'insurancobot', +'integromedb\.org', +'intelliagent', +'internet[_+\s]ninja', +'internetarchive', +'internetseer', +'internetsupervision', +'ips\-agent', +'irobot', +'iron33', +'isearch2006', +'israelisearch', +'iupui_research_bot', +'izsearch', +'jacobin[\x20]club', +'jakarta', +'jbot', +'jcrawler', +'jeeves', +'jennybot', +'jobboerse', +'jobot', +'jobo', +'joebot', +'jrtwine[_+\s]software[_+\s]check[_+\s]favorites[_+\s]utility', +'js\-kit', +'jubii', +'jumpstation', +'justview', +'kalambot', +'kamano\.de_newsfeedverzeichnis', +'kapsi', +'katipo', +'kazoombot', +'kevin', +'keyoshid', +'kilroy', +'kinja\-imagebot', +'kinjabot', +'knowitall', +'knowledge\.com', +'ko[_+\s]yappo[_+\s]robot', +'kouaa_krawler', +'krugle', +'ksibot', +'kummhttp', +'kurzor', +'labelgrabber\.txt', +'lanshanbot', +'larbin', +'largesmall[\x20]crawler', +'legs', +'letscrawl\.com', +'libcrawl', +'lilina', +'link_valet_online', +'linkbot', +'linkdex\.com', +'linkidator', +'linkscan', +'linkstats[\x20]bot', +'linkwalker', +'lipperhey', +'livejournal\.com', +'lmspider', +'loadtimebot', +'lockon', +'logo_gif', +'longurl', 
+'lssrocketcrawler', +'ltbot', +'ltx71', +'lwp\-request', +'lwp\-trivial', +'lycos[_+\s]', +'macworm', +'madaali\.de', +'magpierss', +'magpie', +'mapoftheinternet\.com', +'marvin', +'mattie', +'mediabot', +'mediafox', +'megaindex', +'megite', +'memorybot', +'mercator', +'meshexplorer', +'metager2\-verification\-bot', +'metajobbot', +'metaspinner', +'metauri', +'miadev', +'microsoft[_+\s]url[_+\s]control', +'microsoft[\x20]bits', +'microsoft\-webdav\-miniredir', +'mindcrawler', +'mindupbot', +'mini\-reptile', +'minirank', +'misterbot', +'miva', +'mizzu_labs', +'mnogosearch', +'moget', +'momspider', +'monster', +'motor', +'movabletype', +'ms[_+\s]search[_+\s]6\.0[_+\s]robot', +'ms_search_4\.0_robot', +'msnbot\-udiscovery', +'msrabot', +'msrbot', +'mt::telegraph::agent', +'muncher', +'muscatferret', +'mwdsearch', +'mydoyouhike', +'myweb', +'nagios', +'nasa_search', +'ndspider', +'nederland\.zoek', +'netcarta', +'netcraft', +'netluchs', +'netmechanic', +'netnewswire', +'netscoop', +'netsprint', +'netvibes', +'newrelicpinger', +'newscan\-online', +'newsfox', +'newsgatoronline', +'nextgensearchbot', +'nhse', +'nicebot', +'nimblecrawler', +'ning', +'nomad', +'northstar', +'noxtrumbot', +'npbot', +'nzexplorer', +'objectssearch', +'occam', +'ocelli', +'octopus', +'octora_beta_bot', +'onet\.pl[_+\s]sa', +'onfolio', +'openfind', +'opentaggerbot', +'openwebspider', +'optimizer', +'oracle_ultra_search', +'orb_search', +'orbiter', +'packrat', +'pageboy', +'panscient', +'parasite', +'passwordmaker\.org', +'patric', +'pear_http_request_class', +'peerbot', +'pegasus', +'perignator', +'perman', +'petersnews', +'phantom', +'php[_+\s]version[_+\s]tracker', +'phpcrawl', +'phpdig', +'picmole', +'pictureofinternet', +'piltdownman', +'pimptrain', +'ping\.blo\.gs', +'pingdom', +'pioneer', +'pita', +'pitkow', +'pjspider', +'plinki', +'pluckfeedcrawler', +'plumtreewebaccessor', +'pogodak', +'pompos', +'popdexter', +'poppi', +'port_huron_labs', +'portalb', +'postfavorites', +'postpost', 
+'postrank', +'powermarks', +'printfulbot', +'proodlebot', +'protopage', +'publiclibraryarchive', +'pyquery', +'python', +'qihoobot', +'quipply', +'qwantify', +'r6\_', +'rambler', +'ratingburner', +'raven', +'rbse', +'redalert', +'regator', +'relevantnoise\.com', +'resumerobot', +'rhcs', +'riddler', +'road_runner', +'robbie', +'robi', +'robocrawl', +'robofox', +'robozilla', +'rojo', +'rome[\x20]client', +'roverbot', +'rpt\-httpclient', +'rssgraffiti', +'rssimagesbot', +'ruffle', +'rufusbot', +'rules', +'safeads\.xyz', +'safetynetrobot', +'sage\+\+', +'sandcrawler', +'savetheworldheritage', +'sbider', +'schizozilla', +'scooter', +'scoutjet', +'scumbot', +'search\-info', +'search_au', +'searchguild[_+\s]dmoz[_+\s]experiment', +'searchmetricsbot', +'searchprocess', +'seekbot', +'semalt', +'senrigan', +'sensis_web_crawler', +'seodiver', +'seokicks\.de', +'seoscanners', +'sgscout', +'shaggy', +'shaihulud', +'shareaholicbot', +'shoutcast', +'sift', +'simbot', +'simplepie', +'sistrix', +'site\-valet', +'sitebot', +'sitedomain\-bot', +'sitetech', +'skimbot', +'skymob', +'slcrawler', +'slurp', +'slysearch', +'smartspider', +'smtbot', +'snap\.com_beta_crawler', +'snappy', +'snooper', +'sohu\-search', +'sohu', +'solbot', +'speedy', +'sphere_scout', +'spider[_+\s]monkey', +'spiderline', +'spiderlytics', +'spiderman', +'spiderview', +'spip', +'sproose_crawler', +'spry', +'sqworm', +'ssearcher', +'steeler', +'steroid__download', +'stq_bot', +'Stratagems[\x20]Kumo', +'suchfin\-bot', +'suke', +'summify\.com', +'sunrise', +'suntek', +'superbot', +'superfeedr', +'susie', +'sven', +'syndic8', +'syndicapi', +'synoobot', +'synthesio', +'t\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e', +'tach_bw', +'tagyu_agent', +'tailrank', +'tarantula', +'tarspider', +'tcl_http_client_package', +'techbot', +'technoratibot', +'templeton', +'teoma', +'teragramcrawlersurf', +'test_crawler', +'testbot', +'thumbsniper', +'titan', +'titin', +'tkwww', +'tlspider', +'topblogsinfo', +'topicblogs', +'topix\.net', 
+'trapit', +'trileet', +'turtlescanner', +'turtle', +'tutorgigbot', +'tweetedtimes', +'twiceler', +'twisted[\x20]pagegetter', +'twitterbot', +'twitterfeed', +'ubicrawler', +'ucsd', +'udmsearch', +'ultraseek', +'unchaos_bot_hybrid_web_search_engine', +'unido\-bot', +'unisterbot', +'universalfeedparser', +'unlost_web_crawler', +'unwindfetchor', +'updated', +'urlck', +'ustc\-semantic\-group', +'vagabondo\-wap', +'vagabondo', +'valkyrie', +'vermut', +'versus_crawler_from_eda\.baykan@epfl\.ch', +'verticrawl', +'vespa_crawler', +'victoria', +'virus[_+\s]detector', +'visionsearch', +'voidbot', +'voltron', +'vse/', +'vwbot', +'w3c[_+\s]css[_+\s]validator[_+\s]jfouffa', +'w3index', +'w3m2', +'wallpaper', +'wanderer', +'wapspider', +'wapspIRLider', +'watchmouse', +'wavefire', +'waybackarchive\.org', +'wazzup', +'web_downloader', +'webbandit', +'webbase', +'webcatcher', +'webclipping\.com', +'webcollage', +'webcompass', +'webcopy', +'webcrawl\.net', +'webdup', +'webfetcher', +'webfilter', +'webfoot', +'webinator', +'webindexer', +'weblayers', +'weblinker', +'webminer', +'webmirror', +'webmoose', +'webquest', +'webreader', +'webreaper', +'website[_+\s]monitoring[_+\s]bot', +'websnarf', +'webspider', +'webvac', +'webvulncrawl', +'webwalker', +'webwalk', +'webwatch', +'wells_search', +'wer\-liefert\-was', +'wesee:search', +'wevikabot', +'whatuseek', +'whowhere', +'windows\-rss\-platform', +'wired\-digital', +'zyborg', +'wisenutbot', +'wiumi', +'wmir', +'wolp', +'wombat', +'wonderer', +'woozweb', +'wordpress', +'worm', +'wume_crawler', +'wwwc', +'wwweasel', +'wz101', +'xget', +'xirq', +'xydo', +'y!j', +'yahoo![\x20]searchmonkey', +'yahoo!_mindset', +'yahoo\-blogs', +'yahoo\-mmcrawler', +'yahoo\-newscrawler', +'yahoo[\x20]pipes', +'yahoo\-verticalcrawler', +'yahoocachesystem', +'yahooexternalcache', +'yahoofeedseeker', +'yahooseeker\-testing', +'yahooseeker', +'yahooysmcm', +'yammer', +'yanga', +'yet\-another\-spider', +'yeti', +'yie8', +'yodaobot', +'yooglifetchagent', +'youdao', 
+'yourls', +'z\-add_link_checker', +'zealbot', +'zemanta', +'zend_http_client', +'zeus', +'zhuaxia', +'[^a]fish', +'[\x20]netseer[\x20]', +'^[1-3]$', +'^finbot', +'^motorola$', +'^msie', +'^voyager/', +'^webindex$', +'1\-more_scanner' +); + +@RobotsSearchIDOrder_listgen = ( +# Generic robot +'robot', +'blog', +'checker', +'crawl', +'discover', +'feed', +'fetcher', +'hunter', +'link', +'scanner', +'seek', +'sitemap', +'spider', +'sucker', +'validator', +'bot[\s_+:,\.\;\/\\\-]', +'[\s_+:,\.\;\/\\\-]bot', +'curl', +'php', +'ruby/', +'no_user_agent' +); + + +# RobotsHashIDLib +# List of robots names ('robot id','robot clear text') +#------------------------------------------------------- +%RobotsHashIDLib = ( +# Common robots (In robot file) +'bingbot/','bingbot', +'bingpreview','BingPreview', +'MSIECrawler','MSIECrawler', +'msnbot/','msnbot', +'msnbot\-media/','msnbot-media', +'AdIdxBot/','AdIdxBot Microsoft Ad Quality control', +'NOT[\x20]Googlebot/','NOT Googlebot', +'Googlebot/','Googlebot', +'Google[\x20]Web[\x20]Preview','Google Web Preview', +'Googlebot\-Image/','Googlebot-Image', +'Googlebot\-Mobile/','Googlebot-Mobile', +'google\-sitemaps','google-sitemaps', +'Googlebot\-News','Googlebot-News', +'Googlebot\-Video/','Googlebot-Video', +'AdsBot\-Google[\x20]\(','AdsBot-Google', +'AdsBot\-Google\-Mobile\-Apps','AdsBot-Google-Mobile-Apps', +'Mediapartners\-Google','Mediapartners-Google', +'Feedfetcher\-Google','Feedfetcher-Google', +'Google\-Adwords\-Instant','Google-Adwords-Instant', +'Firefox/1\.5','Nautic Expo using Firefox/1.5', +'Yahoo![\x20]Slurp[\x20]China','Yahoo! Slurp China', +'Yahoo![\x20]Slurp','Yahoo! 
Slurp', +'Baiduspider/','Baiduspider', +'Baiduspider\-image','Baiduspider-image', +'baidu','Baidu ( catchall )', +'YandexBot/','YandexBot', +'YandexImages/','YandexImages', +'YandexMetrika/','YandexMetrika', +'YandexMobileBot/','YandexMobileBot', +'yandex','Yandex ( catchall )', +'electricmonk/','electricmonk', +'spbot/','spbot', +'SeznamBot/','SeznamBot', +'msie8','msie8 - ( Rogue Robot )', +'AhrefsBot/','AhrefsBot', +'007ac9[\x20]Crawler','007ac9 Crawler', +'2345Explorer/','2345Explorer', +'360Spider','360Spider', +'A[\x20]Simple[\x20]Crawler','A Simple Crawler', +'Abrave','Abrave', +'acapbot/','acapbot', +'Accoona\-AI\-Agent/','Accoona-AI-Agent', +'AdnormCrawlerCatchBot/','AdnormCrawlerCatchBot', +'aiHitBot/','aiHitBot', +'aipbot/','aipbot', +'Apache\-HttpClient/','Apache-HttpClient', +'Apexoo[\x20]Spider','Apexoo Spider', +'Applebot/','Applebot', +'archive\.org_bot','archive.org_bot', +'Babya[\x20]Discoverer','Babya Discoverer', +'BDCbot/','BDCbot', +'BinGet/','BinGet', +'bl\.uk_lddc_bot/','bl.uk_lddc_bot', +'BLEXBot/','BLEXBot', +'boitho\.com\-dc/','boitho.com-dc', +'BusinessBot:','BusinessBot:', +'CatchBot/','CatchBot', +'CB/Nutch','CB/Nutch', +'CCBot/','CCBot', +'Cliqzbot/','Cliqzbot', +'CMS[\x20]Crawler','CMS Crawler', +'ConveraCrawler/','ConveraCrawler', +'cosmos/','cosmos', +'crawl/Nutch','crawl/Nutch', +'Crawler/','Crawler', +'crawler4j','crawler4j', +'CRAZYWEBCRAWLER','CRAZYWEBCRAWLER', +'CSE[\x20]HTML[\x20]Validator','CSE HTML Validator', +'C\-T[\x20]bot','C-T bot', +'Curl/PHP','Curl/PHP', +'Dalvik/','Dalvik', +'DataCrawler/','DataCrawler', +'Deepnet[\x20]Explorer','Deepnet Explorer', +'DeuSu/','DeuSu', +'Digincore','Digincore', +'Discordbot/','Discordbot', +'DoCoMo/','DoCoMo', +'Domain[\x20]Re\-Animator[\x20]Bot','Domain Re-Animator Bot', +'DomainCrawler/','DomainCrawler', +'DomainMacroCrawler/','DomainMacroCrawler', +'DomainSONOCrawler/','DomainSONOCrawler', +'DomainStatsBot/','DomainStatsBot', +'DotBot/','DotBot', 
+'DuckDuckGo\-Favicons\-Bot/','DuckDuckGo-Favicons-Bot', +'ELinks/','ELinks', +'ELinks[\x20]\(','ELinks (', +'EmailMarketingRobot/','EmailMarketingRobot', +'EmeraldShield\.com[\x20]WebBot','EmeraldShield.com WebBot', +'envolk\[ITS\]spider/','envolk ITS spider', +'EsperanzaBot','EsperanzaBot', +'Exabot/','Exabot', +'facebookexternalhit/','facebookexternalhit', +'fast_enterprise_crawler.*scrawleradmin\.t\-info@telekom\.de','FAST Enterprise crawleradmin.t-info@telekom.de', +'fast_enterprise_crawler.*t\-info_bi_cluster_crawleradmin\.t\-info@telekom\.de','FAST Enterprise T-Info_BI_cluster crawleradmin.t-info@telekom.de', +'FAST\-WebCrawler/','FAST-WebCrawler', +'Feosey[\x20]Mohk[\x20]Crawler','Feosey Mohk Crawler', +'findlinks/','findlinks', +'Findxbot/','Findxbot', +'FirePHP/','FirePHP', +'FlippyBearBot/','FlippyBearBot', +'FreeWebMonitoring[\x20]SiteChecker/','FreeWebMonitoring SiteChecker', +'fujilabol','fujilabol', +'FurlBot/','FurlBot', +'Gaisbot/','Gaisbot', +'Gallent[\x20]Spider','Gallent Spider', +'GarlikCrawler/','GarlikCrawler', +'GetintentCrawler[\x20]getintent\.com','GetintentCrawler getintent.com', +'Gigabot/','Gigabot', +'gipo\-crawler/Nutch','gipo-crawler/Nutch', +'Girafabot','Girafabot', +'Gluten[\x20]Free[\x20]Crawler/','Gluten Free Crawler', +'gocrawl','gocrawl', +'GrapeshotCrawler/','GrapeshotCrawler', +'GSiteCrawler/','GSiteCrawler', +'GurujiBot/','GurujiBot', +'HaosouSpider','HaosouSpider', +'holmes/','holmes', +'HTTP_Request2/','HTTP_Request2', +'HubSpot[\x20]Webcrawler','HubSpot Webcrawler', +'HyperCrawl/','HyperCrawl', +'ICC\-Crawler/','ICC-Crawler', +'iconoclast','iconoclast', +'IDGCrawler/Nutch','IDGCrawler/Nutch', +'idmarch[\x20]Automatic\.beta/','idmarch Automatic.beta', +'Incutio[\x20]XML','Incutio XML', +'InfluenceBot','InfluenceBot', +'IRLbot/','IRLbot', +'IssueCrawler','IssueCrawler', +'istellabot/','istellabot', +'James[\x20]BOT','James BOT', +'Jigsaw/','Jigsaw', +'JobFeed','JobFeed', +'KomodiaBot/','KomodiaBot', 
+'Konqueror/','Konqueror', +'linkapediabot','linkapediabot', +'metager\-linkchecker','metager-linkchecker', +'linkchecker','linkchecker', +'LinkCheck','LinkCheck', +'linkdexbot/','linkdexbot', +'LinkedInBot/','LinkedInBot', +'LinkpadBot/','LinkpadBot', +'Links[\x20]\(','Links (', +'LinksManager\.com_bot','LinksManager.com_bot', +'LWP::Simple/','LWP::Simple', +'Mail\.RU_Bot/','Mail.RU Bot', +'meanpathbot/','meanpathbot', +'Mediatoolkitbot','Mediatoolkitbot', +'MegaIndex\.ru/','MegaIndex.ru', +'merzscope','merzscope', +'mfibot/','mfibot', +'microsoft.*discovery','Microsoft Office Protocol Discovery', +'missigua_locator','missigua_locator', +'MixrankBot','MixrankBot', +'MJ12bot/','MJ12bot', +'MojeekBot/','MojeekBot', +'Mojolicious','Mojolicious', +'MXT/Nutch','MXT/Nutch', +'My[\x20]Nutch[\x20]Spider/','My Nutch Spider', +'myse/Nutch','myse/Nutch', +'NerdyBot','NerdyBot', +'netEstate[\x20]NE[\x20]Crawler','netEstate NE Crawler', +'NetResearchServer/','NetResearchServer', +'NRLCorpusBuilder/Nutch','NRLCorpusBuilder/Nutch', +'nutch\-1\.4/','nutch-1.4', +'nutch\-1\.8/','nutch-1.8', +'NutchCVS/','NutchCVS', +'o\.uk[\x20]robot','o.uk robot', +'oBot/','oBot', +'ocrawler;','ocrawler;', +'ODP[\x20]link[\x20]checker','ODP link checker', +'Offline[\x20]Explorer/','Offline Explorer', +'OmniExplorer_Bot/','OmniExplorer_Bot', +'OrangeBot/','OrangeBot', +'PageBitesHyperBot/','PageBitesHyperBot', +'pdffillerbot/','pdffillerbot', +'PhantomJS','PhantomJS', +'PHP/5\.2\.8','PHP/5.2.8', +'Ploetz[\x20]\+[\x20]Zeller','Ploetz + Zeller', +'Plukkie/','Plukkie', +'Princetonbot/','Princetonbot', +'PrivacyAwareBot/','PrivacyAwareBot', +'proximic','proximic', +'psbot/','psbot', +'psbot\-image','psbot-image', +'python_wk_crawler','python_wk_crawler', +'Python\-urllib/','Python-urllib', +'QCrawl','QCrawl', +'ResearchBot','ResearchBot', +'roboto','roboto', +'rogerbot/','rogerbot', +'RSSingBot','RSSingBot', +'RukiCrawler/','RukiCrawler', +'SafeDNS[\x20]search[\x20]bot/','SafeDNS search bot', 
+'SafeDNSBot','SafeDNSBot', +'SafeSearch[\x20]microdata[\x20]crawler','SafeSearch microdata crawler', +'safesearch','safesearch ( catchall )', +'SBL\-BOT','SBL-BOT', +'Screaming[\x20]Frog[\x20]SEO[\x20]Spider/','Screaming Frog SEO Spider', +'ScreenerBot[\x20]Crawler[\x20]Beta','ScreenerBot Crawler Beta', +'Searchie/','Searchie', +'Seekmo','Seekmo', +'semanticbot','semanticbot', +'SemrushBot/','SemrushBot', +'seo\-audit\-check\-bot/','seo-audit-check-bot', +'Seobility','Seobility', +'SEOkicks\-Robot','SEOkicks-Robot', +'SEOlyticsCrawler/','SEOlyticsCrawler', +'SEOstats','SEOstats', +'Seosys/Nutch','Seosys/Nutch', +'Seoterritory\.com[\x20]bot','Seoterritory.com bot', +'Shim\-Crawler','Shim-Crawler', +'SiteExplorer/','SiteExplorer', +'siteexplorer\.info','siteexplorer.info', +'Slackbot\-LinkExpanding','Slackbot-LinkExpanding', +'SmabblerBot/','SmabblerBot', +'Sogou[\x20]web[\x20]spider/','Sogou web spider', +'special_archiver/','special_archiver', +'Spiderbot/','Spiderbot', +'ssearch_bot','ssearch_bot', +'SurdotlyBot/','SurdotlyBot', +'SurveyBot/','SurveyBot', +'taiil/Nutch','taiil/Nutch', +'tbot\-nutch','tbot-nutch', +'TeeRaidBot','TeeRaidBot', +'TelegramBot','TelegramBot', +'Test/Nutch','Test/Nutch', +'Test[\x20]Spider','Test Spider', +'TestCrawler','TestCrawler', +'TurnitinBot/','TurnitinBot', +'TurnitinBot','TurnitinBot', +'TweetmemeBot/','TweetmemeBot', +'UCY/Nutch','UCY/Nutch', +'Uptimebot/','Uptimebot', +'URL[\x20]Checker','URL Checker', +'UXCrawlerBot','UXCrawlerBot', +'Validator\.nu/','Validator.nu', +'vBSEO','vBSEO', +'vBulletin[\x20]via[\x20]PHP','vBulletin via PHP', +'vebidoobot','vebidoobot', +'viz/Nutch','viz/Nutch', +'VoilaBot','VoilaBot', +'VORTEX/','VORTEX', +'voyager/','voyager', +'W3C_Validator/','W3C_Validator', +'W3C\-checklink/','W3C-checklink', +'WBSearchBot/','WBSearchBot', +'WeSEE:Ads/PageBot','WeSEE:Ads/PageBot', +'WeSEE:Ads/PictureBot','WeSEE:Ads/PictureBot', +'WeSEE_Bot','WeSEE_Bot', +'Wget/','Wget', +'Who\.is[\x20]Bot','Who.is Bot', 
+'wonderbot/','wonderbot', +'woobot/','woobot', +'Wotbox/','Wotbox', +'Xaldon[\x20]WebSpider','Xaldon WebSpider', +'Xenu[\x20]Link[\x20]Sleuth','Xenu Link Sleuth', +'xenu_link_sleuth','xenu_link_sleuth', +'XML[\x20]Sitemaps[\x20]Generator','XML Sitemaps Generator', +'XoviBot/','XoviBot', +'yacybot','yacybot', +'Yahoo[\x20]Link[\x20]Preview','Yahoo Link Preview', +'YisouSpider','YisouSpider', +'yoozBot','yoozBot', +'zspider/','zspider', +'ZumBot/','ZumBot', +# below placed at end to catch some generics +'nbot','nbot', +'ng/1\.','ng/1.', +'ng/2\.','ng/2.', +'libwww\-perl','libwww-perl', +'CFNetwork','CFNetwork', +'urllib','urllib', +'javabee','javabee', +'projectwf\-java\-test\-crawler','projectwf-java-test-crawler', +'java','Java ( catchall )', +'loocalcrawler/nutch','loocalcrawler/nutch', +'nutchosu\-vlib','nutchosu-vlib', +'nutch','nutch ( catchall )', +'perlcrawler','perlcrawler', +'perl','perl', +'(firefox/)([0-9]\.|[0-1][0]\.)','Firefox version 10 and lower - various robots', + +# Less common robots (In robot file) +'^Mozilla$','Mozilla ( Rogue Robot )', +'^mozilla\/3\.0\s\(compatible$', 'mozilla/3.0 (compatible - ( Rogue Robot )', +'^mozilla\/4\.0$', 'mozilla/4.0 - ( Rogue Robot )', +'^mozilla\/4\.0\s\(compatible;\)$', 'mozilla/4.0 (compatible;) - ( Rogue Robot )', +'^mozilla\/5\.0$', 'mozilla/5.0 - ( Rogue Robot )', +'^mozilla\/5\.0\s\(compatible;$', 'mozilla/5.0 (compatible; - ( Rogue Robot )', +'^mozilla\/5\.0\s\(en\-us\)$', 'mozilla/5.0 (en-us) - ( Rogue Robot )', +'^mozilla\/5\.0\sfirefox\/3\.0\.5$', 'mozilla/5.0 firefox/3.0.5 - ( Rogue Robot )', +'^Mozilla/6\.0[\x20]\(compatible\)$','Mozilla/6.0 (compatible) - ( Rogue Robot )', +'^Mozilla/(.*)Beta[\x20]\(Windows\)','Mozilla Beta (Windows) - ( Rogue Robot )', +'MSIE[\x20]2','MSIE 2 - ( Rogue Robot )', +'MSIE[\x20]3','MSIE 3 - ( Rogue Robot )', +'MSIE[\x20]4','MSIE 4 - ( Rogue Robot )', +'MSIE[\x20]5','MSIE 5 - ( Rogue Robot )', +'MSIE[\x20]6','MSIE 6 - ( Rogue Robot )', +'Windows[\x20]95','Windows 95 - ( 
Rogue Robot )', +'Windows[\x20]98','Windows 98 - ( Rogue Robot )', + +# these could be removed to speed up processing as they are rarely seen +'a6\-indexer','a6-indexer', +'abcdatos','abcdatos', +'abonti\.com','abonti.com', +'acme\.spider','acme.spider', +'activebookmark','activebookmark', +'adamm_bot','adamm_bot', +'advbot','advbot', +'affectv\.co\.uk','affectv.co.uk', +'ahoythehomepagefinder','ahoythehomepagefinder', +'aleadsoftbot','aleadsoftbot', +'alkaline','alkaline', +'allrati','allrati', +'alltop','alltop', +'almaden','almaden', +'alpha_search_agent','alpha_search_agent', +'anthill','anthill', +'antibot','antibot', +'aport','aport', +'appie','appie', +'applesyndication','applesyndication', +'arachnophilia','arachnophilia', +'arale','arale', +'araneo','araneo', +'architext','architext', +'archive\-de\.com','archive-de.com', +'aretha','aretha', +'argus','argus', +'ariadne','ariadne', +'arianna\.libero\.it','arianna.libero.it', +'arks','arks', +'aspider','aspider', +'aspseek','aspseek', +'asterias','asterias', +'asynchttpclient','asynchttpclient', +'atn\.txt','atn.txt', +'atomz','atomz', +'auresys','auresys', +'awbot','awbot', +'backlinktest\.com','backlinktest.com', +'backrub','backrub', +'bbot','bbot', +'becomebot','becomebot', +'bender','bender', +'betabot','betabot', +'bigbrother','bigbrother', +'biglotron','biglotron', +'BingLocalSearch','BingLocalSearch', +'bittorrent_bot','bittorrent_bot', +'biz360[_+\s]spider','biz360 spider', +'bjaaland','bjaaland', +'blackwidow','blackwidow', +'blindekuh','blindekuh', +'blogbridge[_+\s]service','blogbridge service', +'blogged_crawl','blogged_crawl', +'bloglines','bloglines', +'bloglovin','bloglovin', +'blogpulse','blogpulse', +'blogsearch','blogsearch', +'blogshares','blogshares', +'blogslive','blogslive', +'blogssay','blogssay', +'bloodhound','bloodhound', +'bncf\.firenze\.sbn\.it/raccolta\.txt','bncf.firenze.sbn.it/raccolta.txt', +'bobby','bobby', +'bookmark\-manager','bookmark-manager', +'borg\-bot','borg-bot', 
+'boris','boris', +'brightnet','brightnet', +'bruinbot','bruinbot', +'bspider','bspider', +'bubing','bubing', +'bumblebee','bumblebee', +'butterfly','butterfly', +'buzztracker','buzztracker', +'cactvschemistryspider','cactvschemistryspider', +'calif[^r]','calif[^r]', +'candlelight[_+\s]favorites[_+\s]inspector','candlelight favorites inspector', +'careerbot','careerbot', +'carpathia','carpathia', +'cassandra','cassandra', +'catbot','catbot', +'cbn00glebot','cbn00glebot', +'cerberian_drtrs','cerberian_drtrs', +'cfetch','cfetch', +'cgireader','cgireader', +'chattertrap','chattertrap', +'check_http','check_http', +'checkbot','checkbot', +'checkweb_link_validator','checkweb_link_validator', +'christcrawler','christcrawler', +'churl','churl', +'cienciaficcion','cienciaficcion', +'cipinetbot','cipinetbot', +'imagecoccoc','imagecoccoc', +'coccoc','coccoc', +'coldfusion','coldfusion', +'collective','collective', +'combine','combine', +'commons\-httpclient','commons-httpclient', +'computer_and_automation_research_institute_crawler','computer_and_automation_research_institute_crawler', +'conceptbot','conceptbot', +'contentmatch','contentmatch', +'converamultimediacrawler','converamultimediacrawler', +'coolbot','coolbot', +'copubbot','copubbot', +'core','core', +'covario','covario', +'cruiser','cruiser', +'cscrawler','cscrawler', +'cuasarbot','cuasarbot', +'cursor','cursor', +'cusco','cusco', +'custo','custo', +'cyberspyder','cyberspyder', +'datafountains/dmoz_downloader','datafountains/dmoz_downloader', +'dataprovider\.com','dataprovider.com', +'daumoa','daumoa', +'daviesbot','daviesbot', +'daylifefeedfetcher','daylifefeedfetcher', +'daypopbot','daypopbot', +'deepindex','deepindex', +'desertrealm','desertrealm', +'deweb','deweb', +'dienstspider','dienstspider', +'digger','digger', +'digout4u','digout4u', +'diibot','diibot', +'dipsie\.bot','dipsie.bot', +'direct_hit','direct_hit', +'discobot','discobot', +'dlvr\.it','dlvr.it', +'dnabot','dnabot', +'dnsgroup','dnsgroup', 
+'doccheckbot','doccheckbot', +'domainappender','domainappender', +'domainchecker','domainchecker', +'domainsdb\.net','domainsdb.net', +'download_express','download_express', +'dragonbot','dragonbot', +'dreamwidth','dreamwidth', +'drupal','drupal', +'dulance','dulance', +'dumbot','dumbot', +'dumm\.de\-bot','dumm.de-bot', +'dwcp','dwcp', +'e\-collector','e-collector', +'earthcom\.info','earthcom.info', +'easydl','easydl', +'ebiness','ebiness', +'eccp','eccp', +'echo!','echo!', +'edgeio\-retriever','edgeio-retriever', +'elfinbot','elfinbot', +'emacs','emacs', +'emcspider','emcspider', +'enteprise','enteprise', +'ernst[:blank:]2\.0','ernst[:blank:]2.0', +'esther','esther', +'ets_v','ets_v', +'eventax','eventax', +'everbeecrawler','everbeecrawler', +'everest\-vulcan','everest-vulcan', +'evliyacelebi','evliyacelebi', +'exactseek','exactseek', +'extreme[_+\s]picture[_+\s]finder','extreme picture finder', +'ezoom','ezoom', +'ezresult','ezresult', +'facebook','facebook', +'facebot','facebot', +'fast\-search\-engine','fast-search-engine', +'matrix_s\.p\.a\._\-_fast_enterprise_crawler','matrix_s.p.a._-_fast_enterprise_crawler', +'fast_enterprise_crawler','fast_enterprise_crawler', +'fastbot','fastbot', +'fastcrawler','fastcrawler', +'favicon','favicon', +'favorg','favorg', +'favorites_sweeper','favorites_sweeper', +'fdse','fdse', +'feedburner','feedburner', +'feedcrawl','feedcrawl', +'feedflow','feedflow', +'feedmyinbox','feedmyinbox', +'feedroll\.com','feedroll.com', +'feedsky','feedsky', +'feedster','feedster', +'feedvalidator','feedvalidator', +'feedzira','feedzira', +'felix','felix', +'ferret','ferret', +'fetchbot','fetchbot', +'fetchrover','fetchrover', +'fever/','fever', +'fido','fido', +'filmkamerabot','filmkamerabot', +'filterdb\.iss\.net','filterdb.iss.net', +'finderlein[_+\s]research[_+\s]crawler','finderlein research crawler', +'findexa_crawler','findexa_crawler', +'finnish','finnish', +'fireball','fireball', +'firmilybot','firmilybot', +'flexum','flexum', 
+'foaf\-search\.net','foaf-search.net', +'fooky\.com/ScorpionBot','fooky.com/ScorpionBot', +'fouineur','fouineur', +'francoroute','francoroute', +'freecrawl','freecrawl', +'freenews','freenews', +'funnelweb','funnelweb', +'g2crawler','g2crawler', +'gama','gama', +'gazz','gazz', +'gcreep','gcreep', +'geniebot','geniebot', +'genieo','genieo', +'geohasher','geohasher', +'getbot','getbot', +'geturl','geturl', +'gigablastopensource','gigablastopensource', +'global_fetch','global_fetch', +'gnodspider','gnodspider', +'goforit\.com','goforit.com', +'goforitbot','goforitbot', +'golem','golem', +'gonzo','gonzo', +'gougou','gougou', +'gpu_p2p_crawler','gpu_p2p_crawler', +'grabber','grabber', +'grapeshot','grapeshot', +'grapnel','grapnel', +'griffon','griffon', +'gromit','gromit', +'grub','grub', +'gulliver','gulliver', +'gulperbot','gulperbot', +'hambot','hambot', +'hanrss','hanrss', +'harvest','harvest', +'havindex','havindex', +'henrythemiragorobot','henrythemiragorobot', +'heritrix','heritrix', +'hl_ftien_spider','hl_ftien_spider', +'hometown','hometown', +'hoowwwer','hoowwwer', +'hpprint','hpprint', +'htdig','htdig', +'html[_+\s]link[_+\s]validator','html link validator', +'htmlgobble','htmlgobble', +'htmlparser','htmlparser', +'httrack','httrack', +'hundesuche\.com\-bot','hundesuche.com-bot', +'hyperdecontextualizer','hyperdecontextualizer', +'ia_archiver\-web\.archive\.org','ia_archiver-web.archive.org', +'ia_archiver','ia_archiver', +'iajabot','iajabot', +'iaskspider','iaskspider', +'i\-bot','i-bot', +'icarus6j','icarus6j', +'ichiro','ichiro', +'icjobs\.de','icjobs.de', +'ilse','ilse', +'iltrovatore\-setaccio','iltrovatore-setaccio', +'imagelock','imagelock', +'implisensebot','implisensebot', +'inagist','inagist', +'incywincy','incywincy', +'infobot','infobot', +'infociousbot','infociousbot', +'infohelfer','infohelfer', +'infomine','infomine', +'informant','informant', +'infoseeksidewinder','infoseeksidewinder', +'infoseek','infoseek', +'infospider','infospider', 
+'inspectorwww','inspectorwww', +'insurancobot','insurancobot', +'integromedb\.org','integromedb.org', +'intelliagent','intelliagent', +'internet[_+\s]ninja','internet ninja', +'internetarchive','internetarchive', +'internetseer','internetseer', +'internetsupervision','internetsupervision', +'ips\-agent','ips-agent', +'irobot','irobot', +'iron33','iron33', +'isearch2006','isearch2006', +'israelisearch','israelisearch', +'iupui_research_bot','iupui_research_bot', +'izsearch','izsearch', +'jacobin[\x20]club','jacobin club', +'jakarta','jakarta', +'jbot','jbot', +'jcrawler','jcrawler', +'jeeves','jeeves', +'jennybot','jennybot', +'jobboerse','jobboerse', +'jobot','jobot', +'jobo','jobo', +'joebot','joebot', +'jrtwine[_+\s]software[_+\s]check[_+\s]favorites[_+\s]utility','jrtwine software check favorites utility', +'js\-kit','js-kit', +'jubii','jubii', +'jumpstation','jumpstation', +'justview','justview', +'kalambot','kalambot', +'kamano\.de_newsfeedverzeichnis','kamano.de_newsfeedverzeichnis', +'kapsi','kapsi', +'katipo','katipo', +'kazoombot','kazoombot', +'kevin','kevin', +'keyoshid','keyoshid', +'kilroy','kilroy', +'kinja\-imagebot','kinja-imagebot', +'kinjabot','kinjabot', +'knowitall','knowitall', +'knowledge\.com','knowledge.com', +'ko[_+\s]yappo[_+\s]robot','ko yappo robot', +'kouaa_krawler','kouaa_krawler', +'krugle','krugle', +'ksibot','ksibot', +'kummhttp','kummhttp', +'kurzor','kurzor', +'labelgrabber\.txt','labelgrabber.txt', +'lanshanbot','lanshanbot', +'larbin','larbin', +'largesmall[\x20]crawler','largesmall crawler', +'legs','legs', +'letscrawl\.com','letscrawl.com', +'libcrawl','libcrawl', +'lilina','lilina', +'link_valet_online','link_valet_online', +'linkbot','linkbot', +'linkdex\.com','linkdex.com', +'linkidator','linkidator', +'linkscan','linkscan', +'linkstats[\x20]bot','linkstats bot', +'linkwalker','linkwalker', +'lipperhey','lipperhey', +'livejournal\.com','livejournal.com', +'lmspider','lmspider', +'loadtimebot','loadtimebot', 
+'lockon','lockon', +'logo_gif','logo_gif', +'longurl','longurl', +'lssrocketcrawler','lssrocketcrawler', +'ltbot','ltbot', +'ltx71','ltx71', +'lwp\-request','lwp-request', +'lwp\-trivial','lwp-trivial', +'lycos[_+\s]','lycos ', +'macworm','macworm', +'madaali\.de','madaali.de', +'magpierss','magpierss', +'magpie','magpie', +'mapoftheinternet\.com','mapoftheinternet.com', +'marvin','marvin', +'mattie','mattie', +'mediabot','mediabot', +'mediafox','mediafox', +'megaindex','megaindex', +'megite','megite', +'memorybot','memorybot', +'mercator','mercator', +'meshexplorer','meshexplorer', +'metager2\-verification\-bot','metager2-verification-bot', +'metajobbot','metajobbot', +'metaspinner','metaspinner', +'metauri','metauri', +'miadev','miadev', +'microsoft[_+\s]url[_+\s]control','microsoft url control', +'microsoft[\x20]bits','microsoft bits', +'microsoft\-webdav\-miniredir','microsoft-webdav-miniredir', +'mindcrawler','mindcrawler', +'mindupbot','mindupbot', +'mini\-reptile','mini-reptile', +'minirank','minirank', +'misterbot','misterbot', +'miva','miva', +'mizzu_labs','mizzu_labs', +'mnogosearch','mnogosearch', +'moget','moget', +'momspider','momspider', +'monster','monster', +'motor','motor', +'movabletype','movabletype', +'ms[_+\s]search[_+\s]6\.0[_+\s]robot','ms search 6.0 robot', +'ms_search_4\.0_robot','ms_search_4.0_robot', +'msnbot\-udiscovery','msnbot-udiscovery', +'msrabot','msrabot', +'msrbot','msrbot', +'mt::telegraph::agent','mt::telegraph::agent', +'muncher','muncher', +'muscatferret','muscatferret', +'mwdsearch','mwdsearch', +'mydoyouhike','mydoyouhike', +'myweb','myweb', +'nagios','nagios', +'nasa_search','nasa_search', +'ndspider','ndspider', +'nederland\.zoek','nederland.zoek', +'netcarta','netcarta', +'netcraft','netcraft', +'netluchs','netluchs', +'netmechanic','netmechanic', +'netnewswire','netnewswire', +'netscoop','netscoop', +'netsprint','netsprint', +'netvibes','netvibes', +'newrelicpinger','newrelicpinger', 
+'newscan\-online','newscan-online', +'newsfox','newsfox', +'newsgatoronline','newsgatoronline', +'nextgensearchbot','nextgensearchbot', +'nhse','nhse', +'nicebot','nicebot', +'nimblecrawler','nimblecrawler', +'ning','ning', +'nomad','nomad', +'northstar','northstar', +'noxtrumbot','noxtrumbot', +'npbot','npbot', +'nzexplorer','nzexplorer', +'objectssearch','objectssearch', +'occam','occam', +'ocelli','ocelli', +'octopus','octopus', +'octora_beta_bot','octora_beta_bot', +'onet\.pl[_+\s]sa','onet.pl sa', +'onfolio','onfolio', +'openfind','openfind', +'opentaggerbot','opentaggerbot', +'openwebspider','openwebspider', +'optimizer','optimizer', +'oracle_ultra_search','oracle_ultra_search', +'orb_search','orb_search', +'orbiter','orbiter', +'packrat','packrat', +'pageboy','pageboy', +'panscient','panscient', +'parasite','parasite', +'passwordmaker\.org','passwordmaker.org', +'patric','patric', +'pear_http_request_class','pear_http_request_class', +'peerbot','peerbot', +'pegasus','pegasus', +'perignator','perignator', +'perman','perman', +'petersnews','petersnews', +'phantom','phantom', +'php[_+\s]version[_+\s]tracker','php version tracker', +'phpcrawl','phpcrawl', +'phpdig','phpdig', +'picmole','picmole', +'pictureofinternet','pictureofinternet', +'piltdownman','piltdownman', +'pimptrain','pimptrain', +'ping\.blo\.gs','ping.blo.gs', +'pingdom','pingdom', +'pioneer','pioneer', +'pita','pita', +'pitkow','pitkow', +'pjspider','pjspider', +'plinki','plinki', +'pluckfeedcrawler','pluckfeedcrawler', +'plumtreewebaccessor','plumtreewebaccessor', +'pogodak','pogodak', +'pompos','pompos', +'popdexter','popdexter', +'poppi','poppi', +'port_huron_labs','port_huron_labs', +'portalb','portalb', +'postfavorites','postfavorites', +'postpost','postpost', +'postrank','postrank', +'powermarks','powermarks', +'printfulbot','printfulbot', +'proodlebot','proodlebot', +'protopage','protopage', +'publiclibraryarchive','publiclibraryarchive', +'pyquery','pyquery', +'python','python', 
+'qihoobot','qihoobot', +'quipply','quipply', +'qwantify','qwantify', +'r6\_','r6\_', +'rambler','rambler', +'ratingburner','ratingburner', +'raven','raven', +'rbse','rbse', +'redalert','redalert', +'regator','regator', +'relevantnoise\.com','relevantnoise.com', +'resumerobot','resumerobot', +'rhcs','rhcs', +'riddler','riddler', +'road_runner','road_runner', +'robbie','robbie', +'robi','robi', +'robocrawl','robocrawl', +'robofox','robofox', +'robozilla','robozilla', +'rojo','rojo', +'rome[\x20]client','rome client', +'roverbot','roverbot', +'rpt\-httpclient','rpt-httpclient', +'rssgraffiti','rssgraffiti', +'rssimagesbot','rssimagesbot', +'ruffle','ruffle', +'rufusbot','rufusbot', +'rules','rules', +'safeads\.xyz','safeads.xyz', +'safetynetrobot','safetynetrobot', +'sage\+\+','sage++', +'sandcrawler','sandcrawler', +'savetheworldheritage','savetheworldheritage', +'sbider','sbider', +'schizozilla','schizozilla', +'scooter','scooter', +'scoutjet','scoutjet', +'scumbot','scumbot', +'search\-info','search-info', +'search_au','search_au', +'searchguild[_+\s]dmoz[_+\s]experiment','searchguild dmoz experiment', +'searchmetricsbot','searchmetricsbot', +'searchprocess','searchprocess', +'seekbot','seekbot', +'semalt','semalt', +'senrigan','senrigan', +'sensis_web_crawler','sensis_web_crawler', +'seodiver','seodiver', +'seokicks\.de','seokicks.de', +'seoscanners','seoscanners', +'sgscout','sgscout', +'shaggy','shaggy', +'shaihulud','shaihulud', +'shareaholicbot','shareaholicbot', +'shoutcast','shoutcast', +'sift','sift', +'simbot','simbot', +'simplepie','simplepie', +'sistrix','sistrix', +'site\-valet','site-valet', +'sitebot','sitebot', +'sitedomain\-bot','sitedomain-bot', +'sitetech','sitetech', +'skimbot','skimbot', +'skymob','skymob', +'slcrawler','slcrawler', +'slurp','slurp', +'slysearch','slysearch', +'smartspider','smartspider', +'smtbot','smtbot', +'snap\.com_beta_crawler','snap.com_beta_crawler', +'snappy','snappy', +'snooper','snooper', 
+'sohu\-search','sohu-search', +'sohu','sohu ( catchall )', +'solbot','solbot', +'speedy','speedy', +'sphere_scout','sphere_scout', +'spider[_+\s]monkey','spider monkey', +'spiderline','spiderline', +'spiderlytics','spiderlytics', +'spiderman','spiderman', +'spiderview','spiderview', +'spip','spip', +'sproose_crawler','sproose_crawler', +'spry','spry', +'sqworm','sqworm', +'ssearcher','ssearcher', +'steeler','steeler', +'steroid__download','steroid__download', +'stq_bot','stq_bot', +'Stratagems[\x20]Kumo','Stratagems Kumo', +'suchfin\-bot','suchfin-bot', +'suke','suke', +'summify\.com','summify.com', +'sunrise','sunrise', +'suntek','suntek', +'superbot','superbot', +'superfeedr','superfeedr', +'susie','susie', +'sven','sven', +'syndic8','syndic8', +'syndicapi','syndicapi', +'synoobot','synoobot', +'synthesio','synthesio', +'t\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e','t-h-u-n-d-e-r-s-t-o-n-e', +'tach_bw','tach_bw', +'tagyu_agent','tagyu_agent', +'tailrank','tailrank', +'tarantula','tarantula', +'tarspider','tarspider', +'tcl_http_client_package','tcl_http_client_package', +'techbot','techbot', +'technoratibot','technoratibot', +'templeton','templeton', +'teoma','teoma', +'teragramcrawlersurf','teragramcrawlersurf', +'test_crawler','test_crawler', +'testbot','testbot', +'thumbsniper','thumbsniper', +'titan','titan', +'titin','titin', +'tkwww','tkwww', +'tlspider','tlspider', +'topblogsinfo','topblogsinfo', +'topicblogs','topicblogs', +'topix\.net','topix.net', +'trapit','trapit', +'trileet','trileet', +'turtlescanner','turtlescanner', +'turtle','turtle', +'tutorgigbot','tutorgigbot', +'tweetedtimes','tweetedtimes', +'twiceler','twiceler', +'twisted[\x20]pagegetter','twisted pagegetter', +'twitterbot','twitterbot', +'twitterfeed','twitterfeed', +'ubicrawler','ubicrawler', +'ucsd','ucsd', +'udmsearch','udmsearch', +'ultraseek','ultraseek', +'unchaos_bot_hybrid_web_search_engine','unchaos_bot_hybrid_web_search_engine', +'unido\-bot','unido-bot', +'unisterbot','unisterbot', 
+'universalfeedparser','universalfeedparser', +'unlost_web_crawler','unlost_web_crawler', +'unwindfetchor','unwindfetchor', +'updated','updated', +'urlck','urlck', +'ustc\-semantic\-group','ustc-semantic-group', +'vagabondo\-wap','vagabondo-wap', +'vagabondo','vagabondo', +'valkyrie','valkyrie', +'vermut','vermut', +'versus_crawler_from_eda\.baykan@epfl\.ch','versus_crawler_from_eda.baykan@epfl.ch', +'verticrawl','verticrawl', +'vespa_crawler','vespa_crawler', +'victoria','victoria', +'virus[_+\s]detector','virus_detector', +'visionsearch','visionsearch', +'voidbot','voidbot', +'voltron','voltron', +'vse/','vse', +'vwbot','vwbot', +'w3c[_+\s]css[_+\s]validator[_+\s]jfouffa','w3c_css_validator_jfouffa', +'w3index','w3index', +'w3m2','w3m2', +'wallpaper','wallpaper', +'wanderer','wanderer', +'wapspider','wapspider', +'wapspIRLider','wapspIRLider', +'watchmouse','watchmouse', +'wavefire','wavefire', +'waybackarchive\.org','waybackarchive.org', +'wazzup','wazzup', +'web_downloader','web_downloader', +'webbandit','webbandit', +'webbase','webbase', +'webcatcher','webcatcher', +'webclipping\.com','webclipping.com', +'webcollage','webcollage', +'webcompass','webcompass', +'webcopy','webcopy', +'webcrawl\.net','webcrawl.net', +'webdup','webdup', +'webfetcher','webfetcher', +'webfilter','webfilter', +'webfoot','webfoot', +'webinator','webinator', +'webindexer','webindexer', +'weblayers','weblayers', +'weblinker','weblinker', +'webminer','webminer', +'webmirror','webmirror', +'webmoose','webmoose', +'webquest','webquest', +'webreader','webreader', +'webreaper','webreaper', +'website[_+\s]monitoring[_+\s]bot','website monitoring bot', +'websnarf','websnarf', +'webspider','webspider', +'webvac','webvac', +'webvulncrawl','webvulncrawl', +'webwalker','webwalker', +'webwalk','webwalk', +'webwatch','webwatch', +'wells_search','wells_search', +'wer\-liefert\-was','wer-liefert-was', +'wesee:search','wesee:search', +'wevikabot','wevikabot', +'whatuseek','whatuseek', 
+'whowhere','whowhere', +'windows\-rss\-platform','windows-rss-platform', +'wired\-digital','wired-digital', +'zyborg','zyborg', +'wisenutbot','wisenutbot', +'wiumi','wiumi', +'wmir','wmir', +'wolp','wolp', +'wombat','wombat', +'wonderer','wonderer', +'woozweb','woozweb', +'wordpress','wordpress', +'worm','worm', +'wume_crawler','wume_crawler', +'wwwc','wwwc', +'wwweasel','wwweasel', +'wz101','wz101', +'xget','xget', +'xirq','xirq', +'xydo','xydo', +'y!j','y!j', +'yahoo![\x20]searchmonkey','yahoo! searchmonkey', +'yahoo!_mindset','yahoo!_mindset', +'yahoo\-blogs','yahoo-blogs', +'yahoo\-mmcrawler','yahoo-mmcrawler', +'yahoo\-newscrawler','yahoo-newscrawler', +'yahoo[\x20]pipes','yahoo pipes', +'yahoo\-verticalcrawler','yahoo-verticalcrawler', +'yahoocachesystem','yahoocachesystem', +'yahooexternalcache','yahooexternalcache', +'yahoofeedseeker','yahoofeedseeker', +'yahooseeker\-testing','yahooseeker-testing', +'yahooseeker','yahooseeker', +'yahooysmcm','yahooysmcm', +'yammer','yammer', +'yanga','yanga', +'yet\-another\-spider','yet-another-spider', +'yeti','yeti', +'yie8','yie8', +'yodaobot','yodaobot', +'yooglifetchagent','yooglifetchagent', +'youdao','youdao', +'yourls','yourls', +'z\-add_link_checker','z-add_link_checker', +'zealbot','zealbot', +'zemanta','zemanta', +'zend_http_client','zend_http_client', +'zeus','zeus', +'zhuaxia','zhuaxia', +'[^a]fish','[^a]fish', +'[\x20]netseer[\x20]',' netseer ', +'^[1-3]$','^[1-3]$', +'^finbot','^finbot', +'^motorola$','^motorola$', +'^msie','^msie', +'^voyager/','^voyager', +'^webindex$','webindex', +'1\-more_scanner','1-more_scanner', + +# Generic robot +'robot','robot', +'blog','blog', +'checker','checker', +'crawl','crawl', +'discover','discover', +'feed','feed', +'fetcher','fetcher', +'hunter','hunter', +'link','link', +'scanner','scanner', +'seek','seek', +'sitemap','sitemap', +'spider','spider', +'sucker','sucker', +'validator','validator', +'bot[\s_+:,\.\;\/\\\-]','Unknown robot identified by bot\*', 
+'[\s_+:,\.\;\/\\\-]bot','Unknown robot identified by \*bot', +'curl','Curl', +'php','A PHP script', +'ruby/','Ruby script', +'no_user_agent','empty user agent string', +# Unknown robots identified by hit on robots.txt +'unknown','Unknown robot (identified by hit on robots.txt)' +); + + +# RobotsAffiliateLib +# This list try to tell by which Search Engine a robot is used +#------------------------------------------------------------- +%RobotsAffiliateLib = ( +); + +1; diff --git a/wwwroot/cgi-bin/lib/search_engines.pm b/wwwroot/cgi-bin/lib/search_engines.pm index e56b0080..ad3d3bda 100644 --- a/wwwroot/cgi-bin/lib/search_engines.pm +++ b/wwwroot/cgi-bin/lib/search_engines.pm @@ -1,1578 +1,4491 @@ -# AWSTATS SEARCH ENGINES DATABASE -#------------------------------------------------------------------------------ -# If you want to add a Search Engine to extend AWStats database detection capabilities, -# you must add an entry in SearchEnginesSearchIDOrder, SearchEnginesHashID and in -# SearchEnginesHashLib. -# An entry if known in SearchEnginesKnownUrl is also welcome. -# -# to eldy: Please check if the following description is correct: -# You need the following information to specify a search engine: -# (a) A regular expression that matches the referrer string of the -# search engine. Unclear: What about slashes in the name of -# a search engine, e.g. as in 'ecosia.com/search'. Seems that -# AWStats will non find search strings containing a slash. -# Maybe use a search string without a slash, and - if necessary - -# an entry in %NotSearchEnginesKeys , if this search string -# matches entries that are not search engines. 
-# Example of a web address of a Amazon search engine: -# http://www.amazon.de/gp/bit/apps/web/SERP/search/ref=bit_bds-p24_serp_cr_de?ie=UTF8tagbase=bds-p24&query=deutsch+8.+klasse+gymnasium+protokoll -# (b) A unique string to identify the search engine within AWStats -# (c) A regular expression that finds the start of the query part in the -# referrer string -# (d) A HTML-fragment that goes into the reports generated by AWStats which -# identifies the search engine to human reader of the report. In the -# simplest case this is a string containing the name of the search -# engine. You can also provide a hypertext clause that presents the -# name together with a link to the search engine. -# -# The regular expression (a) goes into SearchEnginesSearchIDOrder_list1 -# or ..._list2. List 1 contains common search engines, list 2 those -# that are not so often used. -# -# SearchEnginesHashID contains to consecutive entries for each search -# engine: The regular expression (a) followed bei the search engine -# identifier (b) -# -# SearchEnginesKnownUrl specifies how to find the start of the query. -# For each search engine you enter the search engine identifier (b) -# followed by the regular expression (c). Unclear: It is possible to -# omit this entry. If you do this, how will AWStats find the start of -# the query? -# -# SearchEnginesHashLib contains also two entries for each search engine: -# The search engine identifier (b) followed by the HTML-Fragment (d) -# -# There are search engines that do not use a query part in their URLs. -# They put the search expression in the main part of the URL instead. -# AWStats is able to handle these cases. They are specified as described -# above, except the following two things: -# - The regular expression (c) searches the complete URL and not only -# the query part. -# - An additional Entry in the list %SearchEnginesWithKeysNotInQuery is -# necessary. -# -# -# AWStats runs a sanity check of the contents of search_engines.pm. 
This -# check detects the following things: -# - Inconsistencies (number of entries) -# It does not detect the following errors: -# - If the HTML-Fragment (d) is syntactically incorrect. -# -#------------------------------------------------------------------------------ - -# 2005-08-19 Sean Carlos http://www.antezeta.com/awstats.html -# added minor italian search engines -# arianna http://arianna.libero.it/ -# supereva http://search.supereva.com/ -# kataweb http://kataweb.it/ -# corrected uk looksmart -# 'askuk','ask=', 'bbc','q=', 'freeserve','q=', 'looksmart','key=', -# to -# 'askuk','ask=', 'bbc','q=', 'freeserve','q=', 'looksmartuk','key=', -# corrected spelling -# internationnal -> international -# added 'google\.'=>'mail\.google\.', to NotSearchEnginesKeys in order to -# avoid counting gmail referrals as search engine traffic -# 2005-08-21 Sean Carlos http://www.antezeta.com/awstats.html -# avoid counting babelfish.altavista referrals as search engine traffic -# avoid counting translate.google referrals as search engine traffic -# 2005-11-20 Sean Carlos -# added missing 'tiscali','key=', entry. Check order -# 2005-11-22 Sean Carlos -# added Google Base & Froogle. Froogle not tested. 
-# 2006-04-18 Sean Carlos http://www.antezeta.com/awstats.html -# added biglotron.com (France) -# added blingo http://www.blingo.com/ -# added Clusty & Vivisimo -# added eniro.no (Norway) [https://sourceforge.net/forum/message.php?msg_id=3134783] -# added GPU p2p search http://search.centraldatabase.org/ -# added mail.tiscali to "not search engines list" [https://sourceforge.net/forum/message.php?msg_id=3166688] -# added Ask group's "mysearch" -# added sify.com (India) -# added sogou.com (Cina) [https://sourceforge.net/forum/message.php?msg_id=3501603] -# Ask changes: -# - added Ask Japan (ask.jp) -# - break out Ask new country level variants (DE, ES, FR, IT, NL) -# - updated Ask name from Ask Jevees -# - added Ask q= parameter - many recent searches probably not recognized; [https://sourceforge.net/forum/message.php?msg_id=3465444] -# - updated Ask uk (new uk.ask.com added to older ask.co.uk) -# updated voila kw|rdata parameter [https://sourceforge.net/forum/message.php?msg_id=3373912] -# for each new engine, added link to Search Engine. This serves to document engine. Done for major & Italian engines as well. Requires patch -# to AWStats to allow untranslated html. Otherwise html will appear instead of link. -# reviewed mnoGoSearch (http://www.mnogosearch.org/); the search engined mentioned no longer -# exists https://sourceforge.net/forum/message.php?msg_id=3025426 -# 2006-05-13 Sean Carlos http://www.antezeta.com/awstats.html -# added 10 Chello European broadband portals (Austria, Belgium, Czech Republic, France, Hungary, The Netherlands, Norway, Poland, Slovakia, Sweden) -# added Alice Internal Search (blends data with Google?) search.alice.it.master:10005 -# added detection of google cache views from IPs 66.249.93.104 72.14.203.104 72.14.207.104 -# To do: add more extensive IP list; keywords not yet detected. -# added icerocket.com blog search http://www.icerocket.com/ -# added live.com (msn) http://www.live.com/ -# added Meta motor kartoo. 
Note: Kartoo does not provide search words in referrers, thus the engine will appear in the -# search engine list but the actual search words are not available. -# added netluchs.de http://www.netluchs.de/ -# added sphere.com blog search http://www.sphere.com/ -# added wwweasel.de http://wwweasel.de -# added Yahoo Mindset! http://mindset.research.yahoo.com/ -# updated Mirago query parameter recognition (qry=); added breakout for each country (France, Germany, Spain, Italy, Norway, Sweden, Denmark, Netherlands, Belgium, Switzerland) -# 2006-05-13 Sean Carlos http://www.antezeta.com/awstats.html -# added Google cache IPs 64.233.183.104 & 66.102.7.104 -# 2006-05-20 Sean Carlos http://www.antezeta.com/awstats.html -# anzwers.com.au -# schoenerbrausen.de http://www.schoenerbrausen.de/ -# added Google cache IP 216.239.59.104 -# answerbus http://www.answerbus.com/ (does not provide keywords) -# 2006-05-23 Sean Carlos http://www.antezeta.com/awstats.html -# added Google cache IP 66.102.9.104, 64.233.161.104 -# 2006-06-23 Sean Carlos http://www.antezeta.com/awstats.html -# added Alice Search search.alice.it -# added GoodSearch http://www.goodsearch.com/ (does not provide keywords) "a Yahoo-powered search engine that donates money to your favorite charity or school each time you search the web" -# added googlee.com, variant of Google -# added gotuneed http://www.gotuneed.com/ Italian search engine, in beta -# added icq.com -# added logic to parse Google Cache search keywords. Seems to work for alpha but not numeric cache IDs, i.e. search?q=cache:lWVLmnuGJswJ: is recognized but q=cache:Yv5qxeJNuhgJ: is not recognized. The URL triggering the keywords will also appear. The URLs are probably too varied to parse out? 
-# added Nusearch http://www.nusearch.com/ -# added Polymeta www.polymeta.hu (does not provide keywords) -# added scroogle http://www.scroogle.org/ (does not always provide keywords) -# added Tango http://tango.hu/search.php?st=0&q=jeles+napok -# Changed Google Cache notation 64\.233\.(161|167|179|183|187)\.104 to 64\.233\.1[0-9]{2}\.104 -# 72\.14\.(203|205|207|209|221)\.104 to 72\.14\.2[0-9]{2}\.104 -# 216\.239\.(51|59)\.104 to 216\.239\.5[0-9]\.104 -# 66\.102\.(7|9)\.104 to 66\.102\.[1-9]\.104 -# 2006-06-27 Sean Carlos http://www.antezeta.com/awstats.html -# added Onet.pl http://szukaj.onet.pl/ -# corrected name "Wirtualna Polska" from "Szukaj" (search); added link http://szukaj.wp.pl/ -# 2006-06-30 Sean Carlos http://www.antezeta.com/awstats.html -# Additional Polish Search Engines: -# added Dodaj.pl http://www.dodaj.pl/ -# added Gazeta.pl http://szukaj.gazeta.pl/ -# added Gery.pl http://szukaj.gery.pl/ -# added Hoga.pl http://www.hoga.pl/ -# added Interia.pl http://www.google.interia.pl/ -# added Katalog.Onet.pl http://katalog.onet.pl/ -# added NetSprint.pl http://www.netsprint.pl/ -# added o2.pl http://szukaj2.o2.pl/ -# added Polska http://szukaj.polska.pl/ -# added Szukacz http://www.szukacz.pl/ -# added Wow.pl http://szukaj.wow.pl/ -# added Sagool http://sagool.jp/ - -# 2006-08-25 Social Bookmarks -# International -# added del.icio.us/search - for now, just search referrer. To do: consider /tag/(tagname) referrer? -# added stumbleupon.com - No keywords supplied. -# added swik.net -# added digg. Keywords sometimes supplied. -# Italy -# added segnalo.alice.it - No keywords supplied. -# added ineffabile.it - No keywords supplied. - -# added filter for google groups. Attempt to parse group name as keyword. 
- -# 2006-09-14 -# added Eniro Sverige http://www.eniro.se/ -# added MyWebSearch http://search.mywebsearch.com/ -# added Teecno http://www.teecno.it/ Italian Open Source Search Engine - -#package AWSSE; - -# 2006-09-25 (Gabor Moizes) -# added 4-counter (Google alternative) http://4-counter.com/ -# added Googlecom (Google alternative) http://googlecom.com/ -# added Goggle (Google alternative) http://goggle.co.hu/ -# added Comet toolbar http://as.starware.com -# added new IP for Yahoo: 216.109.125.130 -# added Ledix http://ledix.net/ -# added AT&T search (powered by Google) http://www.att.net/ -# added Keresolap (Hungarian search engine) http://www.keresolap.hu/ -# added Mozbot (French search engine) http://www.mozbot.fr/ -# added Zoznam (Slovak search engine) http://www.zoznam.sk/ -# added sapo.pt (Portuguese search engine) http://www.sapo.pt/ -# added shaw.ca (powered by Google) http://start.shaw.ca/ -# added Searchalot http://www.searchalot.com/ -# added Copernic http://www.copernic.com/ -# added 216.109.125.130 to Yahoo -# added 66.218.69.11 to Yahoo -# added Avantfind http://www.avantfind.com/ -# added Steadysearch http://www.steadysearch.com/ -# added Steadysearch http://www.steady-search.com/ -# modified 216\.239\.5[0-9]\.104/search to 216\.239\.5[0-9]\.104 - - -# SearchEnginesSearchIDOrder -# It contains all matching criteria to search for in log fields. This list is -# used to know in which order to search Search Engines IDs. 
-# Most frequent one are in list1, used when LevelForSearchEnginesDetection is 1 or more -# Minor robots are in list2, used when LevelForSearchEnginesDetection is 2 or more -# Note: Regex IDs are in lower case and ' ' and '+' are changed into '_' -#------------------------------------------------------------------------------ -@SearchEnginesSearchIDOrder_list1=( -# Major international search engines -'google\.[\w.]+/products', -'base\.google\.', -'froogle\.google\.', -'groups\.google\.', -'images\.google\.', -'google\.', -'googlee\.', -'googlecom\.com', -'goggle\.co\.hu', -'216\.239\.32\.20', -'173\.194\.32\.223', -'216\.239\.(35|37|39|51)\.100', -'216\.239\.(35|37|39|51)\.101', -'216\.239\.5[0-9]\.104', -'64\.233\.1[0-9]{2}\.104', -'66\.102\.[1-9]\.104', -'66\.249\.93\.104', -'72\.14\.2[0-9]{2}\.104', -'msn\.', -'live\.com', -'bing\.', -'voila\.', -'mindset\.research\.yahoo', -'yahoo\.','(66\.218\.71\.225|216\.109\.117\.135|216\.109\.125\.130|66\.218\.69\.11)', -'search\.aol\.co', -'tiscali\.', -'lycos\.', -'alexa\.com', -'alltheweb\.com', -'altavista\.', -'a9\.com', -'dmoz\.org', -'netscape\.', -'search\.terra\.', -'www\.search\.com', -'search\.sli\.sympatico\.ca', -'excite\.' -); - -@SearchEnginesSearchIDOrder_list2=( -# Minor international search engines -'4\-counter\.com', -'att\.net', -'bungeebonesdotcom', -'northernlight\.', -'hotbot\.', -'kvasir\.', -'webcrawler\.', -'metacrawler\.', -'go2net\.com', -'(^|\.)go\.com', -'euroseek\.', -'looksmart\.', -'spray\.', -'nbci\.com\/search', -'de\.ask.\com', # break out Ask country specific engines. 
(.jp is in Japan section) -'es\.ask.\com', -'fr\.ask.\com', -'it\.ask.\com', -'nl\.ask.\com', -'uk\.ask.\com', -'(^|\.)ask\.com', -'atomz\.', -'overture\.com', # Replace 'goto\.com','Goto.com', -'teoma\.', -'findarticles\.com', -'infospace\.com', -'mamma\.', -'dejanews\.', -'dogpile\.com', -'wisenut\.com', -'ixquick\.com', -'search\.earthlink\.net', -'i-une\.com', -'blingo\.com', -'centraldatabase\.org', -'clusty\.com', -'mysearch\.', -'vivisimo\.com', -'kartoo\.com', -'icerocket\.com', -'sphere\.com', -'ledix\.net', -'start\.shaw\.ca', -'searchalot\.com', -'copernic\.com', -'avantfind\.com', -'steadysearch\.com', -'steady-search\.com', -'claro-search\.com', -'www1\.search-results\.com', -'www\.holasearch\.com', -'search\.conduit\.com', -'static\.flipora\.com', -'(?:www[12]?|mixidj)\.delta-search\.com', -'start\.iminent\.com', -'www\.searchmobileonline\.com', -'int\.search-results\.com', -'www2\.inbox\.com', -'www\.govome\.com', -'find1friend\.com', -'start\.mysearchdial\.com', -'go\.speedbit\.com', -'search\.certified-toolbar\.com', -'search\.sweetim\.com', -'search\.searchcompletion\.com', -'en\.eazel\.com', -'sr\.searchfunmoods\.com', -'173\.194\.35\.177', -'dalesearch\.com', -'sweetpacks-search\.com', -'searchgol\.com', -'duckduckgo\.com', -'sr\.facemoods\.com', -'shoppstop\.com', -'searchya\.com', -'picsearch\.de', -'webssearches\.com', -'airzip\.inspsearch\.com', -'zapmeta\.de', -'localmoxie\.com', -'search-results\.mobi', -'androidsearch\.com', -'isearch\.nation\.com', -'search\.zonealarm\.com', -'www\.buenosearch\.com', -'search\.foxtab\.com', -'searches\.qone8\.com', -'startpage\.com', -'www\.qwant\.com', -'searches\.safehomepage\.com', -'searches\.vi-view\.com', -'wow\.utop\.it', -'windowssearch\.com', -'www\.wow\.com', -'globososo\.', -'kingtale3\.inspsearch\.com', -'swisscows\.ch', -'preciobarato\.xyz', -'www\.dregol\.com', -'search\.socialdownloadr\.com', -'int\.search\.myway\.com', -'de\.dolphin\.com', -'mys\.yoursearch\.me', -# Chello Portals 
-'chello\.at', -'chello\.be', -'chello\.cz', -'chello\.fr', -'chello\.hu', -'chello\.nl', -'chello\.no', -'chello\.pl', -'chello\.se', -'chello\.sk', -'chello', # required as catchall for new countries not yet known -# Mirago -'mirago\.be', -'mirago\.ch', -'mirago\.de', -'mirago\.dk', -'es\.mirago\.com', -'mirago\.fr', -'mirago\.it', -'mirago\.nl', -'no\.mirago\.com', -'mirago\.se', -'mirago\.co\.uk', -'mirago', # required as catchall for new countries not yet known -'answerbus\.com', -'icq\.com\/search', -'nusearch\.com', -'goodsearch\.com', -'scroogle\.org', -'questionanswering\.com', -'mywebsearch\.com', -'as\.starware\.com', -# Social Bookmarking Services -'del\.icio\.us', -'digg\.com', -'stumbleupon\.com', -'swik\.net', -'segnalo\.alice\.it', -'ineffabile\.it', -# Minor Australian search engines -'anzwers\.com\.au', -# Minor brazilian search engines -'engine\.exe', 'miner\.bol\.com\.br', -# Minor chinese search engines -'\.baidu\.com', # baidu search portal -'\.vnet\.cn', # powered by MSN -'\.soso\.com', # powered by Google -'\.sogou\.com', # powered by Sohu -'\.3721\.com', # powered by Yahoo! 
-'iask\.com', # powered by Sina -'\.accoona\.com', # Accoona -'\.163\.com', # powered by Google -'\.zhongsou\.com', # zhongsou search portal -# Minor czech search engines -'atlas\.cz','seznam\.cz','quick\.cz','centrum\.cz','jyxo\.(cz|com)','najdi\.to','redbox\.cz', -'isearch\.avg\.com', -# Minor danish search-engines -'opasia\.dk', 'danielsen\.com', 'sol\.dk', 'jubii\.dk', 'find\.dk', 'edderkoppen\.dk', 'netstjernen\.dk', 'orbis\.dk', 'tyfon\.dk', '1klik\.dk', 'ofir\.dk', -# Minor dutch search engines -'ilse\.','vindex\.', -# Minor english search engines -'(^|\.)ask\.co\.uk','bbc\.co\.uk/cgi-bin/search','ifind\.freeserve','looksmart\.co\.uk','splut\.','spotjockey\.','ukdirectory\.','ukindex\.co\.uk','ukplus\.','searchy\.co\.uk', -'search\.fbdownloader\.com', -'search\.fdownloadr\.com', -'search\.babylon\.com', -'my\.allgameshome\.com', -'surfcanyon\.com', -'uk\.foxstart\.com', -'yandex\.com', -# Minor finnish search engines -'haku\.www\.fi', -# Minor french search engines -'recherche\.aol\.fr','ctrouve\.','francite\.','\.lbb\.org','rechercher\.libertysurf\.fr', 'search[\w\-]+\.free\.fr', 'recherche\.club-internet\.fr', -'toile\.com', 'biglotron\.com', -'mozbot\.fr', -# Minor german search engines -'sucheaol\.aol\.de', -'o2suche\.aol\.de', -'fireball\.de','infoseek\.de','suche\d?\.web\.de','[a-z]serv\.rrzn\.uni-hannover\.de', -'suchen\.abacho\.de','(brisbane|suche)\.t-online\.de','allesklar\.de','meinestadt\.de', -'212\.227\.33\.241', -'(161\.58\.227\.204|161\.58\.247\.101|212\.40\.165\.90|213\.133\.108\.202|217\.160\.108\.151|217\.160\.111\.99|217\.160\.131\.108|217\.160\.142\.227|217\.160\.176\.42)', -'wwweasel\.de', -'netluchs\.de', -'schoenerbrausen\.de', -'suche\.gmx\.net', -'suche\.gmx\.at', -'ecosia\.org', -'de\.aolsearch\.com', -'suche\.aol\.de', -'www\.startxxl\.com', -'www\.benefind\.de', -'www\.amazon\.de.*search', #Just as a reminder, probably will not work as AWstats seems to consider the host part of an URL only -'de\.wow\.com', -'www\.vlips\.de', 
-'metager\.de', -'search\.1und1\.de', -'sm\.de', -'sumaja\.de', -'navigationshilfe\.t-online\.de', -'umfis\.de', -'fastbot\.de', -'tixuma\.de', -'suche\.freenet\.de', -'www\.izito\.de', -'extern\.peoplecheck\.de', -'www\.oneseek\.de', -'de\.wiki\.gov\.cn', -'umuwa\.de', -'suche\.1und1\.de', -'www\.metasuche\.ch', -# Minor Hungarian search engines -'heureka\.hu','vizsla\.origo\.hu','lapkereso\.hu','goliat\.hu','index\.hu','wahoo\.hu','webmania\.hu','search\.internetto\.hu', -'tango\.hu', -'keresolap\.hu', -'kereso\.startlap\.hu', -'polymeta\.hu', -# Minor Indian search engines -'sify\.com', -# Minor Italian search engines -'virgilio\.it','arianna\.libero\.it','supereva\.com','kataweb\.it','search\.alice\.it\.master','search\.alice\.it','gotuneed\.com', -'godado','jumpy\.it','shinyseek\.it','teecno\.it', -# Minor Israeli search engines -'search\.genieo\.com', -# Minor Japanese search engines -'ask\.jp','sagool\.jp', -'websearch\.rakuten\.co\.jp', -# Minor Norwegian search engines -'sok\.start\.no', 'eniro\.no', -# Minor Polish search engines -'szukaj\.wp\.pl','szukaj\.onet\.pl','dodaj\.pl','gazeta\.pl','gery\.pl','hoga\.pl','netsprint\.pl','interia\.pl','katalog\.onet\.pl','o2\.pl','polska\.pl','szukacz\.pl','wow\.pl', -# Minor russian search engines -'ya(ndex)?\.ru', 'aport\.ru', 'rambler\.ru', 'turtle\.ru', 'metabot\.ru', -'go\.mail\.ru', -# Minor Swedish search engines -'evreka\.passagen\.se','eniro\.se', -# Minor Slovak search engines -'zoznam\.sk', -# Minor Portuguese search engines -'sapo\.pt', -# Minor swiss search engines -'search\.ch', 'search\.bluewin\.ch', -'www\.zapmeta\.ch', -'etools\.ch', -# Minor Croatian, Serbian, Macedonian, Bosnian and Herzegovinian search engines -'pogodak\.' 
-); -@SearchEnginesSearchIDOrder_listgen=( -# Generic search engines -'search\..*\.\w+' -); - - -# NotSearchEnginesKeys -# If a search engine key is found, we check its exclude list to know if it's -# really a search engine -#------------------------------------------------------------------------------ -%NotSearchEnginesKeys=( -'altavista\.'=>'babelfish\.altavista\.', -'google\.'=>'mail\.google\.', -'google\.'=>'translate\.google\.', -'google\.'=>'code\.google\.', -'google\.'=>'groups\.google\.', -'msn\.'=>'hotmail\.msn\.', -'tiscali\.'=>'mail\.tiscali\.', -'yahoo\.'=>'(?:picks|mail)\.yahoo\.|yahoo\.[^/]+/picks', -'yandex\.'=>'direct\.yandex\.' -); - - -# SearchEnginesHashID -# Each Search Engine Search ID is associated to an AWStats id string -#------------------------------------------------------------------------------ -%SearchEnginesHashID = ( -# Major international search engines -'google\.[\w.]+/products','google_products', -'base\.google\.','google_base', -'froogle\.google\.','google_froogle', -'groups\.google\.','google_groups', -'images\.google\.','google_image', -'google\.','google', -'googlee\.','google', -'googlecom\.com','google', -'goggle\.co\.hu','google', -'216\.239\.32\.20', 'google', -'173\.194\.32\.223', 'google', -'216\.239\.(35|37|39|51)\.100','google_cache', -'216\.239\.(35|37|39|51)\.101','google_cache', -'216\.239\.5[0-9]\.104','google_cache', -'64\.233\.1[0-9]{2}\.104','google_cache', -'66\.102\.[1-9]\.104','google_cache', -'66\.249\.93\.104','google_cache', -'72\.14\.2[0-9]{2}\.104','google_cache', -'msn\.','msn', -'live\.com','live', -'bing\.','bing', -'voila\.','voila', -'mindset\.research\.yahoo','yahoo_mindset', -'yahoo\.','yahoo','(66\.218\.71\.225|216\.109\.117\.135|216\.109\.125\.130|66\.218\.69\.11)','yahoo', -'lycos\.','lycos', -'alexa\.com','alexa', -'alltheweb\.com','alltheweb', -'altavista\.','altavista', -'a9\.com','a9', -'dmoz\.org','dmoz', -'netscape\.','netscape', -'search\.terra\.','terra', 
-'www\.search\.com','search.com', -'tiscali\.','tiscali', -'search\.aol\.co','aol', -'search\.sli\.sympatico\.ca','sympatico', -'excite\.','excite', -# Minor international search engines -'4\-counter\.com','google4counter', -'att\.net','att', -'bungeebonesdotcom','bungeebonesdotcom', -'northernlight\.','northernlight', -'hotbot\.','hotbot', -'kvasir\.','kvasir', -'webcrawler\.','webcrawler', -'metacrawler\.','metacrawler', -'go2net\.com','go2net', -'(^|\.)go\.com','go', -'euroseek\.','euroseek', -'looksmart\.','looksmart', -'spray\.','spray', -'nbci\.com\/search','nbci', -'de\.ask.\com','askde', # break out Ask country specific engines. -'es\.ask.\com','askes', -'fr\.ask.\com','askfr', -'it\.ask.\com','askit', -'nl\.ask.\com','asknl', -'uk\.ask.\com','askuk', -'(^|\.)ask\.co\.uk','askuk', -'(^|\.)ask\.com','ask', -'atomz\.','atomz', -'overture\.com','overture', # Replace 'goto\.com','Goto.com', -'teoma\.','teoma', -'findarticles\.com','findarticles', -'infospace\.com','infospace', -'mamma\.','mamma', -'dejanews\.','dejanews', -'dogpile\.com','dogpile', -'wisenut\.com','wisenut', -'ixquick\.com','ixquick', -'search\.earthlink\.net','earthlink', -'i-une\.com','iune', -'blingo\.com','blingo', -'centraldatabase\.org','centraldatabase', -'clusty\.com','clusty', -'mysearch\.','mysearch', -'vivisimo\.com','vivisimo', -'kartoo\.com','kartoo', -'icerocket\.com','icerocket', -'sphere\.com','sphere', -'ledix\.net','ledix', -'start\.shaw\.ca','shawca', -'searchalot\.com','searchalot', -'copernic\.com','copernic', -'avantfind\.com','avantfind', -'steadysearch\.com','steadysearch', -'steady-search\.com','steadysearch', -'claro-search\.com','clarosearch', -'www1\.search-results\.com', 'searchresults', -'www\.holasearch\.com', 'holasearch', -'search\.conduit\.com', 'conduit', -'static\.flipora\.com', 'flipora', -'(?:www[12]?|mixidj)\.delta-search\.com', 'delta-search', -'start\.iminent\.com', 'iminent', -'www\.searchmobileonline\.com', 'searchmobileonline', 
-'int\.search-results\.com', 'nortonsavesearch', -'www2\.inbox\.com', 'inbox', -'www\.govome\.com', 'govome', -'find1friend\.com', 'find1friend', -'start\.mysearchdial\.com', 'mysearchdial', -'go\.speedbit\.com', 'speedbit', -'search\.certified-toolbar\.com', 'certifiedtoolbarsearch', -'search\.sweetim\.com', 'sweetim', -'search\.searchcompletion\.com', 'searchcompletion', -'en\.eazel\.com','eazelsearch', -'sr\.searchfunmoods\.com', 'searchfunmoods', -'173\.194\.35\.177', 'googleByIP', -'dalesearch\.com', 'dalesearch', -'sweetpacks-search\.com', 'sweetpacks', -'searchgol\.com', 'searchgol', -'duckduckgo\.com', 'duckduckgo', -'sr\.facemoods\.com', 'facemoods', -'shoppstop\.com', 'shoppstop', -'searchya\.com', 'searchya', -'picsearch\.de', 'picsearch', -'webssearches\.com', 'webssearches', -'airzip\.inspsearch\.com', 'webssearches', -'zapmeta\.de', 'zapmeta', -'localmoxie\.com', 'localmoxie', -'search-results\.mobi', 'search-results_mobi', -'androidsearch\.com', 'androidsearch', -'isearch\.nation\.com', 'isearch_nation_com', -'search\.zonealarm\.com', 'search_zonealarm_com', -'www\.buenosearch\.com', 'www_buenosearch_com', -'search\.foxtab\.com', 'search_foxtab_com', -'searches\.qone8\.com', 'searches_qone8_com', -'startpage\.com', 'startpage_com', -'www\.qwant\.com', 'qwant_com', -'searches\.safehomepage\.com', 'safehomepage_com', -'searches\.vi-view\.com', 'vi-view_com', -'wow\.utop\.it', 'wow_utop_it', -'windowssearch\.com', 'windowssearch_com', -'www\.wow\.com', 'www_wow_com', -'globososo\.', 'globososo', -'kingtale3\.inspsearch\.com', 'globososo', -'swisscows\.ch', 'swisscows_ch', -'preciobarato\.xyz', 'preciobarato_xyz', -'www\.dregol\.com', 'www_dregol_com', -'search\.socialdownloadr\.com', 'search_socialdownloadr_com', -'int\.search\.myway\.com', 'int_search_myway_com', -'de\.dolphin\.com', 'de_dolphin_com', -'mys\.yoursearch\.me', 'mys_yoursearch_me', -# Chello Portals -'chello\.at','chelloat', -'chello\.be','chellobe', -'chello\.cz','chellocz', 
-'chello\.fr','chellofr', -'chello\.hu','chellohu', -'chello\.nl','chellonl', -'chello\.no','chellono', -'chello\.pl','chellopl', -'chello\.se','chellose', -'chello\.sk','chellosk', -'chello','chellocom', -# Mirago -'mirago\.be','miragobe', -'mirago\.ch','miragoch', -'mirago\.de','miragode', -'mirago\.dk','miragodk', -'es\.mirago\.com','miragoes', -'mirago\.fr','miragofr', -'mirago\.it','miragoit', -'mirago\.nl','miragonl', -'no\.mirago\.com','miragono', -'mirago\.se','miragose', -'mirago\.co\.uk','miragocouk', -'mirago','mirago', # required as catchall for new countries not yet known -'answerbus\.com','answerbus', -'icq\.com\/search','icq', -'nusearch\.com','nusearch', -'goodsearch\.com','goodsearch', -'scroogle\.org','scroogle', -'questionanswering\.com','questionanswering', -'mywebsearch\.com','mywebsearch', -'as\.starware\.com','comettoolbar', -# Social Bookmarking Services -'del\.icio\.us','delicious', -'digg\.com','digg', -'stumbleupon\.com','stumbleupon', -'swik\.net','swik', -'segnalo\.alice\.it','segnalo', -'ineffabile\.it','ineffabile', -# Minor Australian search engines -'anzwers\.com\.au','anzwers', -# Minor brazilian search engines -'engine\.exe','engine', -'miner\.bol\.com\.br','miner', -# Minor chinese search engines -'\.baidu\.com','baidu', -'iask\.com','iask', -'\.accoona\.com','accoona', -'\.3721\.com','3721', -'\.163\.com','netease', -'\.soso\.com','soso', -'\.zhongsou\.com','zhongsou', -'\.vnet\.cn','vnet', -'\.sogou\.com','sogou', -# Minor czech search engines -'atlas\.cz','atlas', -'seznam\.cz','seznam', -'quick\.cz','quick', -'centrum\.cz','centrum', -'jyxo\.(cz|com)','jyxo', -'najdi\.to','najdi', -'redbox\.cz','redbox', -'isearch\.avg\.com', 'avgsearch', -# Minor danish search-engines -'opasia\.dk','opasia', -'danielsen\.com','danielsen', -'sol\.dk','sol', -'jubii\.dk','jubii', -'find\.dk','finddk', -'edderkoppen\.dk','edderkoppen', -'netstjernen\.dk','netstjernen', -'orbis\.dk','orbis', -'tyfon\.dk','tyfon', -'1klik\.dk','1klik', 
-'ofir\.dk','ofir', -# Minor dutch search engines -'ilse\.','ilse', -'vindex\.','vindex', -# Minor english search engines -'bbc\.co\.uk/cgi-bin/search','bbc', -'ifind\.freeserve','freeserve', -'looksmart\.co\.uk','looksmartuk', -'splut\.','splut', -'spotjockey\.','spotjockey', -'ukdirectory\.','ukdirectory', -'ukindex\.co\.uk','ukindex', -'ukplus\.','ukplus', -'searchy\.co\.uk','searchy', -'search\.fbdownloader\.com','fbdownloader', -'search\.fdownloadr\.com', 'fdownloadr_com', -'search\.babylon\.com', 'babylon', -'my\.allgameshome\.com', 'allgameshome', -'surfcanyon\.com', 'surfcanyon_com', -'uk\.foxstart\.com', 'uk_foxstart_com', -'yandex\.com', 'yandex_com', -# Minor finnish search engines -'haku\.www\.fi','haku', -# Minor french search engines -'recherche\.aol\.fr','aolfr', -'ctrouve\.','ctrouve', -'francite\.','francite', -'\.lbb\.org','lbb', -'rechercher\.libertysurf\.fr','libertysurf', -'search[\w\-]+\.free\.fr','free', -'recherche\.club-internet\.fr','clubinternet', -'toile\.com','toile', -'biglotron\.com', 'biglotron', -'mozbot\.fr', 'mozbot', -# Minor german search engines -'sucheaol\.aol\.de','aolde', -'o2suche\.aol\.de','o2aolde', -'fireball\.de','fireball', -'infoseek\.de','infoseek', -'suche\d?\.web\.de','webde', -'[a-z]serv\.rrzn\.uni-hannover\.de','meta', -'suchen\.abacho\.de','abacho', -'(brisbane|suche)\.t-online\.de','t-online', -'allesklar\.de','allesklar', -'meinestadt\.de','meinestadt', -'212\.227\.33\.241','metaspinner', -'(161\.58\.227\.204|161\.58\.247\.101|212\.40\.165\.90|213\.133\.108\.202|217\.160\.108\.151|217\.160\.111\.99|217\.160\.131\.108|217\.160\.142\.227|217\.160\.176\.42)','metacrawler_de', -'wwweasel\.de','wwweasel', -'netluchs\.de','netluchs', -'schoenerbrausen\.de','schoenerbrausen', -'suche\.gmx\.net', 'gmxsuche', -'suche\.gmx\.at', 'gmxsuche_at', -'ecosia\.org', 'ecosiasearch', -'de\.aolsearch\.com', 'aolsearch', -'suche\.aol\.de', 'aolsuche', -'www\.startxxl\.com', 'startxxl', -'www\.benefind\.de', 'benefind', 
-'www\.amazon\.de.*search', 'amazonsearch', #Not clear if this matches amazon searches only -'de\.wow\.com', 'wowsearch', -'www\.vlips\.de', 'vlips_de', -'metager\.de', 'metager', -'search\.1und1\.de', 'search_1und1_de', -'sm\.de', 'smde', -'sumaja\.de', 'sumaja', -'navigationshilfe\.t-online\.de', 'navigationshilfe', -'umfis\.de', 'umfis', -'fastbot\.de', 'fastbot_de', -'tixuma\.de', 'tixuma_de', -'suche\.freenet\.de', 'freenet_de', -'www\.izito\.de', 'izito_de', -'extern\.peoplecheck\.de', 'peoplecheck_de', -'www\.oneseek\.de', 'oneseek_de', -'de\.wiki\.gov\.cn', 'de_wiki_gov_cn', -'umuwa\.de', 'umuwa_de', -'suche\.1und1\.de', '1und1_de', -'www\.metasuche\.ch', 'metasuche_ch', -# Minor Hungarian search engines -'heureka\.hu','heureka', -'vizsla\.origo\.hu','origo', -'lapkereso\.hu','lapkereso', -'goliat\.hu','goliat', -'index\.hu','indexhu', -'wahoo\.hu','wahoo', -'webmania\.hu','webmania', -'search\.internetto\.hu','internetto', -'tango\.hu','tango_hu', -'keresolap\.hu','keresolap_hu', -'kereso\.startlap\.hu', 'startlap_hu', -'polymeta\.hu','polymeta_hu', -# Minor Indian search engines -'sify\.com','sify', -# Minor Italian search engines -'virgilio\.it','virgilio', -'arianna\.libero\.it','arianna', -'supereva\.com','supereva', -'kataweb\.it','kataweb', -'search\.alice\.it\.master','aliceitmaster', -'search\.alice\.it','aliceit', -'gotuneed\.com','gotuneed', -'godado','godado', -'jumpy\.it','jumpy\.it', -'shinyseek\.it','shinyseek\.it', -'teecno\.it','teecnoit', -# Minor Israeli search engines -'search\.genieo\.com', 'genieo', -# Minor Japanese search engines -'ask\.jp','askjp', -'sagool\.jp','sagool', -'websearch\.rakuten\.co\.jp', 'rakuten', -# Minor Norwegian search engines -'sok\.start\.no','start', 'eniro\.no','eniro', -# Minor Polish search engines -'szukaj\.wp\.pl','wp', -'szukaj\.onet\.pl','onetpl', -'dodaj\.pl','dodajpl', -'gazeta\.pl','gazetapl', -'gery\.pl','gerypl', -'netsprint\.pl\/hoga\-search','hogapl', -'netsprint\.pl','netsprintpl', 
-'interia\.pl','interiapl', -'katalog\.onet\.pl','katalogonetpl', -'o2\.pl','o2pl', -'polska\.pl','polskapl', -'szukacz\.pl','szukaczpl', -'wow\.pl','wowpl', -# Minor russian search engines -'ya(ndex)?\.ru','yandex', -'aport\.ru','aport', -'rambler\.ru','rambler', -'turtle\.ru','turtle', -'metabot\.ru','metabot', -'go\.mail\.ru', 'mailru', -# Minor Swedish search engines -'evreka\.passagen\.se','passagen', -'eniro\.se','enirose', -# Minor Slovak search engines -'zoznam\.sk','zoznam', -# Minor Portuguese search engines -'sapo\.pt','sapo', -# Minor swiss search engines -'search\.ch','searchch', -'search\.bluewin\.ch','bluewin', -'www\.zapmeta\.ch', 'zapmeta_ch', -'etools\.ch', 'etools_ch', -# Minor Croatian, Serbian, Macedonian, Bosnian and Herzegovinian search engines -'pogodak\.','pogodak', -# Generic search engines -'search\..*\.\w+','search' -); - - -# SearchEnginesWithKeysNotInQuery -# List of search engines that store keyword as page instead of query parameter -#------------------------------------------------------------------------------ -%SearchEnginesWithKeysNotInQuery=( -'a9',1, # www.a9.com/searchkey1%20searchkey2 -'iminent',1, #http://start.iminent.com/StartWeb/1031/toolbox/#q=searchkey1%20searchkey2&additional_arguments -'de_wiki_gov_cn',1, #http://de.wiki.gov.cn/s_searchkey1%20searchkey2 -'umuwa_de', 1, #http://umuwa.de/searchkey or http://umuwa.de/searchkey/Images -'amazonsearch', 1 #http://www.amazon.de/gp/bit/apps/web/SERP/search/ref=bit_bds-p24_serp_cr_de?ie=UTF8tagbase=bds-p24&query=deutsch+8.+klasse+gymnasium+protokoll -); - -# SearchEnginesKnownUrl -# Known rules to extract keywords from a referrer search engine URL -#------------------------------------------------------------------------------ -%SearchEnginesKnownUrl=( -# Most common search engines -'alexa','q=', -'alltheweb','q(|uery)=', -'altavista','q=', -'a9','a9\.com\/', -'dmoz','search=', -'google_products','(p|q|as_p|as_q)=', -'google_base','(p|q|as_p|as_q)=', 
-'google_froogle','(p|q|as_p|as_q)=', -'google_groups','group\/', # does not work -'google_image','(p|q|as_p|as_q)=', -'google_cache','(p|q|as_p|as_q)=cache:[0-9A-Za-z]{12}:', -'google','(p|q|as_p|as_q)=', -'lycos','query=', -'msn','q=', -'live','q=', -'bing','q=', -'netscape','search=', -'tiscali','key=', -'aol','query=', -'terra','query=', -'voila','(kw|rdata)=', -'search.com','q=', -'yahoo_mindset','p=', -'yahoo','p=', -'sympatico', 'query=', -'excite','search=', -# Minor international search engines -'google4counter','(p|q|as_p|as_q)=', -'att','qry=', -'bungeebonesdotcom','query=', -'go','qt=', -'askde','(ask|q)=', # break out Ask country specific engines. -'askes','(ask|q)=', -'askfr','(ask|q)=', -'askit','(ask|q)=', -'asknl','(ask|q)=', -'ask','(ask|q)=', -'atomz','sp-q=', -'euroseek','query=', -'findarticles','key=', -'go2net','general=', -'hotbot','mt=', -'infospace','qkw=', -'kvasir', 'q=', -'looksmart','key=', -'mamma','query=', -'metacrawler','general=', -'nbci','keyword=', -'northernlight','qr=', -'overture','keywords=', -'dogpile', 'q(|kw)=', -'spray','string=', -'teoma','q=', -'webcrawler','searchText=', -'wisenut','query=', -'ixquick', 'query=', -'earthlink', 'q=', -'iune','(keywords|q)=', -'blingo','q=', -'centraldatabase','query=', -'clusty','query=', -'mysearch','searchfor=', -'vivisimo','query=', -# kartoo: No keywords passed in referring URL. 
-'kartoo','', -'icerocket','q=', -'sphere','q=', -'ledix','q=', -'shawca','q=', -'searchalot','q=', -'copernic','web\/', -'avantfind','keywords=', -'steadysearch','w=', -'clarosearch','q=', -'searchresults','q=', -'holasearch', 'q=', -'conduit', 'q=', -'flipora', 'q=', -'delta-search', 'q=', -'iminent', 'q=', -'searchmobileonline', 'q=', -'nortonsavesearch', 'q=', -'inbox', 'q(?:kw)?=', -'govome', 'q=', -'find1friend', 'q=', -'mysearchdial', 'q=', -'speedbit', 'q=', -'certifiedtoolbarsearch', 'q=', -'sweetim', 'q=', -'searchcompletion', 'q=', -'eazelsearch', 'q=', -'searchfunmoods', 'q=', -'googleByIP', 'q=', -'dalesearch', 'q=', -'sweetpacks', 'q=', -'searchgol', 'q=', -'duckduckgo', 'uddg=', -'facemoods', 'q=', -'shoppstop', 'keywords=', -'searchya', 'q=', -'picsearch', 'q=', -'webssearches', 'q=', -'zapmeta', 'query=', -'localmoxie', 'keyword=', -'search-results_mobi', 'q=', -'androidsearch', 'q=', -'isearch_nation_com', 'q=', -'search_zonealarm_com', 'q=', -'www_buenosearch_com', 'q=', -'search_foxtab_com', 'q=', -'searches_qone8_com', 'q=', -'startpage_com', 'query=', -'qwant_com', 'q=', -'safehomepage_com', 'q=', -'vi-view_com', 'q=', -'wow_utop_it', 'q=', -'windowssearch_com', 'q=', -'www_wow_com', 'q=', -'globososo', 'q=', -'swisscows_ch', 'query=', -'preciobarato_xyz', 's=', -'www_dregol_com', 'q=', -'search_socialdownloadr_com', 'q=', -'int_search_myway_com', 'searchfor=', -'de_dolphin_com', 'q=', -'mys_yoursearch_me', 'q=', -# Chello Portals -'chelloat','q1=', -'chellobe','q1=', -'chellocz','q1=', -'chellofr','q1=', -'chellohu','q1=', -'chellonl','q1=', -'chellono','q1=', -'chellopl','q1=', -'chellose','q1=', -'chellosk','q1=', -'chellocom','q1=', -# Mirago -'miragobe','(txtsearch|qry)=', -'miragoch','(txtsearch|qry)=', -'miragode','(txtsearch|qry)=', -'miragodk','(txtsearch|qry)=', -'miragoes','(txtsearch|qry)=', -'miragofr','(txtsearch|qry)=', -'miragoit','(txtsearch|qry)=', -'miragonl','(txtsearch|qry)=', -'miragono','(txtsearch|qry)=', 
-'miragose','(txtsearch|qry)=', -'miragocouk','(txtsearch|qry)=', -'mirago','(txtsearch|qry)=', -'answerbus','', # Does not provide query parameters -'icq','q=', -'nusearch','nusearch_terms=', -'goodsearch','Keywords=', -'scroogle','Gw=', # Does not always provide query parameters -'questionanswering','', -'mywebsearch','searchfor=', -'comettoolbar','qry=', -# Social Bookmarking Services -'delicious','all=', -'digg','s=', -'stumbleupon','', -'swik','swik\.net/', # does not work. Keywords follow domain, e.g. http://swik.net/awstats+analytics -'segnalo','', -'ineffabile','', -# Minor Australian search engines -'anzwers','search=', -# Minor brazilian search engines -'engine','p1=', 'miner','q=', -# Minor chinese search engines -'baidu','(wd|word)=', -'iask','(w|k)=', -'accoona','qt=', -'3721','(p|name)=', -'netease','q=', -'soso','q=', -'zhongsou','(word|w)=', -'sogou', 'query=', -'vnet','kw=', -# Minor czech search engines -'atlas','(searchtext|q)=', 'seznam','(w|q)=', 'quick','query=', 'centrum','q=', 'jyxo','(s|q)=', 'najdi','dotaz=', 'redbox','srch=', -'avgsearch', 'q=', -# Minor danish search engines -'opasia','q=', 'danielsen','q=', 'sol','q=', 'jubii','soegeord=', 'finddk','words=', 'edderkoppen','query=', 'orbis','search_field=', '1klik','query=', 'ofir','querytext=', -# Minor dutch search engines -'ilse','search_for=', 'vindex','in=', -# Minor english search engines -'askuk','(ask|q)=', 'bbc','q=', 'freeserve','q=', 'looksmartuk','key=', -'splut','pattern=', 'spotjockey','Search_Keyword=', 'ukindex', 'stext=', 'ukdirectory','k=', 'ukplus','search=', 'searchy', 'search_term=', -'fbdownloader','q=', -'fdownloadr_com', 'q=', -'babylon','q=', -'allgameshome', 's=', -'surfcanyon_com', 'q=', -'uk_foxstart_com', 'q=', -'yandex_com', 'text=', -# Minor finnish search engines -'haku','w=', -# Minor french search engines -'francite','name=', 'clubinternet', 'q=', -'toile', 'q=', -'biglotron','question=', -'mozbot','q=', -# Minor german search engines -'aolde','q=', 
-'o2aolde', 'q=', -'fireball','q=', 'infoseek','qt=', 'webde','su=', -'abacho','q=', 't-online','q=', -'metaspinner','qry=', -'metacrawler_de','qry=', -'wwweasel','q=', -'netluchs','query=', -'schoenerbrausen','q=', -'gmxsuche', 'q=', -'gmxsuche_at', 'q=', -'ecosiasearch', 'q=', -'aolsearch', 'q=', -'aolsuche', 'q=', -'startxxl', 'q=', -'benefind', 'q=', -'amazonsearch', 'query=', -'wowsearch', 'q=', -'vlips_de', 'q=', -'metager', 'eingabe=', -'search_1und1_de', 'q=', -'smde', 'q=', -#'sumaja', 'no query string available', #There is no query string in the referrer url -'navigationshilfe', 'q=', -'umfis', 'suchbegriff=', -'fastbot_de', 'red=[0-9]*\+', -'tixuma_de', 'sc=', -'freenet_de', 'query=', -'izito_de', 'q=', -'peoplecheck_de', 'q=', -'oneseek_de', 'q=', -'de_wiki_gov_cn', 'de\.wiki\.gov\.cn\/s_', -'umuwa_de', 'umuwa\.de\/', -'1und1_de', 'q=', -'metasuche_ch', 'q=', -# Minor Hungarian search engines -'heureka','heureka=', 'origo','(q|search)=', 'goliat','KERESES=', 'wahoo','q=', 'internetto','searchstr=', -'keresolap_hu','q=', -'startlap_hu', 'q=', -'tango_hu','q=', -'polymeta_hu','', -# Minor Indian search engines -'sify','keyword=', -# Minor Italian search engines -'virgilio','qs=', -'arianna','query=', -'supereva','q=', -'kataweb','q=', -'aliceitmaster','qs=', -'aliceit','qs=', -'gotuneed','', # Not yet known -'godado','Keywords=', -'jumpy\.it','searchWord=', -'shinyseek\.it','KEY=', -'teecnoit','q=', -# Minor Israeli search engines -'genieo','q=', -# Minor Japanese search engines -'askjp','(ask|q)=', -'sagool','q=', -'rakuten', 'qt=', -# Minor Norwegian search engines -'start','q=', 'eniro','q=', -# Minor Polish search engines -'wp','szukaj=', -'onetpl','qt=', -'dodajpl','keyword=', -'gazetapl','slowo=', -'gerypl','q=', -'hogapl','qt=', -'netsprintpl','q=', -'interiapl','q=', -'katalogonetpl','qt=', -'o2pl','qt=', -'polskapl','qt=', -'szukaczpl','q=', -'wowpl','q=', -# Minor russian search engines -'yandex', 'text=', 'rambler','words=', 'aport', 'r=', 
'metabot', 'st=', -'mailru', 'q=', -# Minor swedish search engines -'passagen','q=', -'enirose', 'hitta:', #Not sure if this works, as the keywords are part of the URL, and therefore the URL does not contain a question mark. -# Minor swiss search engines -'searchch', 'q=', 'bluewin', 'qry=', -'zapmeta_ch', 'query=', -'etools_ch', 'query=', -# Minor Croatian, Serbian, Macedonian, Bosnian and Herzegovinian search engines -'pogodak', 'q=' -); - -# SearchEnginesKnownUrlNotFound -# Known rules to extract not found keywords from a referrer search engine URL -#------------------------------------------------------------------------------ -%SearchEnginesKnownUrlNotFound=( -# Most common search engines -'msn','origq=' -); - -# If no rules are known, we take first paramater not into WordsToCleanSearchUrl -#------------------------------------------------------------------------------ -@WordsToCleanSearchUrl= ('act=','annuaire=','btng=','cat=','categoria=','cfg=','cof=','cou=','count=','cp=','dd=','domain=','dt=','dw=','enc=','exec=','geo=','hc=','height=','hits=','hl=','hq=','hs=','id=','kl=','lang=','loc=','lr=','matchmode=','medor=','message=','meta=','mode=','order=','page=','par=','pays=','pg=','pos=','prg=','qc=','refer=','sa=','safe=','sc=','sort=','src=','start=','style=','stype=','sum=','tag=','temp=','theme=','type=','url=','user=','width=','what=','\\.x=','\\.y=','y=','look='); - -# SearchEnginesKnownUTFCoding -# Known parameter that proves a search engine has coded its parameters in UTF-8 -#------------------------------------------------------------------------------ -%SearchEnginesKnownUTFCoding=( -# Most common search engines -'google','ie=utf-8', -'alltheweb','cs=utf-8' -); - - -# SearchEnginesHashLib -# List of search engines names -# 'search_engine_id', 'search_engine_name', -#------------------------------------------------------------------------------ -%SearchEnginesHashLib=( -# Major international search engines -'alexa','Alexa', 
-'alltheweb','AllTheWeb', -'altavista','AltaVista', -'a9', 'A9', -'dmoz','DMOZ', -'google_products','Google (Products)', -'google_base','Google (Base)', -'google_froogle','Froogle (Google)', -'google_groups','Google (Groups)', -'google_image','Google (Images)', -'google_cache','Google (cache)', -'google','Google', -'lycos','Lycos', -'msn','Microsoft MSN Search', -'live','Microsoft Windows Live', -'bing','Microsoft Bing', -'netscape','Netscape', -'aol','AOL', -'terra','Terra', -'tiscali','Tiscali', -'voila','Voila', -'search.com','Search.com', -'yahoo_mindset','Yahoo! Mindset', -'yahoo','Yahoo!', -'sympatico','Sympatico', -'excite','Excite', -# Minor international search engines -'google4counter','4-counter (Google)', -'att','AT&T search (powered by Google)', -'bungeebonesdotcom','BungeeBones', -'go','Go.com', -'askde','Ask Deutschland', -'askes','Ask España', # break out Ask country specific engines. -'askfr','Ask France', -'askit','Ask Italia', -'asknl','Ask Nederland', -'ask','Ask', -'atomz','Atomz', -'dejanews','DejaNews', -'euroseek','Euroseek', -'findarticles','Find Articles', -'go2net','Go2Net (Metamoteur)', -'hotbot','Hotbot', -'infospace','InfoSpace', -'kvasir','Kvasir', -'looksmart','Looksmart', -'mamma','Mamma', -'metacrawler','MetaCrawler (Metamoteur)', -'nbci','NBCI', -'northernlight','NorthernLight', -'overture','Overture', # Replace 'goto\.com','Goto.com', -'dogpile','Dogpile', -'spray','Spray', -'teoma','Teoma', # Replace 'directhit\.com','DirectHit', -'webcrawler','WebCrawler', -'wisenut','WISENut', -'ixquick','ix quick', -'earthlink', 'Earth Link', -'iune','i-une', -'blingo','Blingo', -'centraldatabase','GPU p2p search', -'clusty','Clusty', -'mysearch','My Search', -'vivisimo','Vivisimo', -'kartoo','Kartoo', -'icerocket','Icerocket (Blog)', -'sphere','Sphere (Blog)', -'ledix','Ledix', -'shawca','Shaw.ca', -'searchalot','Searchalot', -'copernic','Copernic', -'avantfind','Avantfind', -'steadysearch','Avantfind', -'clarosearch','Claro Search', 
-'searchresults','Search-results', -'holasearch', 'Hola Search', -'conduit', 'Conduit Search', -'flipora', 'Flipora', -'delta-search', 'Delta Search', -'iminent', 'Iminent', -'searchmobileonline', 'Search Mobile Online (StartApp)', -'nortonsavesearch', 'Norton Safe Search', -'inbox', 'Inbox Search', -'govome', 'Govome', -'find1friend', 'Find1Friend', -'mysearchdial', 'My Search Dial', -'speedbit', 'Speedbit', -'certifiedtoolbarsearch', 'Certified-Toolbar Search', -'sweetim', 'SweetIM Search', -'searchcompletion', 'SearchCompletion Search', -'eazelsearch', 'Eazel Search', -'searchfunmoods', 'Funmoods', -'googleByIP', 'Google (Access by IP-Address)', -'dalesearch', 'Dale Search', -'sweetpacks', 'Sweetpacks', -'searchgol', 'Search-Gol', -'duckduckgo', 'DuckDuckGo (Does not provide search keyphrases, using found page instead)', -'facemoods', 'Facemoods Search', -'shoppstop', 'ShoppStop', -'searchya', 'Searchya', -'picsearch', 'picsearch', -'webssearches', 'Various variants of Webssearches EMG Technologies and airzip.inspsearch.com', -#Jan 8, 2016: No genuine inspsearch.com search engine seems so exist, but there is a couple of search engines using subdomains of inspsearch.com. Unclear how these are related to each other. 
-'zapmeta', 'ZapMeta', -'localmoxie', 'Local Moxie', -'search-results_mobi', 'search-results.mobi', -'androidsearch', 'androidsearch.com', -'isearch_nation_com', 'Nation Search', -'search_zonealarm_com', 'Zone Alarm Search', -'www_buenosearch_com', 'BuenoSearch', -'search_foxtab_com', 'Foxtab Search', -'searches_qone8_com', 'Omiga-Plus', -'startpage_com', 'Startpage', -'qwant_com', 'qwant.com', -'safehomepage_com', 'safehomepage.com', -'vi-view_com', 'vi-view.com', -'wow_utop_it', 'wow.utop.it', -'windowssearch_com', 'windowssearch.com', -'www_wow_com', 'WOW.com', -'globososo', 'Various variants of Globososo (Kingtale Technology): www, searches, searches3, and at inspsearch.com (globososo, kingtale3)', -'swisscows_ch', 'Swisscows', -'preciobarato_xyz', 'Yandex', -'www_dregol_com', 'Dregol Search', -'search_socialdownloadr_com', 'Socialdownloadr', -'int_search_myway_com', 'MyWay', -'de_dolphin_com', 'Dolphin Search', -'mys_yoursearch_me', 'Yoursearch.me', -# Chello Portals -'chelloat','Chello Austria', -'chellobe','Chello Belgium', -'chellocz','Chello Czech Republic', -'chellofr','Chello France', -'chellohu','Chello Hungary', -'chellonl','Chello Netherlands', -'chellono','Chello Norway', -'chellopl','Chello Poland', -'chellose','Chello Sweden', -'chellosk','Chello Slovakia', -'chellocom','Chello (Country not recognized)', -# Mirago -'miragobe','Mirago Belgium', -'miragoch','Mirago Switzerland', -'miragode','Mirago Germany', -'miragodk','Mirago Denmark', -'miragoes','Mirago Spain', -'miragofr','Mirago France', -'miragoit','Mirago Italy', -'miragonl','Mirago Netherlands', -'miragono','Mirago Norway', -'miragose','Mirago Sweden', -'miragocouk','Mirago UK', -'mirago','Mirago (country unknown)', -'answerbus','Answerbus', -'icq','icq', -'nusearch','Nusearch', -'goodsearch','GoodSearch', -'scroogle','Scroogle', -'questionanswering','Questionanswering', -'mywebsearch','MyWebSearch', -'comettoolbar','Comet toolbar search', -# Social Bookmarking Services 
-'delicious','del.icio.us (Social Bookmark)', -'digg','Digg (Social Bookmark)', -'stumbleupon','Stumbleupon (Social Bookmark)', -'swik','Swik (Social Bookmark)', -'segnalo','Segnalo (Social Bookmark)', -'ineffabile','Ineffabile.it (Social Bookmark)', -# Minor Australian search engines -'anzwers','anzwers.com.au', -# Minor brazilian search engines -'engine','Cade', 'miner','Meta Miner', -# Minor chinese search engines -'baidu','Baidu', -'iask','Iask', -'accoona','Accoona', -'3721','3721', -'netease', 'NetEase', -'soso','SoSo', -'zhongsou','ZhongSou', -'sogou', 'SoGou', -'vnet','VNet', -# Minor czech search engines -'atlas','Atlas.cz', 'seznam','Seznam', 'quick','Quick.cz', 'centrum','Centrum.cz', 'jyxo','Jyxo.cz', 'najdi','Najdi.to', 'redbox','RedBox.cz', -'avgsearch', 'AVG Secure Search', -# Minor danish search-engines -'opasia','Opasia', 'danielsen','Thor (danielsen.com)', 'sol','SOL', 'jubii','Jubii', 'finddk','Find', 'edderkoppen','Edderkoppen', 'netstjernen','Netstjernen', 'orbis','Orbis', 'tyfon','Tyfon', '1klik','1Klik', 'ofir','Ofir', -# Minor dutch search engines -'ilse','Ilse','vindex','Vindex\.nl', -# Minor english search engines -'askuk','Ask UK', -'bbc','BBC', 'freeserve','Freeserve', 'looksmartuk','Looksmart UK', -'splut','Splut', 'spotjockey','Spotjockey', 'ukdirectory','UK Directory', 'ukindex','UKIndex', 'ukplus','UK Plus', 'searchy','searchy.co.uk', -'fbdownloader','FBDownloader (fbdownloader)', -'fdownloadr_com', 'FBDownloader (fdownloadr)', -'babylon','Babylon', -'allgameshome', 'AllGamesHome', -'surfcanyon_com', 'SurfCanyon', -'uk_foxstart_com', 'Foxstart.com', -'yandex_com', 'Yandex', -# Minor finnish search engines -'haku','Ihmemaa', -# Minor french search engines -'aolfr','AOL (fr)', 'ctrouve','C\'est trouve', 'francite','Francite', 'lbb', 'LBB', 'libertysurf', 'Libertysurf', 'free', 'Free.fr', 'clubinternet', 'Club-internet', -'toile', 'Toile du Quebec', -'biglotron','Biglotron', -'mozbot','Mozbot', -# Minor German search engines 
-'aolde','AOL (de)', -'o2aolde', 'o2 Suche', -'fireball','Fireball', 'infoseek','Infoseek', -'webde','Web.de', -'abacho','Abacho', -'t-online','T-Online', -'allesklar','allesklar.de', 'meinestadt','meinestadt.de', -'metaspinner','metaspinner', -'metacrawler_de','metacrawler.de', -'wwweasel','WWWeasel', -'netluchs','Netluchs', -'schoenerbrausen','Schoenerbrausen/', -'gmxsuche', 'GMX Suche', -'gmxsuche_at', 'GMX Suche Oesterreich', -'ecosiasearch', 'Ecosia Search', -'aolsearch', 'AOL Search', -'aolsuche', 'AOL Suche', -'startxxl', 'StartXXL', -'benefind', 'benefind', -'amazonsearch', 'Amazon Web Search', -'wowsearch', 'Wow Search', -'vlips_de', 'vlips.de', -'metager', 'MetaGer', -'search_1und1_de', '1&1 Suche (subdomain "search")', -'smde', 'SM.de - Die SuchMaschine', -'sumaja', 'Sumaja', -'navigationshilfe', 'T-Online Navigationshilfe', -'umfis', 'UMFIS-Online Das Umweltfirmen-Informationssystem der IHKs in Deutschland', -'fastbot_de', 'Fastbot.de (Does not provide search keyphrases, using found page instead)', -'tixuma_de', 'Tixuma Deutschland', -'freenet_de', 'suche.freenet.de', -'izito_de', 'iZito Deutschland', -'peoplecheck_de', 'PeopleCheck.de', -'oneseek_de', 'Metasuchmaschine OneSeek.de', -'de_wiki_gov_cn', 'Wiki Sucher', -'umuwa_de', 'Umuwa Deutschland', -'1und1_de', '1&1 Suche (subdomain "suche")', -'metasuche_ch', 'Metasuche.ch', -# Minor hungarian search engines -'heureka','Heureka', 'origo','Origo-Vizsla', 'lapkereso','Startlapkereso', 'goliat','Goliat', 'indexhu','Index', 'wahoo','Wahoo', 'webmania','webmania.hu', 'internetto','Internetto Kereso', -'tango_hu','Tango', -'keresolap_hu','Tango keresolap', -'startlap_hu','Startlab Kereso', -'polymeta_hu','Polymeta', -# Minor Indian search engines -'sify','Sify', -# Minor Italian search engines -'virgilio','Virgilio', -'arianna','Arianna', -'supereva','Supereva', -'kataweb','Kataweb', -'aliceitmaster','search.alice.it.master', -'aliceit','alice.it', -'gotuneed','got u need', -'godado','Godado.it', 
-'jumpy\.it','Jumpy.it', -'shinyseek\.it','Shinyseek.it', -'teecnoit','Teecno', -# Minor Israeli search engines -'genieo','Genieo', -# Minor Japanese search engines -'askjp','Ask Japan', -'sagool','Sagool', -'rakuten', 'websearch.rakuten.co.jp', -# Minor Norwegian search engines -'start','start.no', 'eniro','Eniro', -# Minor polish search engines -'wp','Wirtualna Polska', -'onetpl','Onet.pl', -'dodajpl','Dodaj.pl', -'gazetapl','Gazeta.pl', -'gerypl','Gery.pl', -'hogapl','Hoga.pl', -'netsprintpl','NetSprint.pl', -'interiapl','Interia.pl', -'katalogonetpl','Katalog.Onet.pl', -'o2pl','o2.pl', -'polskapl','Polska', -'szukaczpl','Szukacz', -'wowpl','Wow.pl', -# Minor russian search engines -'yandex', 'Yandex', 'aport', 'Aport', 'rambler', 'Rambler', 'turtle', 'Turtle', 'metabot', 'MetaBot', -'mailru','Mail.Ru', -# Minor Swedish search engines -'passagen','Evreka', -'enirose','Eniro Sverige', -# Minor Slovak search engines -'zoznam','Zoznam', -# Minor Portuguese search engines -'sapo','Sapo', -# Minor Swiss search engines -'searchch', 'search.ch', 'bluewin', 'search.bluewin.ch', -'zapmeta_ch', 'ZapMeta.ch', -'etools_ch', 'eTools.ch', -# Minor Croatian, Serbian, Macedonian, Bosnian and Herzegovinian search engines -'pogodak','Pogodak.com', -# Generic search engines -'search','Unknown search engines' -); - - -# Sanity check. -# Enable this code and run perl search_engines.pm to check file entries are ok -#----------------------------------------------------------------------------- -#foreach my $key (@SearchEnginesSearchIDOrder_list1) { -# if (! 
$SearchEnginesHashID{$key}) { error("Entry '$key' has been found in SearchEnginesSearchIDOrder_list1 with no value in SearchEnginesHashID"); -# foreach my $key2 (@SearchEnginesSearchIDOrder_list2) { if ($key2 eq $key) { error("$key is in 1 and 2\n"); } } -# foreach my $key2 (@SearchEnginesSearchIDOrder_listgen) { if ($key2 eq $key) { error("$key is in 1 and gen\n"); } } -#} } -#foreach my $key (@SearchEnginesSearchIDOrder_list2) { -# if (! $SearchEnginesHashID{$key}) { error("Entry '$key' has been found in SearchEnginesSearchIDOrder_list1 with no value in SearchEnginesHashID"); -# foreach my $key2 (@SearchEnginesSearchIDOrder_list1) { if ($key2 eq $key) { error("$key is in 2 and 1\n"); } } -# foreach my $key2 (@SearchEnginesSearchIDOrder_listgen) { if ($key2 eq $key) { error("$key is in 2 and gen\n"); } } -#} } -#foreach my $key (@SearchEnginesSearchIDOrder_listgen) { if (! $SearchEnginesHashID{$key}) { error("Entry '$key' has been found in SearchEnginesSearchIDOrder_listgen with no value in SearchEnginesHashID"); } } -#foreach my $key (keys %NotSearchEnginesKeys) { if (! $SearchEnginesHashID{$key}) { error("Entry '$key' has been found in NotSearchEnginesKeys with no value in SearchEnginesHashID"); } } -#foreach my $key (keys %SearchEnginesKnownUrl) { -# my $found=0; -# foreach my $key2 (values %SearchEnginesHashID) { -# if ($key eq $key2) { $found=1; last; } -# } -# if (! $found) { die "Entry '$key' has been found in SearchEnginesKnownUrl with no value in SearchEnginesHashID"; } -#} -#foreach my $key (keys %SearchEnginesHashLib) { -# my $found=0; -# foreach my $key2 (values %SearchEnginesHashID) { -# if ($key eq $key2) { $found=1; last; } -# } -# if (! $found) { die "Entry '$key' has been found in SearchEnginesHashLib with no value in SearchEnginesHashID"; } -#} -#print @SearchEnginesSearchIDOrder_list1." ".@SearchEnginesSearchIDOrder_list2." 
".@SearchEnginesSearchIDOrder_listgen; - -1; +# AWSTATS SEARCH ENGINES DATABASE +#------------------------------------------------------------------------------ +# If you want to add a Search Engine to extend AWStats database detection capabilities, +# you must add an entry in SearchEnginesSearchIDOrder, SearchEnginesHashID and in +# SearchEnginesHashLib. +# An entry if known in SearchEnginesKnownUrl is also welcome. +# +# to eldy: Please check if the following description is correct: +# You need the following information to specify a search engine: +# (a) A regular expression that matches the referrer string of the +# search engine. Unclear: What about slashes in the name of +# a search engine, e.g. as in 'ecosia.com/search'. Seems that +# AWStats will non find search strings containing a slash. +# Maybe use a search string without a slash, and - if necessary - +# an entry in %NotSearchEnginesKeys , if this search string +# matches entries that are not search engines. +# Example of a web address of a Amazon search engine: +# http://www.amazon.de/gp/bit/apps/web/SERP/search/ref=bit_bds-p24_serp_cr_de?ie=UTF8tagbase=bds-p24&query=deutsch+8.+klasse+gymnasium+protokoll +# (b) A unique string to identify the search engine within AWStats +# (c) A regular expression that finds the start of the query part in the +# referrer string +# (d) A HTML-fragment that goes into the reports generated by AWStats which +# identifies the search engine to human reader of the report. In the +# simplest case this is a string containing the name of the search +# engine. You can also provide a hypertext clause that presents the +# name together with a link to the search engine. +# +# The regular expression (a) goes into SearchEnginesSearchIDOrder_list1 +# or ..._list2. List 1 contains common search engines, list 2 those +# that are not so often used. 
+# +# SearchEnginesHashID contains two consecutive entries for each search +# engine: The regular expression (a) followed by the search engine +# identifier (b) +# +# SearchEnginesKnownUrl specifies how to find the start of the query. +# For each search engine you enter the search engine identifier (b) +# followed by the regular expression (c). Unclear: It is possible to +# omit this entry. If you do this, how will AWStats find the start of +# the query? +# +# SearchEnginesHashLib also contains two entries for each search engine: +# The search engine identifier (b) followed by the HTML-Fragment (d) +# +# There are search engines that do not use a query part in their URLs. +# They put the search expression in the main part of the URL instead. +# AWStats is able to handle these cases. They are specified as described +# above, except for the following two things: +# - The regular expression (c) searches the complete URL and not only +# the query part. +# - An additional entry in the list %SearchEnginesWithKeysNotInQuery is +# necessary. +# +# +# AWStats runs a sanity check of the contents of search_engines.pm. This +# check detects the following things: +# - Inconsistencies (number of entries) +# It does not detect the following errors: +# - If the HTML-Fragment (d) is syntactically incorrect. 
+# +#------------------------------------------------------------------------------ + +# 2005-08-19 Sean Carlos http://www.antezeta.com/awstats.html +# added minor italian search engines +# arianna http://arianna.libero.it/ +# supereva http://search.supereva.com/ +# kataweb http://kataweb.it/ +# corrected uk looksmart +# 'askuk','ask=', 'bbc','q=', 'freeserve','q=', 'looksmart','key=', +# to +# 'askuk','ask=', 'bbc','q=', 'freeserve','q=', 'looksmartuk','key=', +# corrected spelling +# internationnal -> international +# added 'google\.'=>'mail\.google\.', to NotSearchEnginesKeys in order to +# avoid counting gmail referrals as search engine traffic +# 2005-08-21 Sean Carlos http://www.antezeta.com/awstats.html +# avoid counting babelfish.altavista referrals as search engine traffic +# avoid counting translate.google referrals as search engine traffic +# 2005-11-20 Sean Carlos +# added missing 'tiscali','key=', entry. Check order +# 2005-11-22 Sean Carlos +# added Google Base & Froogle. Froogle not tested. 
+# 2006-04-18 Sean Carlos http://www.antezeta.com/awstats.html +# added biglotron.com (France) +# added blingo http://www.blingo.com/ +# added Clusty & Vivisimo +# added eniro.no (Norway) [https://sourceforge.net/forum/message.php?msg_id=3134783] +# added GPU p2p search http://search.centraldatabase.org/ +# added mail.tiscali to "not search engines list" [https://sourceforge.net/forum/message.php?msg_id=3166688] +# added Ask group's "mysearch" +# added sify.com (India) +# added sogou.com (China) [https://sourceforge.net/forum/message.php?msg_id=3501603] +# Ask changes: +# - added Ask Japan (ask.jp) +# - break out Ask new country level variants (DE, ES, FR, IT, NL) +# - updated Ask name from Ask Jeeves +# - added Ask q= parameter - many recent searches probably not recognized; [https://sourceforge.net/forum/message.php?msg_id=3465444] +# - updated Ask uk (new uk.ask.com added to older ask.co.uk) +# updated voila kw|rdata parameter [https://sourceforge.net/forum/message.php?msg_id=3373912] +# for each new engine, added link to Search Engine. This serves to document engine. Done for major & Italian engines as well. Requires patch +# to AWStats to allow untranslated html. Otherwise html will appear instead of link. +# reviewed mnoGoSearch (http://www.mnogosearch.org/); the search engine mentioned no longer +# exists https://sourceforge.net/forum/message.php?msg_id=3025426 +# 2006-05-13 Sean Carlos http://www.antezeta.com/awstats.html +# added 10 Chello European broadband portals (Austria, Belgium, Czech Republic, France, Hungary, The Netherlands, Norway, Poland, Slovakia, Sweden) +# added Alice Internal Search (blends data with Google?) search.alice.it.master:10005 +# added detection of google cache views from IPs 66.249.93.104 72.14.203.104 72.14.207.104 +# To do: add more extensive IP list; keywords not yet detected. +# added icerocket.com blog search http://www.icerocket.com/ +# added live.com (msn) http://www.live.com/ +# added Meta motor kartoo. 
Note: Kartoo does not provide search words in referrers, thus the engine will appear in the +# search engine list but the actual search words are not available. +# added netluchs.de http://www.netluchs.de/ +# added sphere.com blog search http://www.sphere.com/ +# added wwweasel.de http://wwweasel.de +# added Yahoo Mindset! http://mindset.research.yahoo.com/ +# updated Mirago query parameter recognition (qry=); added breakout for each country (France, Germany, Spain, Italy, Norway, Sweden, Denmark, Netherlands, Belgium, Switzerland) +# 2006-05-13 Sean Carlos http://www.antezeta.com/awstats.html +# added Google cache IPs 64.233.183.104 & 66.102.7.104 +# 2006-05-20 Sean Carlos http://www.antezeta.com/awstats.html +# anzwers.com.au +# schoenerbrausen.de http://www.schoenerbrausen.de/ +# added Google cache IP 216.239.59.104 +# answerbus http://www.answerbus.com/ (does not provide keywords) +# 2006-05-23 Sean Carlos http://www.antezeta.com/awstats.html +# added Google cache IP 66.102.9.104, 64.233.161.104 +# 2006-06-23 Sean Carlos http://www.antezeta.com/awstats.html +# added Alice Search search.alice.it +# added GoodSearch http://www.goodsearch.com/ (does not provide keywords) "a Yahoo-powered search engine that donates money to your favorite charity or school each time you search the web" +# added googlee.com, variant of Google +# added gotuneed http://www.gotuneed.com/ Italian search engine, in beta +# added icq.com +# added logic to parse Google Cache search keywords. Seems to work for alpha but not numeric cache IDs, i.e. search?q=cache:lWVLmnuGJswJ: is recognized but q=cache:Yv5qxeJNuhgJ: is not recognized. The URL triggering the keywords will also appear. The URLs are probably too varied to parse out? 
+# added Nusearch http://www.nusearch.com/ +# added Polymeta www.polymeta.hu (does not provide keywords) +# added scroogle http://www.scroogle.org/ (does not always provide keywords) +# added Tango http://tango.hu/search.php?st=0&q=jeles+napok +# Changed Google Cache notation 64\.233\.(161|167|179|183|187)\.104 to 64\.233\.1[0-9]{2}\.104 +# 72\.14\.(203|205|207|209|221)\.104 to 72\.14\.2[0-9]{2}\.104 +# 216\.239\.(51|59)\.104 to 216\.239\.5[0-9]\.104 +# 66\.102\.(7|9)\.104 to 66\.102\.[1-9]\.104 +# 2006-06-27 Sean Carlos http://www.antezeta.com/awstats.html +# added Onet.pl http://szukaj.onet.pl/ +# corrected name "Wirtualna Polska" from "Szukaj" (search); added link http://szukaj.wp.pl/ +# 2006-06-30 Sean Carlos http://www.antezeta.com/awstats.html +# Additional Polish Search Engines: +# added Dodaj.pl http://www.dodaj.pl/ +# added Gazeta.pl http://szukaj.gazeta.pl/ +# added Gery.pl http://szukaj.gery.pl/ +# added Hoga.pl http://www.hoga.pl/ +# added Interia.pl http://www.google.interia.pl/ +# added Katalog.Onet.pl http://katalog.onet.pl/ +# added NetSprint.pl http://www.netsprint.pl/ +# added o2.pl http://szukaj2.o2.pl/ +# added Polska http://szukaj.polska.pl/ +# added Szukacz http://www.szukacz.pl/ +# added Wow.pl http://szukaj.wow.pl/ +# added Sagool http://sagool.jp/ + +# 2006-08-25 Social Bookmarks +# International +# added del.icio.us/search - for now, just search referrer. To do: consider /tag/(tagname) referrer? +# added stumbleupon.com - No keywords supplied. +# added swik.net +# added digg. Keywords sometimes supplied. +# Italy +# added segnalo.alice.it - No keywords supplied. +# added ineffabile.it - No keywords supplied. + +# added filter for google groups. Attempt to parse group name as keyword. 
+ +# 2006-09-14 +# added Eniro Sverige http://www.eniro.se/ +# added MyWebSearch http://search.mywebsearch.com/ +# added Teecno http://www.teecno.it/ Italian Open Source Search Engine + +#package AWSSE; + +# 2006-09-25 (Gabor Moizes) +# added 4-counter (Google alternative) http://4-counter.com/ +# added Googlecom (Google alternative) http://googlecom.com/ +# added Goggle (Google alternative) http://goggle.co.hu/ +# added Comet toolbar http://as.starware.com +# added new IP for Yahoo: 216.109.125.130 +# added Ledix http://ledix.net/ +# added AT&T search (powered by Google) http://www.att.net/ +# added Keresolap (Hungarian search engine) http://www.keresolap.hu/ +# added Mozbot (French search engine) http://www.mozbot.fr/ +# added Zoznam (Slovak search engine) http://www.zoznam.sk/ +# added sapo.pt (Portuguese search engine) http://www.sapo.pt/ +# added shaw.ca (powered by Google) http://start.shaw.ca/ +# added Searchalot http://www.searchalot.com/ +# added Copernic http://www.copernic.com/ +# added 216.109.125.130 to Yahoo +# added 66.218.69.11 to Yahoo +# added Avantfind http://www.avantfind.com/ +# added Steadysearch http://www.steadysearch.com/ +# added Steadysearch http://www.steady-search.com/ +# modified 216\.239\.5[0-9]\.104/search to 216\.239\.5[0-9]\.104 + +# RobC 2016/09/02 $ +# massive edit..... +# added all google country TLDs to split count by google country. +# any google TLD not listed in here goes into 'Google catchall' for each country +# each google country has 3 possibilities, normal, images, or catchall (a catchall for other google sub domains) +# Several other search engines added too. +# Whole file cleaned up and partly sorted, comments taken out to make it easier to +# add, find and edit entries. Fact is that vast majority of searches are from Google so +# I made it Google centric which shows you which countries your google searches are coming from. 
+# Should now contain all search engines from the AWStats 7.5 release (plus a lot more) +# May be too slow for big log files but you can try and see. +# Currently tuned for my top search engines. Yours will likely differ. +# If you put your site's 30 most used search engines at the top of SearchEnginesSearchIDOrder_list1 and +# at the top of SearchEnginesHashID, starting with the most used, then performance is improved a tad. +# SearchEnginesSearchIDOrder_list1 and SearchEnginesHashID must be in exactly the same order as each other. +# SearchEnginesKnownUrl and SearchEnginesHashLib can be in any order you like. +# Good luck +# +# p.s. save your current search_engines.pm file before using this one just in case it's too slow or you find an error. +# I'm bound to have made one or two mistakes but it runs a year's worth of log files from one of my sites just fine. + +#------------------------------------------------------------------------------ +# SearchEnginesSearchIDOrder +# It contains all matching criteria to search for in log fields. This list is +# used to know in which order to search Search Engines IDs. 
+# Most frequent one are in list1, used when LevelForSearchEnginesDetection is 1 or more +# Minor robots are in list2, used when LevelForSearchEnginesDetection is 2 or more +# Note: Regex IDs are in lower case and ' ' and '+' are changed into '_' +#------------------------------------------------------------------------------ +@SearchEnginesSearchIDOrder_list1=( +'^www\.google\.co\.uk$', +'^images\.google\.co\.uk$', +'google\.co\.uk$', +'^www\.google\.com$', +'^images\.google\.com$', +'google\.com$', + +'bing\.com', +'^(www\.|)yandex\.ru$', +'^(www\.|)yandex\.com\.tr$', +'^(www\.|)yandex\.ua$', +'^(www\.|)yandex\.kz$', +'^(www\.|)yandex\.com$', +'yandex\.', +'r\.search\.yahoo\.com', + +'^www\.google\.de$', +'^images\.google\.de$', +'google\.de$', +'^www\.google\.fr$', +'^images\.google\.fr$', +'google\.fr$', +'^www\.google\.ca$', +'^images\.google\.ca$', +'google\.ca$', +'^www\.google\.es$', +'^images\.google\.es$', +'google\.es$', +'^www\.google\.com\.au$', +'^images\.google\.com\.au$', +'google\.com\.au$', +'^www\.google\.nl$', +'^images\.google\.nl$', +'google\.nl$', +'^www\.google\.gr$', +'^images\.google\.gr$', +'google\.gr$', +'^www\.google\.se$', +'^images\.google\.se$', +'google\.se$', +'^www\.google\.ie$', +'^images\.google\.ie$', +'google\.ie$', +'^www\.google\.it$', +'^images\.google\.it$', +'google\.it$', +'^www\.google\.no$', +'^images\.google\.no$', +'google\.no$', +'^www\.google\.com\.tr$', +'^images\.google\.com\.tr$', +'google\.com\.tr$', +'^www\.google\.co\.in$', +'^images\.google\.co\.in$', +'google\.co\.in$', +'^www\.google\.pt$', +'^images\.google\.pt$', +'google\.pt$', +'^www\.google\.hr$', +'^images\.google\.hr$', +'google\.hr$', +'^www\.google\.co\.nz$', +'^images\.google\.co\.nz$', +'google\.co\.nz$', +'^www\.google\.pl$', +'^images\.google\.pl$', +'google\.pl$', +'^www\.google\.ac$', +'^images\.google\.ac$', +'google\.ac$', +'^www\.google\.ad$', +'^images\.google\.ad$', +'google\.ad$', +'^www\.google\.ae$', +'^images\.google\.ae$', 
+'google\.ae$', +'^www\.google\.al$', +'^images\.google\.al$', +'google\.al$', +'^www\.google\.am$', +'^images\.google\.am$', +'google\.am$', +'^www\.google\.as$', +'^images\.google\.as$', +'google\.as$', +'^www\.google\.at$', +'^images\.google\.at$', +'google\.at$', +'^www\.google\.az$', +'^images\.google\.az$', +'google\.az$', +'^www\.google\.ba$', +'^images\.google\.ba$', +'google\.ba$', +'^www\.google\.be$', +'^images\.google\.be$', +'google\.be$', +'^www\.google\.bf$', +'^images\.google\.bf$', +'google\.bf$', +'^www\.google\.bg$', +'^images\.google\.bg$', +'google\.bg$', +'^www\.google\.bi$', +'^images\.google\.bi$', +'google\.bi$', +'^www\.google\.bj$', +'^images\.google\.bj$', +'google\.bj$', +'^www\.google\.bs$', +'^images\.google\.bs$', +'google\.bs$', +'^www\.google\.bt$', +'^images\.google\.bt$', +'google\.bt$', +'^www\.google\.by$', +'^images\.google\.by$', +'google\.by$', +'^www\.google\.cat$', +'^images\.google\.cat$', +'google\.cat$', +'^www\.google\.cc$', +'^images\.google\.cc$', +'google\.cc$', +'^www\.google\.cd$', +'^images\.google\.cd$', +'google\.cd$', +'^www\.google\.cf$', +'^images\.google\.cf$', +'google\.cf$', +'^www\.google\.cg$', +'^images\.google\.cg$', +'google\.cg$', +'^www\.google\.ch$', +'^images\.google\.ch$', +'google\.ch$', +'^www\.google\.ci$', +'^images\.google\.ci$', +'google\.ci$', +'^www\.google\.cl$', +'^images\.google\.cl$', +'google\.cl$', +'^www\.google\.cm$', +'^images\.google\.cm$', +'google\.cm$', +'^www\.google\.cn$', +'^images\.google\.cn$', +'google\.cn$', +'^www\.google\.co\.ao$', +'^images\.google\.co\.ao$', +'google\.co\.ao$', +'^www\.google\.co\.bw$', +'^images\.google\.co\.bw$', +'google\.co\.bw$', +'^www\.google\.co\.ck$', +'^images\.google\.co\.ck$', +'google\.co\.ck$', +'^www\.google\.co\.cr$', +'^images\.google\.co\.cr$', +'google\.co\.cr$', +'^www\.google\.co\.id$', +'^images\.google\.co\.id$', +'google\.co\.id$', +'^www\.google\.co\.il$', +'^images\.google\.co\.il$', +'google\.co\.il$', 
+'^www\.google\.co\.jp$', +'^images\.google\.co\.jp$', +'google\.co\.jp$', +'^www\.google\.co\.ke$', +'^images\.google\.co\.ke$', +'google\.co\.ke$', +'^www\.google\.co\.kr$', +'^images\.google\.co\.kr$', +'google\.co\.kr$', +'^www\.google\.co\.ls$', +'^images\.google\.co\.ls$', +'google\.co\.ls$', +'^www\.google\.co\.ma$', +'^images\.google\.co\.ma$', +'google\.co\.ma$', +'^www\.google\.co\.mz$', +'^images\.google\.co\.mz$', +'google\.co\.mz$', +'^www\.google\.co\.th$', +'^images\.google\.co\.th$', +'google\.co\.th$', +'^www\.google\.co\.tz$', +'^images\.google\.co\.tz$', +'google\.co\.tz$', +'^www\.google\.co\.ug$', +'^images\.google\.co\.ug$', +'google\.co\.ug$', +'^www\.google\.co\.uz$', +'^images\.google\.co\.uz$', +'google\.co\.uz$', +'^www\.google\.co\.ve$', +'^images\.google\.co\.ve$', +'google\.co\.ve$', +'^www\.google\.co\.vi$', +'^images\.google\.co\.vi$', +'google\.co\.vi$', +'^www\.google\.co\.za$', +'^images\.google\.co\.za$', +'google\.co\.za$', +'^www\.google\.co\.zm$', +'^images\.google\.co\.zm$', +'google\.co\.zm$', +'^www\.google\.co\.zw$', +'^images\.google\.co\.zw$', +'google\.co\.zw$', +'^www\.google\.com\.af$', +'^images\.google\.com\.af$', +'google\.com\.af$', +'^www\.google\.com\.ag$', +'^images\.google\.com\.ag$', +'google\.com\.ag$', +'^www\.google\.com\.ai$', +'^images\.google\.com\.ai$', +'google\.com\.ai$', +'^www\.google\.com\.ar$', +'^images\.google\.com\.ar$', +'google\.com\.ar$', +'^www\.google\.com\.bd$', +'^images\.google\.com\.bd$', +'google\.com\.bd$', +'^www\.google\.com\.bh$', +'^images\.google\.com\.bh$', +'google\.com\.bh$', +'^www\.google\.com\.bn$', +'^images\.google\.com\.bn$', +'google\.com\.bn$', +'^www\.google\.com\.bo$', +'^images\.google\.com\.bo$', +'google\.com\.bo$', +'^www\.google\.com\.br$', +'^images\.google\.com\.br$', +'google\.com\.br$', +'^www\.google\.com\.bz$', +'^images\.google\.com\.bz$', +'google\.com\.bz$', +'^www\.google\.com\.co$', +'^images\.google\.com\.co$', +'google\.com\.co$', 
+'^www\.google\.com\.cu$', +'^images\.google\.com\.cu$', +'google\.com\.cu$', +'^www\.google\.com\.cy$', +'^images\.google\.com\.cy$', +'google\.com\.cy$', +'^www\.google\.com\.do$', +'^images\.google\.com\.do$', +'google\.com\.do$', +'^www\.google\.com\.ec$', +'^images\.google\.com\.ec$', +'google\.com\.ec$', +'^www\.google\.com\.eg$', +'^images\.google\.com\.eg$', +'google\.com\.eg$', +'^www\.google\.com\.et$', +'^images\.google\.com\.et$', +'google\.com\.et$', +'^www\.google\.com\.fj$', +'^images\.google\.com\.fj$', +'google\.com\.fj$', +'^www\.google\.com\.gh$', +'^images\.google\.com\.gh$', +'google\.com\.gh$', +'^www\.google\.com\.gi$', +'^images\.google\.com\.gi$', +'google\.com\.gi$', +'^www\.google\.com\.gt$', +'^images\.google\.com\.gt$', +'google\.com\.gt$', +'^www\.google\.com\.hk$', +'^images\.google\.com\.hk$', +'google\.com\.hk$', +'^www\.google\.com\.jm$', +'^images\.google\.com\.jm$', +'google\.com\.jm$', +'^www\.google\.com\.kh$', +'^images\.google\.com\.kh$', +'google\.com\.kh$', +'^www\.google\.com\.kw$', +'^images\.google\.com\.kw$', +'google\.com\.kw$', +'^www\.google\.com\.lb$', +'^images\.google\.com\.lb$', +'google\.com\.lb$', +'^www\.google\.com\.lc$', +'^images\.google\.com\.lc$', +'google\.com\.lc$', +'^www\.google\.com\.ly$', +'^images\.google\.com\.ly$', +'google\.com\.ly$', +'^www\.google\.com\.mm$', +'^images\.google\.com\.mm$', +'google\.com\.mm$', +'^www\.google\.com\.mt$', +'^images\.google\.com\.mt$', +'google\.com\.mt$', +'^www\.google\.com\.mx$', +'^images\.google\.com\.mx$', +'google\.com\.mx$', +'^www\.google\.com\.my$', +'^images\.google\.com\.my$', +'google\.com\.my$', +'^www\.google\.com\.na$', +'^images\.google\.com\.na$', +'google\.com\.na$', +'^www\.google\.com\.nf$', +'^images\.google\.com\.nf$', +'google\.com\.nf$', +'^www\.google\.com\.ng$', +'^images\.google\.com\.ng$', +'google\.com\.ng$', +'^www\.google\.com\.ni$', +'^images\.google\.com\.ni$', +'google\.com\.ni$', +'^www\.google\.com\.np$', 
+'^images\.google\.com\.np$', +'google\.com\.np$', +'^www\.google\.com\.om$', +'^images\.google\.com\.om$', +'google\.com\.om$', +'^www\.google\.com\.pa$', +'^images\.google\.com\.pa$', +'google\.com\.pa$', +'^www\.google\.com\.pe$', +'^images\.google\.com\.pe$', +'google\.com\.pe$', +'^www\.google\.com\.pg$', +'^images\.google\.com\.pg$', +'google\.com\.pg$', +'^www\.google\.com\.ph$', +'^images\.google\.com\.ph$', +'google\.com\.ph$', +'^www\.google\.com\.pk$', +'^images\.google\.com\.pk$', +'google\.com\.pk$', +'^www\.google\.com\.pr$', +'^images\.google\.com\.pr$', +'google\.com\.pr$', +'^www\.google\.com\.py$', +'^images\.google\.com\.py$', +'google\.com\.py$', +'^www\.google\.com\.qa$', +'^images\.google\.com\.qa$', +'google\.com\.qa$', +'^www\.google\.com\.sa$', +'^images\.google\.com\.sa$', +'google\.com\.sa$', +'^www\.google\.com\.sb$', +'^images\.google\.com\.sb$', +'google\.com\.sb$', +'^www\.google\.com\.sg$', +'^images\.google\.com\.sg$', +'google\.com\.sg$', +'^www\.google\.com\.sl$', +'^images\.google\.com\.sl$', +'google\.com\.sl$', +'^www\.google\.com\.sv$', +'^images\.google\.com\.sv$', +'google\.com\.sv$', +'^www\.google\.com\.tj$', +'^images\.google\.com\.tj$', +'google\.com\.tj$', +'^www\.google\.com\.tw$', +'^images\.google\.com\.tw$', +'google\.com\.tw$', +'^www\.google\.com\.ua$', +'^images\.google\.com\.ua$', +'google\.com\.ua$', +'^www\.google\.com\.uy$', +'^images\.google\.com\.uy$', +'google\.com\.uy$', +'^www\.google\.com\.vc$', +'^images\.google\.com\.vc$', +'google\.com\.vc$', +'^www\.google\.com\.vn$', +'^images\.google\.com\.vn$', +'google\.com\.vn$', +'^www\.google\.cv$', +'^images\.google\.cv$', +'google\.cv$', +'^www\.google\.cz$', +'^images\.google\.cz$', +'google\.cz$', +'^www\.google\.dj$', +'^images\.google\.dj$', +'google\.dj$', +'^www\.google\.dk$', +'^images\.google\.dk$', +'google\.dk$', +'^www\.google\.dm$', +'^images\.google\.dm$', +'google\.dm$', +'^www\.google\.dz$', +'^images\.google\.dz$', +'google\.dz$', 
+'^www\.google\.ee$', +'^images\.google\.ee$', +'google\.ee$', +'^www\.google\.fi$', +'^images\.google\.fi$', +'google\.fi$', +'^www\.google\.fm$', +'^images\.google\.fm$', +'google\.fm$', +'^www\.google\.ga$', +'^images\.google\.ga$', +'google\.ga$', +'^www\.google\.ge$', +'^images\.google\.ge$', +'google\.ge$', +'^www\.google\.gf$', +'^images\.google\.gf$', +'google\.gf$', +'^www\.google\.gg$', +'^images\.google\.gg$', +'google\.gg$', +'^www\.google\.gl$', +'^images\.google\.gl$', +'google\.gl$', +'^www\.google\.gm$', +'^images\.google\.gm$', +'google\.gm$', +'^www\.google\.gp$', +'^images\.google\.gp$', +'google\.gp$', +'^www\.google\.gy$', +'^images\.google\.gy$', +'google\.gy$', +'^www\.google\.hn$', +'^images\.google\.hn$', +'google\.hn$', +'^www\.google\.ht$', +'^images\.google\.ht$', +'google\.ht$', +'^www\.google\.hu$', +'^images\.google\.hu$', +'google\.hu$', +'^www\.google\.im$', +'^images\.google\.im$', +'google\.im$', +'^www\.google\.io$', +'^images\.google\.io$', +'google\.io$', +'^www\.google\.iq$', +'^images\.google\.iq$', +'google\.iq$', +'^www\.google\.is$', +'^images\.google\.is$', +'google\.is$', +'^www\.google\.je$', +'^images\.google\.je$', +'google\.je$', +'^www\.google\.jo$', +'^images\.google\.jo$', +'google\.jo$', +'^www\.google\.kg$', +'^images\.google\.kg$', +'google\.kg$', +'^www\.google\.ki$', +'^images\.google\.ki$', +'google\.ki$', +'^www\.google\.kz$', +'^images\.google\.kz$', +'google\.kz$', +'^www\.google\.la$', +'^images\.google\.la$', +'google\.la$', +'^www\.google\.li$', +'^images\.google\.li$', +'google\.li$', +'^www\.google\.lk$', +'^images\.google\.lk$', +'google\.lk$', +'^www\.google\.lt$', +'^images\.google\.lt$', +'google\.lt$', +'^www\.google\.lu$', +'^images\.google\.lu$', +'google\.lu$', +'^www\.google\.lv$', +'^images\.google\.lv$', +'google\.lv$', +'^www\.google\.md$', +'^images\.google\.md$', +'google\.md$', +'^www\.google\.me$', +'^images\.google\.me$', +'google\.me$', +'^www\.google\.mg$', +'^images\.google\.mg$', 
+'google\.mg$', +'^www\.google\.mk$', +'^images\.google\.mk$', +'google\.mk$', +'^www\.google\.ml$', +'^images\.google\.ml$', +'google\.ml$', +'^www\.google\.mn$', +'^images\.google\.mn$', +'google\.mn$', +'^www\.google\.ms$', +'^images\.google\.ms$', +'google\.ms$', +'^www\.google\.mu$', +'^images\.google\.mu$', +'google\.mu$', +'^www\.google\.mv$', +'^images\.google\.mv$', +'google\.mv$', +'^www\.google\.mw$', +'^images\.google\.mw$', +'google\.mw$', +'^www\.google\.ne$', +'^images\.google\.ne$', +'google\.ne$', +'^www\.google\.nr$', +'^images\.google\.nr$', +'google\.nr$', +'^www\.google\.nu$', +'^images\.google\.nu$', +'google\.nu$', +'^www\.google\.pn$', +'^images\.google\.pn$', +'google\.pn$', +'^www\.google\.ps$', +'^images\.google\.ps$', +'google\.ps$', +'^www\.google\.ro$', +'^images\.google\.ro$', +'google\.ro$', +'^www\.google\.rs$', +'^images\.google\.rs$', +'google\.rs$', +'^www\.google\.ru$', +'^images\.google\.ru$', +'google\.ru$', +'^www\.google\.rw$', +'^images\.google\.rw$', +'google\.rw$', +'^www\.google\.sc$', +'^images\.google\.sc$', +'google\.sc$', +'^www\.google\.sh$', +'^images\.google\.sh$', +'google\.sh$', +'^www\.google\.si$', +'^images\.google\.si$', +'google\.si$', +'^www\.google\.sk$', +'^images\.google\.sk$', +'google\.sk$', +'^www\.google\.sm$', +'^images\.google\.sm$', +'google\.sm$', +'^www\.google\.sn$', +'^images\.google\.sn$', +'google\.sn$', +'^www\.google\.so$', +'^images\.google\.so$', +'google\.so$', +'^www\.google\.sr$', +'^images\.google\.sr$', +'google\.sr$', +'^www\.google\.st$', +'^images\.google\.st$', +'google\.st$', +'^www\.google\.td$', +'^images\.google\.td$', +'google\.td$', +'^www\.google\.tg$', +'^images\.google\.tg$', +'google\.tg$', +'^www\.google\.tk$', +'^images\.google\.tk$', +'google\.tk$', +'^www\.google\.tl$', +'^images\.google\.tl$', +'google\.tl$', +'^www\.google\.tm$', +'^images\.google\.tm$', +'google\.tm$', +'^www\.google\.tn$', +'^images\.google\.tn$', +'google\.tn$', +'^www\.google\.to$', 
+'^images\.google\.to$', +'google\.to$', +'^www\.google\.tt$', +'^images\.google\.tt$', +'google\.tt$', +'^www\.google\.us$', +'^images\.google\.us$', +'google\.us$', +'^www\.google\.vg$', +'^images\.google\.vg$', +'google\.vg$', +'^www\.google\.vu$', +'^images\.google\.vu$', +'google\.vu$', +'^www\.google\.ws$', +'^images\.google\.ws$', +'google\.ws$', + +'au\.search\.yahoo\.com', +'ca\.search\.yahoo\.com', +'fr\.search\.yahoo\.com', +'image\.search\.yahoo\.co\.jp', +'images\.search\.yahoo\.com', +'it\.search\.yahoo\.com', +'ph\.search\.yahoo\.com', +'se\.search\.yahoo\.com', +'search\.yahoo\.co\.jp', +'sg\.search\.yahoo\.com', +'sg\.yhs4\.search\.yahoo\.com', +'tw\.images\.search\.yahoo\.com', +'uk\.search\.yahoo\.com', +'uk\.yhs4\.search\.yahoo\.com', +'us\.search\.yahoo\.com', +'search\.yahoo\.com', +'search\.yahoo', + +'babylon\.com', +'search\.conduit\.com', +'avg\.com', +'mywebsearch\.com', +'msn\.', +'live\.com', +'search\.aol\.co\.uk', +'searcht\.aol\.co\.uk', +'searcht\.aol\.com', +'search\.aol\.com', +'recherche\.aol\.fr', +'suche\.aol\.de', +'de\.aolsearch\.com', +'sucheaol\.aol\.de', +'search\.hp\.my\.aol\.co\.uk', +'search\.aol\.pl', +'o2suche\.aol\.de', +'search\.aol\.', +'^uk\.ask\.com$', +'^de\.ask\.com$', +'tb\.ask\.com$', +'^images\.ask\.com$', + +'base\.google\.', +'froogle\.google\.', +'google\.[\w.]+/products', +'googlecom\.com', +'groups\.google\.', +'googlee\.', + +'216\.239\.32\.20', +'173\.194\.32\.223', +'216\.239\.(35|37|39|51)\.100', +'216\.239\.(35|37|39|51)\.101', +'216\.239\.5[0-9]\.104', +'64\.233\.1[0-9]{2}\.104', +'66\.102\.[1-9]\.104', +'66\.249\.93\.104', +'72\.14\.2[0-9]{2}\.104', + +'maps\.google', +'translate\.google', +'173\.194\.35\.177', +'google\.', + +'mail.yahoo.net', +'(66\.218\.71\.225|216\.109\.117\.135|216\.109\.125\.130|66\.218\.69\.11)', +'mindset\.research\.yahoo', +'yahoo\.', + +'^www\.ask\.jp$', +'^es\.ask\.com$', +'^fr\.ask\.com$', +'^www\.iask\.com$', +'^it\.ask\.com$', +'^nl\.ask\.com$', +'(^|\.)ask\.com$', 
+ +'www\.tesco\.net', +'yell\.', +'zapmeta\.ch', +'zapmeta\.com', +'zapmeta\.de', +'zapmeta', + +'(^|\.)go\.com', +'(161\.58\.227\.204|161\.58\.247\.101|212\.40\.165\.90|213\.133\.108\.202|217\.160\.108\.151|217\.160\.111\.99|217\.160\.131\.108|217\.160\.142\.227|217\.160\.176\.42)', +'\.facemoods\.com', +'\.funmoods\.com', +'\.metasearch\.', +'\.wow\.com', +'163\.com', +'1klik\.dk', +'1search-board\.com', +'212\.227\.33\.241', +'3721\.com', +'4\-counter\.com', +'a9\.com', +'accoona\.com', +'alexa\.com', +'allesklar\.de', +'alltheweb\.com', +'altavista\.', +'amazon\.', +'androidsearch\.com', +'answerbus\.com', +'anzwers\.com\.au', +'aport\.ru', +'arianna\.libero\.it', +'as\.starware\.com', +'asevenboard\.com', +'atlanticbb\.net', +'atlas\.cz', +'atomz\.', +'att\.net', +'auone\.jp', +'avantfind\.com', +'baidu\.com', +'bbc\.co\.uk/cgi-bin/search', +'biglotron\.com', +'blekko\.com', +'blingo\.com', +'bungeebonesdotcom', +'centraldatabase\.org', +'centrum\.cz', +'centurylink\.net', +'charter\.net', +'chatzum\.com', +'checkparams\.com', +'chello\.at', +'chello\.be', +'chello\.cz', +'chello\.fr', +'chello\.hu', +'chello\.nl', +'chello\.no', +'chello\.pl', +'chello\.se', +'chello\.sk', +'chello', +'claro-search\.com', +'clinck\.in', +'clusty\.com', +'copernic\.com', +'crawler\.com', +'ctrouve\.', +'dalesearch\.com', +'danielsen\.com', +'daum\.net', +'de\.dolphin\.com', +'de\.wiki\.gov\.cn', +'de\.wow\.com', +'dejanews\.', +'del\.icio\.us', +'delta-search', +'digg\.com', +'dmoz\.org', +'dodaj\.pl', +'dogpile\.com', +'duckduckgo', +'easysearch\.org\.uk', +'ecosia\.org', +'edderkoppen\.dk', +'engine\.exe', +'eniro\.no', +'eniro\.se', +'ereadingsource\.com', +'es\.mirago\.com', +'etools\.ch', +'euroseek\.', +'everyclick\.com', +'evreka\.passagen\.se', +'excite\.', +'extern\.peoplecheck\.de', +'fastbot\.de', +'find\.dk', +'find1friend\.com', +'findamo\.com', +'findarticles\.com', +'fireball\.de', +'forums\.iboats\.com', +'foxstart\.com', +'francite\.', +'gazeta\.pl', 
+'gery\.pl', +'globososo\.', +'go\.mail\.ru', +'go\.speedbit\.com', +'go2net\.com', +'godado', +'goggle\.co\.hu$', +'goliat\.hu', +'goodsearch\.com', +'gotuneed\.com', +'haku\.www\.fi', +'heureka\.hu', +'hoga\.pl', +'hotbot\.', +'hubwe\.net', +'icerocket\.com', +'icq\.com\/search', +'ifind\.freeserve', +'ilse\.', +'inbox\.com', +'index\.hu', +'ineffabile\.it', +'info\.co\.uk', +'infoseek\.de', +'infospace\.com', +'inspsearch\.com', +'int\.search\.myway\.com', +'int\.search-results\.com', +'interia\.pl', +'isearch\.nation\.com', +'i-une\.com', +'ixquick\.com', +'izito\.co\.uk', +'izito\.co\.de', +'izito\.', +'jubii\.dk', +'jumpy\.it', +'juno\.com', +'jyxo\.(cz|com)', +'kartoo\.com', +'katalog\.onet\.pl', +'kataweb\.it', +'kereso\.startlap\.hu', +'keresolap\.hu', +'kvasir\.', +'kvitters\.', +'lapkereso\.hu', +'lbb\.org', +'ledix\.net', +'libero\.it/', +'localmoxie\.com', +'looksmart\.co\.uk', +'looksmart\.', +'lycos\.', +'mamma\.', +'meinestadt\.de', +'meta\.ua', +'metabot\.ru', +'metacrawler\.', +'metager\.de', +'miner\.bol\.com\.br', +'mirago\.be', +'mirago\.ch', +'mirago\.co\.uk', +'mirago\.de', +'mirago\.dk', +'mirago\.fr', +'mirago\.it', +'mirago\.nl', +'mirago\.se', +'mirago', +'mitrasites\.com', +'mozbot\.fr', +'my\.allgameshome\.com', +'mys\.yoursearch\.me', +'mysearch\.', +'mysearchdial\.com', +'mysearchresults\.com', +'myway\.com', +'najdi\.to', +'nation\.', +'navigationshilfe\.t-online\.de', +'nbci\.com\/search', +'netluchs\.de', +'netscape\.', +'netsprint\.pl', +'netstjernen\.dk', +'netzero\.net', +'no\.mirago\.com', +'northernlight\.', +'nusearch\.com', +'o2\.pl', +'ofir\.dk', +'opasia\.dk', +'orangeworld\.co\.uk', +'orbis\.dk', +'overture\.com', +'pch\.com', +'picsearch\.de', +'pictures\.com', +'plusnetwork\.com', +'pogodak\.', +'polska\.pl', +'polymeta\.hu', +'preciobarato\.xyz', +'questionanswering\.com', +'quick\.cz', +'rambler\.ru', +'recherche\.club-internet\.fr', +'rechercher\.libertysurf\.fr', +'redbox\.cz', +'rr\.com', +'sagool\.jp', 
+'sapo\.pt', +'schoenerbrausen\.de', +'scroogle\.org', +'search[\w\-]+\.free\.fr', +'search\.1und1\.de', +'search\.alice\.it\.master', +'search\.alice\.it', +'search\.alot\.com', +'search\.bluewin\.ch', +'search\.bt\.com', +'search\.certified-toolbar\.com', +'search\.ch', +'search\.comcast\.net', +'search\.earthlink\.net', +'search\.fbdownloader\.com', +'search\.fdownloadr\.com', +'search\.foxtab\.com', +'search\.genieo\.com', +'search\.goo\.ne\.jp', +'search\.handycafe\.com', +'search\.incredibar\.com', +'search\.incredimail\.com', +'search\.internetto\.hu', +'search\.orange\.co\.uk', +'search\.sky\.com', +'search\.sli\.sympatico\.ca', +'search\.socialdownloadr\.com', +'search\.sweetim\.com', +'search\.terra\.', +'search\.zonealarm\.com', +'searchalgo\.com', +'searchalot\.com', +'searchcompletion\.com', +'searches\.qone8\.com', +'searches\.safehomepage\.com', +'searches\.vi-view\.com', +'searchesnavigator\.com', +'searchgol\.com', +'searchlistingsite\.com', +'searchmobileonline\.com', +'search-results\.com', +'search-results\.mobi', +'searchsafer\.com', +'searchy\.co\.uk', +'searchya\.com', +'segnalo\.alice\.it', +'semalt\.com', +'sensis\.com\.au', +'seznam\.cz', +'shinyseek\.it', +'shoppstop\.com', +'sify\.com', +'sm\.de', +'smartsuggestor\.com', +'snapdo\.com', +'softonic\.com', +'sogou\.com', +'sok\.start\.no', +'sol\.dk', +'soso\.com', +'sphere\.com', +'splut\.', +'spotjockey\.', +'spray\.', +'sr\.searchfunmoods\.com', +'start\.iminent\.com', +'start\.shaw\.ca', +'start\.toshiba\.com', +'startpage\.com', +'startsiden\.no', +'static\.flipora\.com', +'steadysearch\.com', +'steady-search\.com', +'stumbleupon\.com', +'suche\.1und1\.de', +'suche\.freenet\.de', +'suche\.gmx\.at', +'suche\.gmx\.net', +'suche\d?\.web\.de', +'suchen\.abacho\.de', +'sumaja\.de', +'supereva\.com', +'surfcanyon\.com', +'sweetpacks-search\.com', +'swik\.net', +'swisscows\.ch', +'szukacz\.pl', +'szukaj\.onet\.pl', +'szukaj\.wp\.pl', +'talktalk\.co\.uk', +'tango\.hu', +'teecno\.it', 
+'teoma\.',
+'theallsearches\.com',
+'three\.co\.uk',
+'tiscali\.',
+'tixuma\.de',
+'toile\.com',
+'t-online\.de',
+'t-online',
+'turtle\.ru',
+'tyfon\.dk',
+'uk\.foxstart\.com',
+'ukdirectory\.',
+'ukindex\.co\.uk',
+'ukplus\.',
+'umfis\.de',
+'umuwa\.de',
+'uni-hannover\.de',
+'vindex\.',
+'virgilio\.it',
+'virginmedia\.com',
+'vivisimo\.com',
+'vizsla\.origo\.hu',
+'vnet\.cn',
+'voila\.',
+'wahoo\.hu',
+'webalta\.ru',
+'webcrawler\.',
+'webmania\.hu',
+'websearch\.rakuten\.co\.jp',
+'whorush\.com',
+'windowssearch\.com',
+'wisenut\.com',
+'wow\.pl',
+'wow\.utop\.it',
+'www\.benefind\.de',
+'www\.buenosearch\.com',
+'www\.dregol\.com',
+'www\.govome\.com',
+'www\.holasearch\.com',
+'www\.metasuche\.ch',
+'www\.oneseek\.de',
+'www\.qwant\.com',
+'www\.search\.com',
+'www\.startxxl\.com',
+'www\.vlips\.de',
+'www\.wow\.com',
+'www1\.search-results\.com',
+'wwweasel\.de',
+'yourbestsearch\.net',
+'youtube\.com',
+'zhongsou\.com',
+'zoeken\.nl',
+'zoznam\.sk'
+);
+
+@SearchEnginesSearchIDOrder_list2=(
+# Minor search engines
+);
+@SearchEnginesSearchIDOrder_listgen=(
+# Generic search engines
+'search\..*\.\w+'
+);
+
+# NotSearchEnginesKeys
+# Maps a search engine key to the exclude regex telling that a matching host
+# is NOT really a search engine. One value per key: a repeated key is lost.
+#------------------------------------------------------------------------------
+%NotSearchEnginesKeys=(
+'altavista\.'=>'babelfish\.altavista\.',
+# mail and translate share one alternation: a second 'google\.' key would win
+'google\.'=>'(mail|translate)\.google\.',
+'msn\.'=>'hotmail\.msn\.',
+'tiscali\.'=>'mail\.tiscali\.',
+'yahoo\.'=>'mail\.yahoo\.',
+'yandex\.'=>'direct\.yandex\.'
+); + +# SearchEnginesHashID +# Each Search Engine Search ID is associated to an AWStats id string +#------------------------------------------------------------------------------ +%SearchEnginesHashID = ( +# Major search engines + +'^www\.google\.co\.uk$','www_google_co_uk', +'^images\.google\.co\.uk$','images_google_co_uk', +'google\.co\.uk$','google_co_uk', +'^www\.google\.com$','www_google_com', +'^images\.google\.com$','images_google_com', +'google\.com$','google_com', + +'bing\.com','bing', +'^(www\.|)yandex\.ru$','yandexru', +'^(www\.|)yandex\.com\.tr$','yandexcomtr', +'^(www\.|)yandex\.ua$','yandexua', +'^(www\.|)yandex\.kz$','yandexkz', +'^(www\.|)yandex\.com$','yandexcom', +'yandex\.','yandex', +'r\.search\.yahoo\.com','r_search_yahoo_com', + +'^www\.google\.de$','www_google_de', +'^images\.google\.de$','images_google_de', +'google\.de$','google_de', +'^www\.google\.fr$','www_google_fr', +'^images\.google\.fr$','images_google_fr', +'google\.fr$','google_fr', +'^www\.google\.ca$','www_google_ca', +'^images\.google\.ca$','images_google_ca', +'google\.ca$','google_ca', +'^www\.google\.es$','www_google_es', +'^images\.google\.es$','images_google_es', +'google\.es$','google_es', +'^www\.google\.com\.au$','www_google_com_au', +'^images\.google\.com\.au$','images_google_com_au', +'google\.com\.au$','google_com_au', +'^www\.google\.nl$','www_google_nl', +'^images\.google\.nl$','images_google_nl', +'google\.nl$','google_nl', +'^www\.google\.gr$','www_google_gr', +'^images\.google\.gr$','images_google_gr', +'google\.gr$','google_gr', +'^www\.google\.se$','www_google_se', +'^images\.google\.se$','images_google_se', +'google\.se$','google_se', +'^www\.google\.ie$','www_google_ie', +'^images\.google\.ie$','images_google_ie', +'google\.ie$','google_ie', +'^www\.google\.it$','www_google_it', +'^images\.google\.it$','images_google_it', +'google\.it$','google_it', +'^www\.google\.no$','www_google_no', +'^images\.google\.no$','images_google_no', +'google\.no$','google_no', 
+'^www\.google\.com\.tr$','www_google_com_tr', +'^images\.google\.com\.tr$','images_google_com_tr', +'google\.com\.tr$','google_com_tr', +'^www\.google\.co\.in$','www_google_co_in', +'^images\.google\.co\.in$','images_google_co_in', +'google\.co\.in$','google_co_in', +'^www\.google\.pt$','www_google_pt', +'^images\.google\.pt$','images_google_pt', +'google\.pt$','google_pt', +'^www\.google\.hr$','www_google_hr', +'^images\.google\.hr$','images_google_hr', +'google\.hr$','google_hr', +'^www\.google\.co\.nz$','www_google_co_nz', +'^images\.google\.co\.nz$','images_google_co_nz', +'google\.co\.nz$','google_co_nz', +'^www\.google\.pl$','www_google_pl', +'^images\.google\.pl$','images_google_pl', +'google\.pl$','google_pl', +'^www\.google\.ac$','www_google_ac', +'^images\.google\.ac$','images_google_ac', +'google\.ac$','google_ac', +'^www\.google\.ad$','www_google_ad', +'^images\.google\.ad$','images_google_ad', +'google\.ad$','google_ad', +'^www\.google\.ae$','www_google_ae', +'^images\.google\.ae$','images_google_ae', +'google\.ae$','google_ae', +'^www\.google\.al$','www_google_al', +'^images\.google\.al$','images_google_al', +'google\.al$','google_al', +'^www\.google\.am$','www_google_am', +'^images\.google\.am$','images_google_am', +'google\.am$','google_am', +'^www\.google\.as$','www_google_as', +'^images\.google\.as$','images_google_as', +'google\.as$','google_as', +'^www\.google\.at$','www_google_at', +'^images\.google\.at$','images_google_at', +'google\.at$','google_at', +'^www\.google\.az$','www_google_az', +'^images\.google\.az$','images_google_az', +'google\.az$','google_az', +'^www\.google\.ba$','www_google_ba', +'^images\.google\.ba$','images_google_ba', +'google\.ba$','google_ba', +'^www\.google\.be$','www_google_be', +'^images\.google\.be$','images_google_be', +'google\.be$','google_be', +'^www\.google\.bf$','www_google_bf', +'^images\.google\.bf$','images_google_bf', +'google\.bf$','google_bf', +'^www\.google\.bg$','www_google_bg', 
+'^images\.google\.bg$','images_google_bg', +'google\.bg$','google_bg', +'^www\.google\.bi$','www_google_bi', +'^images\.google\.bi$','images_google_bi', +'google\.bi$','google_bi', +'^www\.google\.bj$','www_google_bj', +'^images\.google\.bj$','images_google_bj', +'google\.bj$','google_bj', +'^www\.google\.bs$','www_google_bs', +'^images\.google\.bs$','images_google_bs', +'google\.bs$','google_bs', +'^www\.google\.bt$','www_google_bt', +'^images\.google\.bt$','images_google_bt', +'google\.bt$','google_bt', +'^www\.google\.by$','www_google_by', +'^images\.google\.by$','images_google_by', +'google\.by$','google_by', +'^www\.google\.cat$','www_google_cat', +'^images\.google\.cat$','images_google_cat', +'google\.cat$','google_cat', +'^www\.google\.cc$','www_google_cc', +'^images\.google\.cc$','images_google_cc', +'google\.cc$','google_cc', +'^www\.google\.cd$','www_google_cd', +'^images\.google\.cd$','images_google_cd', +'google\.cd$','google_cd', +'^www\.google\.cf$','www_google_cf', +'^images\.google\.cf$','images_google_cf', +'google\.cf$','google_cf', +'^www\.google\.cg$','www_google_cg', +'^images\.google\.cg$','images_google_cg', +'google\.cg$','google_cg', +'^www\.google\.ch$','www_google_ch', +'^images\.google\.ch$','images_google_ch', +'google\.ch$','google_ch', +'^www\.google\.ci$','www_google_ci', +'^images\.google\.ci$','images_google_ci', +'google\.ci$','google_ci', +'^www\.google\.cl$','www_google_cl', +'^images\.google\.cl$','images_google_cl', +'google\.cl$','google_cl', +'^www\.google\.cm$','www_google_cm', +'^images\.google\.cm$','images_google_cm', +'google\.cm$','google_cm', +'^www\.google\.cn$','www_google_cn', +'^images\.google\.cn$','images_google_cn', +'google\.cn$','google_cn', +'^www\.google\.co\.ao$','www_google_co_ao', +'^images\.google\.co\.ao$','images_google_co_ao', +'google\.co\.ao$','google_co_ao', +'^www\.google\.co\.bw$','www_google_co_bw', +'^images\.google\.co\.bw$','images_google_co_bw', +'google\.co\.bw$','google_co_bw', 
+'^www\.google\.co\.ck$','www_google_co_ck', +'^images\.google\.co\.ck$','images_google_co_ck', +'google\.co\.ck$','google_co_ck', +'^www\.google\.co\.cr$','www_google_co_cr', +'^images\.google\.co\.cr$','images_google_co_cr', +'google\.co\.cr$','google_co_cr', +'^www\.google\.co\.id$','www_google_co_id', +'^images\.google\.co\.id$','images_google_co_id', +'google\.co\.id$','google_co_id', +'^www\.google\.co\.il$','www_google_co_il', +'^images\.google\.co\.il$','images_google_co_il', +'google\.co\.il$','google_co_il', +'^www\.google\.co\.jp$','www_google_co_jp', +'^images\.google\.co\.jp$','images_google_co_jp', +'google\.co\.jp$','google_co_jp', +'^www\.google\.co\.ke$','www_google_co_ke', +'^images\.google\.co\.ke$','images_google_co_ke', +'google\.co\.ke$','google_co_ke', +'^www\.google\.co\.kr$','www_google_co_kr', +'^images\.google\.co\.kr$','images_google_co_kr', +'google\.co\.kr$','google_co_kr', +'^www\.google\.co\.ls$','www_google_co_ls', +'^images\.google\.co\.ls$','images_google_co_ls', +'google\.co\.ls$','google_co_ls', +'^www\.google\.co\.ma$','www_google_co_ma', +'^images\.google\.co\.ma$','images_google_co_ma', +'google\.co\.ma$','google_co_ma', +'^www\.google\.co\.mz$','www_google_co_mz', +'^images\.google\.co\.mz$','images_google_co_mz', +'google\.co\.mz$','google_co_mz', +'^www\.google\.co\.th$','www_google_co_th', +'^images\.google\.co\.th$','images_google_co_th', +'google\.co\.th$','google_co_th', +'^www\.google\.co\.tz$','www_google_co_tz', +'^images\.google\.co\.tz$','images_google_co_tz', +'google\.co\.tz$','google_co_tz', +'^www\.google\.co\.ug$','www_google_co_ug', +'^images\.google\.co\.ug$','images_google_co_ug', +'google\.co\.ug$','google_co_ug', +'^www\.google\.co\.uz$','www_google_co_uz', +'^images\.google\.co\.uz$','images_google_co_uz', +'google\.co\.uz$','google_co_uz', +'^www\.google\.co\.ve$','www_google_co_ve', +'^images\.google\.co\.ve$','images_google_co_ve', +'google\.co\.ve$','google_co_ve', 
+'^www\.google\.co\.vi$','www_google_co_vi', +'^images\.google\.co\.vi$','images_google_co_vi', +'google\.co\.vi$','google_co_vi', +'^www\.google\.co\.za$','www_google_co_za', +'^images\.google\.co\.za$','images_google_co_za', +'google\.co\.za$','google_co_za', +'^www\.google\.co\.zm$','www_google_co_zm', +'^images\.google\.co\.zm$','images_google_co_zm', +'google\.co\.zm$','google_co_zm', +'^www\.google\.co\.zw$','www_google_co_zw', +'^images\.google\.co\.zw$','images_google_co_zw', +'google\.co\.zw$','google_co_zw', +'^www\.google\.com\.af$','www_google_com_af', +'^images\.google\.com\.af$','images_google_com_af', +'google\.com\.af$','google_com_af', +'^www\.google\.com\.ag$','www_google_com_ag', +'^images\.google\.com\.ag$','images_google_com_ag', +'google\.com\.ag$','google_com_ag', +'^www\.google\.com\.ai$','www_google_com_ai', +'^images\.google\.com\.ai$','images_google_com_ai', +'google\.com\.ai$','google_com_ai', +'^www\.google\.com\.ar$','www_google_com_ar', +'^images\.google\.com\.ar$','images_google_com_ar', +'google\.com\.ar$','google_com_ar', +'^www\.google\.com\.bd$','www_google_com_bd', +'^images\.google\.com\.bd$','images_google_com_bd', +'google\.com\.bd$','google_com_bd', +'^www\.google\.com\.bh$','www_google_com_bh', +'^images\.google\.com\.bh$','images_google_com_bh', +'google\.com\.bh$','google_com_bh', +'^www\.google\.com\.bn$','www_google_com_bn', +'^images\.google\.com\.bn$','images_google_com_bn', +'google\.com\.bn$','google_com_bn', +'^www\.google\.com\.bo$','www_google_com_bo', +'^images\.google\.com\.bo$','images_google_com_bo', +'google\.com\.bo$','google_com_bo', +'^www\.google\.com\.br$','www_google_com_br', +'^images\.google\.com\.br$','images_google_com_br', +'google\.com\.br$','google_com_br', +'^www\.google\.com\.bz$','www_google_com_bz', +'^images\.google\.com\.bz$','images_google_com_bz', +'google\.com\.bz$','google_com_bz', +'^www\.google\.com\.co$','www_google_com_co', +'^images\.google\.com\.co$','images_google_com_co', 
+'google\.com\.co$','google_com_co', +'^www\.google\.com\.cu$','www_google_com_cu', +'^images\.google\.com\.cu$','images_google_com_cu', +'google\.com\.cu$','google_com_cu', + +'^www\.google\.com\.cy$','www_google_com_cy', +'^images\.google\.com\.cy$','images_google_com_cy', +'google\.com\.cy$','google_com_cy', +'^www\.google\.com\.do$','www_google_com_do', +'^images\.google\.com\.do$','images_google_com_do', +'google\.com\.do$','google_com_do', +'^www\.google\.com\.ec$','www_google_com_ec', +'^images\.google\.com\.ec$','images_google_com_ec', +'google\.com\.ec$','google_com_ec', +'^www\.google\.com\.eg$','www_google_com_eg', +'^images\.google\.com\.eg$','images_google_com_eg', +'google\.com\.eg$','google_com_eg', +'^www\.google\.com\.et$','www_google_com_et', +'^images\.google\.com\.et$','images_google_com_et', +'google\.com\.et$','google_com_et', +'^www\.google\.com\.fj$','www_google_com_fj', +'^images\.google\.com\.fj$','images_google_com_fj', +'google\.com\.fj$','google_com_fj', +'^www\.google\.com\.gh$','www_google_com_gh', +'^images\.google\.com\.gh$','images_google_com_gh', +'google\.com\.gh$','google_com_gh', +'^www\.google\.com\.gi$','www_google_com_gi', +'^images\.google\.com\.gi$','images_google_com_gi', +'google\.com\.gi$','google_com_gi', +'^www\.google\.com\.gt$','www_google_com_gt', +'^images\.google\.com\.gt$','images_google_com_gt', +'google\.com\.gt$','google_com_gt', +'^www\.google\.com\.hk$','www_google_com_hk', +'^images\.google\.com\.hk$','images_google_com_hk', +'google\.com\.hk$','google_com_hk', +'^www\.google\.com\.jm$','www_google_com_jm', +'^images\.google\.com\.jm$','images_google_com_jm', +'google\.com\.jm$','google_com_jm', +'^www\.google\.com\.kh$','www_google_com_kh', +'^images\.google\.com\.kh$','images_google_com_kh', +'google\.com\.kh$','google_com_kh', +'^www\.google\.com\.kw$','www_google_com_kw', +'^images\.google\.com\.kw$','images_google_com_kw', +'google\.com\.kw$','google_com_kw', 
+'^www\.google\.com\.lb$','www_google_com_lb', +'^images\.google\.com\.lb$','images_google_com_lb', +'google\.com\.lb$','google_com_lb', +'^www\.google\.com\.lc$','www_google_com_lc', +'^images\.google\.com\.lc$','images_google_com_lc', +'google\.com\.lc$','google_com_lc', +'^www\.google\.com\.ly$','www_google_com_ly', +'^images\.google\.com\.ly$','images_google_com_ly', +'google\.com\.ly$','google_com_ly', +'^www\.google\.com\.mm$','www_google_com_mm', +'^images\.google\.com\.mm$','images_google_com_mm', +'google\.com\.mm$','google_com_mm', +'^www\.google\.com\.mt$','www_google_com_mt', +'^images\.google\.com\.mt$','images_google_com_mt', +'google\.com\.mt$','google_com_mt', +'^www\.google\.com\.mx$','www_google_com_mx', +'^images\.google\.com\.mx$','images_google_com_mx', +'google\.com\.mx$','google_com_mx', +'^www\.google\.com\.my$','www_google_com_my', +'^images\.google\.com\.my$','images_google_com_my', +'google\.com\.my$','google_com_my', +'^www\.google\.com\.na$','www_google_com_na', +'^images\.google\.com\.na$','images_google_com_na', +'google\.com\.na$','google_com_na', +'^www\.google\.com\.nf$','www_google_com_nf', +'^images\.google\.com\.nf$','images_google_com_nf', +'google\.com\.nf$','google_com_nf', +'^www\.google\.com\.ng$','www_google_com_ng', +'^images\.google\.com\.ng$','images_google_com_ng', +'google\.com\.ng$','google_com_ng', +'^www\.google\.com\.ni$','www_google_com_ni', +'^images\.google\.com\.ni$','images_google_com_ni', +'google\.com\.ni$','google_com_ni', +'^www\.google\.com\.np$','www_google_com_np', +'^images\.google\.com\.np$','images_google_com_np', +'google\.com\.np$','google_com_np', +'^www\.google\.com\.om$','www_google_com_om', +'^images\.google\.com\.om$','images_google_com_om', +'google\.com\.om$','google_com_om', +'^www\.google\.com\.pa$','www_google_com_pa', +'^images\.google\.com\.pa$','images_google_com_pa', +'google\.com\.pa$','google_com_pa', +'^www\.google\.com\.pe$','www_google_com_pe', 
+'^images\.google\.com\.pe$','images_google_com_pe', +'google\.com\.pe$','google_com_pe', +'^www\.google\.com\.pg$','www_google_com_pg', +'^images\.google\.com\.pg$','images_google_com_pg', +'google\.com\.pg$','google_com_pg', +'^www\.google\.com\.ph$','www_google_com_ph', +'^images\.google\.com\.ph$','images_google_com_ph', +'google\.com\.ph$','google_com_ph', +'^www\.google\.com\.pk$','www_google_com_pk', +'^images\.google\.com\.pk$','images_google_com_pk', +'google\.com\.pk$','google_com_pk', +'^www\.google\.com\.pr$','www_google_com_pr', +'^images\.google\.com\.pr$','images_google_com_pr', +'google\.com\.pr$','google_com_pr', +'^www\.google\.com\.py$','www_google_com_py', +'^images\.google\.com\.py$','images_google_com_py', +'google\.com\.py$','google_com_py', +'^www\.google\.com\.qa$','www_google_com_qa', +'^images\.google\.com\.qa$','images_google_com_qa', +'google\.com\.qa$','google_com_qa', +'^www\.google\.com\.sa$','www_google_com_sa', +'^images\.google\.com\.sa$','images_google_com_sa', +'google\.com\.sa$','google_com_sa', +'^www\.google\.com\.sb$','www_google_com_sb', +'^images\.google\.com\.sb$','images_google_com_sb', +'google\.com\.sb$','google_com_sb', +'^www\.google\.com\.sg$','www_google_com_sg', +'^images\.google\.com\.sg$','images_google_com_sg', +'google\.com\.sg$','google_com_sg', +'^www\.google\.com\.sl$','www_google_com_sl', +'^images\.google\.com\.sl$','images_google_com_sl', +'google\.com\.sl$','google_com_sl', +'^www\.google\.com\.sv$','www_google_com_sv', +'^images\.google\.com\.sv$','images_google_com_sv', +'google\.com\.sv$','google_com_sv', +'^www\.google\.com\.tj$','www_google_com_tj', +'^images\.google\.com\.tj$','images_google_com_tj', +'google\.com\.tj$','google_com_tj', +'^www\.google\.com\.tw$','www_google_com_tw', +'^images\.google\.com\.tw$','images_google_com_tw', +'google\.com\.tw$','google_com_tw', +'^www\.google\.com\.ua$','www_google_com_ua', +'^images\.google\.com\.ua$','images_google_com_ua', 
+'google\.com\.ua$','google_com_ua', +'^www\.google\.com\.uy$','www_google_com_uy', +'^images\.google\.com\.uy$','images_google_com_uy', +'google\.com\.uy$','google_com_uy', +'^www\.google\.com\.vc$','www_google_com_vc', +'^images\.google\.com\.vc$','images_google_com_vc', +'google\.com\.vc$','google_com_vc', +'^www\.google\.com\.vn$','www_google_com_vn', +'^images\.google\.com\.vn$','images_google_com_vn', +'google\.com\.vn$','google_com_vn', +'^www\.google\.cv$','www_google_cv', +'^images\.google\.cv$','images_google_cv', +'google\.cv$','google_cv', +'^www\.google\.cz$','www_google_cz', +'^images\.google\.cz$','images_google_cz', +'google\.cz$','google_cz', +'^www\.google\.dj$','www_google_dj', +'^images\.google\.dj$','images_google_dj', +'google\.dj$','google_dj', +'^www\.google\.dk$','www_google_dk', +'^images\.google\.dk$','images_google_dk', +'google\.dk$','google_dk', +'^www\.google\.dm$','www_google_dm', +'^images\.google\.dm$','images_google_dm', +'google\.dm$','google_dm', +'^www\.google\.dz$','www_google_dz', +'^images\.google\.dz$','images_google_dz', +'google\.dz$','google_dz', +'^www\.google\.ee$','www_google_ee', +'^images\.google\.ee$','images_google_ee', +'google\.ee$','google_ee', +'^www\.google\.fi$','www_google_fi', +'^images\.google\.fi$','images_google_fi', +'google\.fi$','google_fi', +'^www\.google\.fm$','www_google_fm', +'^images\.google\.fm$','images_google_fm', +'google\.fm$','google_fm', +'^www\.google\.ga$','www_google_ga', +'^images\.google\.ga$','images_google_ga', +'google\.ga$','google_ga', +'^www\.google\.ge$','www_google_ge', +'^images\.google\.ge$','images_google_ge', +'google\.ge$','google_ge', +'^www\.google\.gf$','www_google_gf', +'^images\.google\.gf$','images_google_gf', +'google\.gf$','google_gf', +'^www\.google\.gg$','www_google_gg', +'^images\.google\.gg$','images_google_gg', +'google\.gg$','google_gg', +'^www\.google\.gl$','www_google_gl', +'^images\.google\.gl$','images_google_gl', +'google\.gl$','google_gl', 
+'^www\.google\.gm$','www_google_gm', +'^images\.google\.gm$','images_google_gm', +'google\.gm$','google_gm', +'^www\.google\.gp$','www_google_gp', +'^images\.google\.gp$','images_google_gp', +'google\.gp$','google_gp', +'^www\.google\.gy$','www_google_gy', +'^images\.google\.gy$','images_google_gy', +'google\.gy$','google_gy', +'^www\.google\.hn$','www_google_hn', +'^images\.google\.hn$','images_google_hn', +'google\.hn$','google_hn', +'^www\.google\.ht$','www_google_ht', +'^images\.google\.ht$','images_google_ht', +'google\.ht$','google_ht', +'^www\.google\.hu$','www_google_hu', +'^images\.google\.hu$','images_google_hu', +'google\.hu$','google_hu', +'^www\.google\.im$','www_google_im', +'^images\.google\.im$','images_google_im', +'google\.im$','google_im', +'^www\.google\.io$','www_google_io', +'^images\.google\.io$','images_google_io', +'google\.io$','google_io', +'^www\.google\.iq$','www_google_iq', +'^images\.google\.iq$','images_google_iq', +'google\.iq$','google_iq', +'^www\.google\.is$','www_google_is', +'^images\.google\.is$','images_google_is', +'google\.is$','google_is', +'^www\.google\.je$','www_google_je', +'^images\.google\.je$','images_google_je', +'google\.je$','google_je', +'^www\.google\.jo$','www_google_jo', +'^images\.google\.jo$','images_google_jo', +'google\.jo$','google_jo', +'^www\.google\.kg$','www_google_kg', +'^images\.google\.kg$','images_google_kg', +'google\.kg$','google_kg', +'^www\.google\.ki$','www_google_ki', +'^images\.google\.ki$','images_google_ki', +'google\.ki$','google_ki', +'^www\.google\.kz$','www_google_kz', +'^images\.google\.kz$','images_google_kz', +'google\.kz$','google_kz', +'^www\.google\.la$','www_google_la', +'^images\.google\.la$','images_google_la', +'google\.la$','google_la', +'^www\.google\.li$','www_google_li', +'^images\.google\.li$','images_google_li', +'google\.li$','google_li', +'^www\.google\.lk$','www_google_lk', +'^images\.google\.lk$','images_google_lk', +'google\.lk$','google_lk', 
+'^www\.google\.lt$','www_google_lt', +'^images\.google\.lt$','images_google_lt', +'google\.lt$','google_lt', +'^www\.google\.lu$','www_google_lu', +'^images\.google\.lu$','images_google_lu', +'google\.lu$','google_lu', +'^www\.google\.lv$','www_google_lv', +'^images\.google\.lv$','images_google_lv', +'google\.lv$','google_lv', +'^www\.google\.md$','www_google_md', +'^images\.google\.md$','images_google_md', +'google\.md$','google_md', +'^www\.google\.me$','www_google_me', +'^images\.google\.me$','images_google_me', +'google\.me$','google_me', +'^www\.google\.mg$','www_google_mg', +'^images\.google\.mg$','images_google_mg', +'google\.mg$','google_mg', +'^www\.google\.mk$','www_google_mk', +'^images\.google\.mk$','images_google_mk', +'google\.mk$','google_mk', +'^www\.google\.ml$','www_google_ml', +'^images\.google\.ml$','images_google_ml', +'google\.ml$','google_ml', +'^www\.google\.mn$','www_google_mn', +'^images\.google\.mn$','images_google_mn', +'google\.mn$','google_mn', +'^www\.google\.ms$','www_google_ms', +'^images\.google\.ms$','images_google_ms', +'google\.ms$','google_ms', +'^www\.google\.mu$','www_google_mu', +'^images\.google\.mu$','images_google_mu', +'google\.mu$','google_mu', +'^www\.google\.mv$','www_google_mv', +'^images\.google\.mv$','images_google_mv', +'google\.mv$','google_mv', +'^www\.google\.mw$','www_google_mw', +'^images\.google\.mw$','images_google_mw', +'google\.mw$','google_mw', +'^www\.google\.ne$','www_google_ne', +'^images\.google\.ne$','images_google_ne', +'google\.ne$','google_ne', +'^www\.google\.nr$','www_google_nr', +'^images\.google\.nr$','images_google_nr', +'google\.nr$','google_nr', +'^www\.google\.nu$','www_google_nu', +'^images\.google\.nu$','images_google_nu', +'google\.nu$','google_nu', +'^www\.google\.pn$','www_google_pn', +'^images\.google\.pn$','images_google_pn', +'google\.pn$','google_pn', +'^www\.google\.ps$','www_google_ps', +'^images\.google\.ps$','images_google_ps', +'google\.ps$','google_ps', 
+'^www\.google\.ro$','www_google_ro', +'^images\.google\.ro$','images_google_ro', +'google\.ro$','google_ro', +'^www\.google\.rs$','www_google_rs', +'^images\.google\.rs$','images_google_rs', +'google\.rs$','google_rs', +'^www\.google\.ru$','www_google_ru', +'^images\.google\.ru$','images_google_ru', +'google\.ru$','google_ru', +'^www\.google\.rw$','www_google_rw', +'^images\.google\.rw$','images_google_rw', +'google\.rw$','google_rw', +'^www\.google\.sc$','www_google_sc', +'^images\.google\.sc$','images_google_sc', +'google\.sc$','google_sc', +'^www\.google\.sh$','www_google_sh', +'^images\.google\.sh$','images_google_sh', +'google\.sh$','google_sh', +'^www\.google\.si$','www_google_si', +'^images\.google\.si$','images_google_si', +'google\.si$','google_si', +'^www\.google\.sk$','www_google_sk', +'^images\.google\.sk$','images_google_sk', +'google\.sk$','google_sk', +'^www\.google\.sm$','www_google_sm', +'^images\.google\.sm$','images_google_sm', +'google\.sm$','google_sm', +'^www\.google\.sn$','www_google_sn', +'^images\.google\.sn$','images_google_sn', +'google\.sn$','google_sn', +'^www\.google\.so$','www_google_so', +'^images\.google\.so$','images_google_so', +'google\.so$','google_so', +'^www\.google\.sr$','www_google_sr', +'^images\.google\.sr$','images_google_sr', +'google\.sr$','google_sr', +'^www\.google\.st$','www_google_st', +'^images\.google\.st$','images_google_st', +'google\.st$','google_st', +'^www\.google\.td$','www_google_td', +'^images\.google\.td$','images_google_td', +'google\.td$','google_td', +'^www\.google\.tg$','www_google_tg', +'^images\.google\.tg$','images_google_tg', +'google\.tg$','google_tg', +'^www\.google\.tk$','www_google_tk', +'^images\.google\.tk$','images_google_tk', +'google\.tk$','google_tk', +'^www\.google\.tl$','www_google_tl', +'^images\.google\.tl$','images_google_tl', +'google\.tl$','google_tl', +'^www\.google\.tm$','www_google_tm', +'^images\.google\.tm$','images_google_tm', +'google\.tm$','google_tm', 
+'^www\.google\.tn$','www_google_tn', +'^images\.google\.tn$','images_google_tn', +'google\.tn$','google_tn', +'^www\.google\.to$','www_google_to', +'^images\.google\.to$','images_google_to', +'google\.to$','google_to', +'^www\.google\.tt$','www_google_tt', +'^images\.google\.tt$','images_google_tt', +'google\.tt$','google_tt', +'^www\.google\.us$','www_google_us', +'^images\.google\.us$','images_google_us', +'google\.us$','google_us', +'^www\.google\.vg$','www_google_vg', +'^images\.google\.vg$','images_google_vg', +'google\.vg$','google_vg', +'^www\.google\.vu$','www_google_vu', +'^images\.google\.vu$','images_google_vu', +'google\.vu$','google_vu', +'^www\.google\.ws$','www_google_ws', +'^images\.google\.ws$','images_google_ws', +'google\.ws$','google_ws', + +'au\.search\.yahoo\.com','au_search_yahoo_com', +'ca\.search\.yahoo\.com','ca_search_yahoo_com', +'fr\.search\.yahoo\.com','fr_search_yahoo_com', +'image\.search\.yahoo\.co\.jp','image_search_yahoo_co_jp', +'images\.search\.yahoo\.com','images_search_yahoo_com', +'it\.search\.yahoo\.com','it_search_yahoo_com', +'ph\.search\.yahoo\.com','ph_search_yahoo_com', +'se\.search\.yahoo\.com','se_search_yahoo_com', +'search\.yahoo\.co\.jp','search_yahoo_co_jp', +'sg\.search\.yahoo\.com','sg_search_yahoo_com', +'sg\.yhs4\.search\.yahoo\.com','sg_yhs4_search_yahoo_com', +'tw\.images\.search\.yahoo\.com','tw_images_search_yahoo_com', +'uk\.search\.yahoo\.com','uk_search_yahoo_com', +'uk\.yhs4\.search\.yahoo\.com','uk_yhs4_search_yahoo_com', +'us\.search\.yahoo\.com','us_search_yahoo_com', +'search\.yahoo\.com','search_yahoo_com', +'search\.yahoo','yahoo_catchall', + +'babylon\.com','babylon', +'search\.conduit\.com','conduit', +'avg\.com','avg', +'mywebsearch\.com','mywebsearch', + +'msn\.','msn', +'live\.com','live', + +'search\.aol\.co\.uk','aoluk', +'searcht\.aol\.co\.uk','aoluk', +'searcht\.aol\.com','aolcom', +'search\.aol\.com','aolcom', +'recherche\.aol\.fr','aolfr', +'suche\.aol\.de','aolde', 
+'de\.aolsearch\.com','aolsearchde', +'sucheaol\.aol\.de','sucheaolde', +'search\.hp\.my\.aol\.co\.uk','hp_my_aol', +'search\.aol\.pl','aolpl', +'o2suche\.aol\.de','aol_o2suche_de', +'search\.aol\.','aolcatchall', + +'^uk\.ask\.com$','askuk', +'^de\.ask\.com$','askde', +'tb\.ask\.com$','tbask', +'^images\.ask\.com$','askimages', + +'base\.google\.','google_base', +'froogle\.google\.','google_froogle', +'google\.[\w.]+/products','google_products', +'googlecom\.com','google_catchall', +'groups\.google\.','google_groups', +'googlee\.','google_catchall', + +'216\.239\.32\.20','google_catchall', +'173\.194\.32\.223','google_catchall', +'216\.239\.(35|37|39|51)\.100','google_cache', +'216\.239\.(35|37|39|51)\.101','google_cache', +'216\.239\.5[0-9]\.104','google_cache', +'64\.233\.1[0-9]{2}\.104','google_cache', +'66\.102\.[1-9]\.104','google_cache', +'66\.249\.93\.104','google_cache', +'72\.14\.2[0-9]{2}\.104','google_cache', + +'maps\.google','google_maps', +'translate\.google','google_translate', +'173\.194\.35\.177','googleByIP', +'google\.','google_catchall', + +'mail.yahoo.net','yahoo_catchall', +'(66\.218\.71\.225|216\.109\.117\.135|216\.109\.125\.130|66\.218\.69\.11)','yahoo_catchall', +'mindset\.research\.yahoo','yahoo_mindset', +'yahoo\.','yahoo_catchall', +'^www\.ask\.jp$','askjp', +'^es\.ask\.com$','askes', +'^fr\.ask\.com$','askfr', +'^www\.iask\.com$','iask', +'^it\.ask\.com$','askit', +'^nl\.ask\.com$','asknl', +'(^|\.)ask\.com$','askcatchall', + +'www\.tesco\.net','tesco', +'yell\.','yell', +'zapmeta\.ch','zapmeta_ch', +'zapmeta\.com','zapmeta_com', +'zapmeta\.de','zapmeta_de', +'zapmeta','zapmeta_catchall', + +'(^|\.)go\.com','go', +'(161\.58\.227\.204|161\.58\.247\.101|212\.40\.165\.90|213\.133\.108\.202|217\.160\.108\.151|217\.160\.111\.99|217\.160\.131\.108|217\.160\.142\.227|217\.160\.176\.42)','metacrawler_de', +'\.facemoods\.com','facemoods', +'\.funmoods\.com','funmoods', +'\.metasearch\.','metasearch', +'\.wow\.com','wowuk', 
+'163\.com','netease', +'1klik\.dk','1klik', +'1search-board\.com','1search', +'212\.227\.33\.241','metaspinner', +'3721\.com','3721', +'4\-counter\.com','google4counter', +'a9\.com','a9', +'accoona\.com','accoona', +'alexa\.com','alexa', +'allesklar\.de','allesklar', +'alltheweb\.com','alltheweb', +'altavista\.','altavista', +'amazon\.','amazon', +'androidsearch\.com','androidsearch', +'answerbus\.com','answerbus', +'anzwers\.com\.au','anzwers', +'aport\.ru','aport', +'arianna\.libero\.it','arianna', +'as\.starware\.com','comettoolbar', +'asevenboard\.com','asevenboard', +'atlanticbb\.net','atlanticbb', +'atlas\.cz','atlas', +'atomz\.','atomz', +'att\.net','att', +'auone\.jp','auone', +'avantfind\.com','avantfind', +'baidu\.com','baidu', +'bbc\.co\.uk/cgi-bin/search','bbc', +'biglotron\.com', 'biglotron', +'blekko\.com','blekko', +'blingo\.com','blingo', +'bungeebonesdotcom','bungeebonesdotcom', +'centraldatabase\.org','centraldatabase', +'centrum\.cz','centrum', +'centurylink\.net','centurylink', +'charter\.net','charter', +'chatzum\.com','chatzum', +'checkparams\.com','checkparams', +'chello\.at','chelloat', +'chello\.be','chellobe', +'chello\.cz','chellocz', +'chello\.fr','chellofr', +'chello\.hu','chellohu', +'chello\.nl','chellonl', +'chello\.no','chellono', +'chello\.pl','chellopl', +'chello\.se','chellose', +'chello\.sk','chellosk', +'chello','chellocom', +'claro-search\.com','clarosearch', +'clinck\.in','clinck', +'clusty\.com','clusty', +'copernic\.com','copernic', +'crawler\.com','crawler', +'ctrouve\.','ctrouve', +'dalesearch\.com','dalesearch', +'danielsen\.com','danielsen', +'daum\.net','daum', +'de\.dolphin\.com','de_dolphin_com', +'de\.wiki\.gov\.cn','de_wiki_gov_cn', +'de\.wow\.com','wowsearch', +'dejanews\.','dejanews', +'del\.icio\.us','delicious', +'delta-search','delta-search', +'digg\.com','digg', +'dmoz\.org','dmoz', +'dodaj\.pl','dodajpl', +'dogpile\.com','dogpile', +'duckduckgo','duckduckgo', +'easysearch\.org\.uk','easysearch', 
+'ecosia\.org','ecosia', +'edderkoppen\.dk','edderkoppen', +'engine\.exe','engine', +'eniro\.no','eniro', +'eniro\.se','enirose', +'ereadingsource\.com','ereadingsource', +'es\.mirago\.com','miragoes', +'etools\.ch','etools_ch', +'euroseek\.','euroseek', +'everyclick\.com','everyclick', +'evreka\.passagen\.se','passagen', +'excite\.','excite', +'extern\.peoplecheck\.de','peoplecheck_de', +'fastbot\.de','fastbot_de', +'find\.dk','finddk', +'find1friend\.com','find1friend', +'findamo\.com','findamo', +'findarticles\.com','findarticles', +'fireball\.de','fireball', +'forums\.iboats\.com','iboats', +'foxstart\.com','foxstart', +'francite\.','francite', +'gazeta\.pl','gazetapl', +'gery\.pl','gerypl', +'globososo\.','globososo', +'go\.mail\.ru','go.mail.ru', +'go\.speedbit\.com','speedbit', +'go2net\.com','go2net', +'godado','godado', +'goggle\.co\.hu$','goggle (what)', +'goliat\.hu','goliat', +'goodsearch\.com','goodsearch', +'gotuneed\.com','gotuneed', +'haku\.www\.fi','haku', +'heureka\.hu','heureka', +'hoga\.pl','hogapl', +'hotbot\.','hotbot', +'hubwe\.net','hubwe', +'icerocket\.com','icerocket', +'icq\.com\/search','icq', +'ifind\.freeserve','freeserve', +'ilse\.','ilse', +'inbox\.com','inbox', +'index\.hu','indexhu', +'ineffabile\.it','ineffabile', +'info\.co\.uk','infouk', +'infoseek\.de','infoseek', +'infospace\.com','infospace', +'inspsearch\.com','inspsearch', +'int\.search\.myway\.com','int_search_myway_com', +'int\.search-results\.com','nortonsavesearch', +'interia\.pl','interiapl', +'isearch\.nation\.com','isearch_nation_com', +'i-une\.com','iune', +'ixquick\.com','ixquick', +'izito\.co\.uk','izito_uk', +'izito\.co\.de','izito_co_de', +'izito\.','izito_catchall', +'jubii\.dk','jubii', +'jumpy\.it','jumpyit', +'juno\.com','juno', +'jyxo\.(cz|com)','jyxo', +'kartoo\.com','kartoo', +'katalog\.onet\.pl','katalogonetpl', +'kataweb\.it','kataweb', +'kereso\.startlap\.hu','startlap_hu', +'keresolap\.hu','keresolap_hu', +'kvasir\.','kvasir', 
+'kvitters\.','kvitters', +'lapkereso\.hu','lapkereso_hu', +'lbb\.org','lbb', +'ledix\.net','ledix', +'libero\.it/','libero', +'localmoxie\.com','localmoxie', +'looksmart\.co\.uk','looksmart_co_uk', +'looksmart\.','looksmart_catchall', +'lycos\.','lycos', +'mamma\.','mamma', +'meinestadt\.de','meinestadt', +'meta\.ua','metaua', +'metabot\.ru','metabot', +'metacrawler\.','metacrawler', +'metager\.de','metager', +'miner\.bol\.com\.br','miner', +'mirago\.be','miragobe', +'mirago\.ch','miragoch', +'mirago\.co\.uk','miragocouk', +'mirago\.de','miragode', +'mirago\.dk','miragodk', +'mirago\.fr','miragofr', +'mirago\.it','miragoit', +'mirago\.nl','miragonl', +'mirago\.se','miragose', +'mirago','mirago', +'mitrasites\.com','mitrasites', +'mozbot\.fr', 'mozbot', +'my\.allgameshome\.com','allgameshome', +'mys\.yoursearch\.me','mys_yoursearch_me', +'mysearch\.','mysearch', +'mysearchdial\.com','mysearchdial', +'mysearchresults\.com','mysearchresults', +'myway\.com','myway', +'najdi\.to','najdi', +'nation\.','nation', +'navigationshilfe\.t-online\.de','navigationshilfe_t_online', +'nbci\.com\/search','nbci', +'netluchs\.de','netluchs', +'netscape\.','netscape', +'netsprint\.pl','netsprintpl', +'netstjernen\.dk','netstjernen', +'netzero\.net','netzero', +'no\.mirago\.com','miragono', +'northernlight\.','northernlight', +'nusearch\.com','nusearch', +'o2\.pl','o2pl', +'ofir\.dk','ofir', +'opasia\.dk','opasia', +'orangeworld\.co\.uk','orangeworld', +'orbis\.dk','orbis', +'overture\.com','overture', +'pch\.com','pch', +'picsearch\.de','picsearch', +'pictures\.com','pictures', +'plusnetwork\.com','plusnetwork', +'pogodak\.','pogodak', +'polska\.pl','polskapl', +'polymeta\.hu','polymeta_hu', +'preciobarato\.xyz','preciobarato_xyz', +'questionanswering\.com','questionanswering', +'quick\.cz','quick', +'rambler\.ru','rambler', +'recherche\.club-internet\.fr','clubinternet', +'rechercher\.libertysurf\.fr','libertysurf', +'redbox\.cz','redbox', +'rr\.com','rr', +'sagool\.jp','sagool', 
+'sapo\.pt','sapo', +'schoenerbrausen\.de','schoenerbrausen', +'scroogle\.org','scroogle', +'search[\w\-]+\.free\.fr','free', +'search\.1und1\.de','search_1und1_de', +'search\.alice\.it\.master','aliceitmaster', +'search\.alice\.it','aliceit', +'search\.alot\.com','alot', +'search\.bluewin\.ch','bluewin', +'search\.bt\.com','bt', +'search\.certified-toolbar\.com','certifiedtoolbarsearch', +'search\.ch','searchch', +'search\.comcast\.net','comcast', +'search\.earthlink\.net','earthlink', +'search\.fbdownloader\.com','fbdownloader', +'search\.fdownloadr\.com','fdownloadr_com', +'search\.foxtab\.com','search_foxtab_com', +'search\.genieo\.com','genieo', +'search\.goo\.ne\.jp','ichiro', +'search\.handycafe\.com','handycafe', +'search\.incredibar\.com','incredibar', +'search\.incredimail\.com','incredimail', +'search\.internetto\.hu','internetto', +'search\.orange\.co\.uk','orange', +'search\.sky\.com','sky', +'search\.sli\.sympatico\.ca','sympatico', +'search\.socialdownloadr\.com','search_socialdownloadr_com', +'search\.sweetim\.com','sweetim', +'search\.terra\.','terra', +'search\.zonealarm\.com','search_zonealarm_com', +'searchalgo\.com','searchalgo', +'searchalot\.com','searchalot', +'searchcompletion\.com','searchcompletion', +'searches\.qone8\.com','searches_qone8_com', +'searches\.safehomepage\.com','safehomepage_com', +'searches\.vi-view\.com','vi-view_com', +'searchesnavigator\.com','searchesnavigator', +'searchgol\.com','searchgol', +'searchlistingsite\.com','searchlistingsite', +'searchmobileonline\.com','searchmobileonline', +'search-results\.com','searchresultscom', +'search-results\.mobi','search-results_mobi', +'searchsafer\.com','searchsafer', +'searchy\.co\.uk','searchy', +'searchya\.com','searchya', +'segnalo\.alice\.it','segnalo', +'semalt\.com','semalt', +'sensis\.com\.au','sensis', +'seznam\.cz','seznam', +'shinyseek\.it','shinyseek\.it', +'shoppstop\.com','shoppstop', +'sify\.com','sify', +'sm\.de','smde', +'smartsuggestor\.com','smartsuggestor', 
+'snapdo\.com','snapdo', +'softonic\.com','softonic', +'sogou\.com','sogou', +'sok\.start\.no','start', +'sol\.dk','sol', +'soso\.com','soso', +'sphere\.com','sphere', +'splut\.','splut', +'spotjockey\.','spotjockey', +'spray\.','spray', +'sr\.searchfunmoods\.com','searchfunmoods', +'start\.iminent\.com','iminent', +'start\.shaw\.ca','shawca', +'start\.toshiba\.com','toshiba', +'startpage\.com','startpage', +'startsiden\.no','startsiden', +'static\.flipora\.com','flipora', +'steadysearch\.com','steadysearch', +'steady-search\.com','steadysearch', +'stumbleupon\.com','stumbleupon', +'suche\.1und1\.de','1und1_de', +'suche\.freenet\.de','freenet_de', +'suche\.gmx\.at','gmxsuche_at', +'suche\.gmx\.net','gmxsuche', +'suche\d?\.web\.de','webde', +'suchen\.abacho\.de','abacho', +'sumaja\.de','sumaja', +'supereva\.com','supereva', +'surfcanyon\.com','surfcanyon_com', +'sweetpacks-search\.com','sweetpacks', +'swik\.net','swik', +'swisscows\.ch','swisscows_ch', +'szukacz\.pl','szukaczpl', +'szukaj\.onet\.pl','onetpl', +'szukaj\.wp\.pl','wp', +'talktalk\.co\.uk','talktalk_uk', +'tango\.hu','tango_hu', +'teecno\.it','teecnoit', +'teoma\.','teoma', +'theallsearches\.com','theallsearches', +'three\.co\.uk','three', +'tiscali\.','tiscali', +'tixuma\.de','tixuma_de', +'toile\.com','toile', +'t-online\.de','t_online_de', +'t-online','t_online_catchall', +'turtle\.ru','turtle', +'tyfon\.dk','tyfon', +'uk\.foxstart\.com','uk_foxstart_com', +'ukdirectory\.','ukdirectory', +'ukindex\.co\.uk','ukindex', +'ukplus\.','ukplus', +'umfis\.de','umfis', +'umuwa\.de','umuwa_de', +'uni-hannover\.de','metahannover', +'vindex\.','vindex', +'virgilio\.it','virgilio', +'virginmedia\.com','virginmedia', +'vivisimo\.com','vivisimo', +'vizsla\.origo\.hu','origo', +'vnet\.cn','vnet', +'voila\.','voila', +'wahoo\.hu','wahoo', +'webalta\.ru','webalta', +'webcrawler\.','webcrawler', +'webmania\.hu','webmania', +'websearch\.rakuten\.co\.jp','rakuten', +'whorush\.com','whorush_com', 
+'windowssearch\.com','windowssearch_com', +'wisenut\.com','wisenut', +'wow\.pl','wowpl', +'wow\.utop\.it','wow_utop_it', +'www\.benefind\.de','benefind', +'www\.buenosearch\.com','www_buenosearch_com', +'www\.dregol\.com','www_dregol_com', +'www\.govome\.com','govome', +'www\.holasearch\.com','holasearch', +'www\.metasuche\.ch','metasuche_ch', +'www\.oneseek\.de','oneseek_de', +'www\.qwant\.com','qwant_com', +'www\.search\.com','search.com', +'www\.startxxl\.com','startxxl', +'www\.vlips\.de','vlips_de', +'www\.wow\.com','www_wow_com', +'www1\.search-results\.com','searchresults', +'wwweasel\.de','wwweasel', +'yourbestsearch\.net','yourbestsearch', +'youtube\.com','youtube', +'zhongsou\.com','zhongsou', +'zoeken\.nl','zoeken', +'zoznam\.sk','zoznam', +# Generic search engines +'search\..*\.\w+','search' +); + +# SearchEnginesWithKeysNotInQuery +# IDs of search engines that put the keywords in the URL path (the page) rather than in a query parameter +#------------------------------------------------------------------------------ +%SearchEnginesWithKeysNotInQuery=( +'a9',1 # e.g. www.a9.com/searchkey1%20searchkey2 +); + +# SearchEnginesKnownUrl +# Known rules to extract keywords from a referrer search engine URL +#------------------------------------------------------------------------------ +%SearchEnginesKnownUrl=( +# Query keys + +'www_google_co_uk','(p|q|as_p|as_q)=', +'images_google_co_uk','(p|q|as_p|as_q)=', +'google_co_uk','(p|q|as_p|as_q)=', +'www_google_com','(p|q|as_p|as_q)=', +'images_google_com','(p|q|as_p|as_q)=', +'google_com','(p|q|as_p|as_q)=', +'www_google_de','(p|q|as_p|as_q)=', +'images_google_de','(p|q|as_p|as_q)=', +'google_de','(p|q|as_p|as_q)=', +'www_google_fr','(p|q|as_p|as_q)=', +'images_google_fr','(p|q|as_p|as_q)=', +'google_fr','(p|q|as_p|as_q)=', +'www_google_ca','(p|q|as_p|as_q)=', +'images_google_ca','(p|q|as_p|as_q)=', +'google_ca','(p|q|as_p|as_q)=', +'www_google_es','(p|q|as_p|as_q)=', +'images_google_es','(p|q|as_p|as_q)=', +'google_es','(p|q|as_p|as_q)=',
+'www_google_com_au','(p|q|as_p|as_q)=', +'images_google_com_au','(p|q|as_p|as_q)=', +'google_com_au','(p|q|as_p|as_q)=', +'www_google_nl','(p|q|as_p|as_q)=', +'images_google_nl','(p|q|as_p|as_q)=', +'google_nl','(p|q|as_p|as_q)=', +'www_google_gr','(p|q|as_p|as_q)=', +'images_google_gr','(p|q|as_p|as_q)=', +'google_gr','(p|q|as_p|as_q)=', +'www_google_se','(p|q|as_p|as_q)=', +'images_google_se','(p|q|as_p|as_q)=', +'google_se','(p|q|as_p|as_q)=', +'www_google_ie','(p|q|as_p|as_q)=', +'images_google_ie','(p|q|as_p|as_q)=', +'google_ie','(p|q|as_p|as_q)=', +'www_google_it','(p|q|as_p|as_q)=', +'images_google_it','(p|q|as_p|as_q)=', +'google_it','(p|q|as_p|as_q)=', +'www_google_no','(p|q|as_p|as_q)=', +'images_google_no','(p|q|as_p|as_q)=', +'google_no','(p|q|as_p|as_q)=', +'www_google_com_tr','(p|q|as_p|as_q)=', +'images_google_com_tr','(p|q|as_p|as_q)=', +'google_com_tr','(p|q|as_p|as_q)=', +'www_google_co_in','(p|q|as_p|as_q)=', +'images_google_co_in','(p|q|as_p|as_q)=', +'google_co_in','(p|q|as_p|as_q)=', +'www_google_pt','(p|q|as_p|as_q)=', +'images_google_pt','(p|q|as_p|as_q)=', +'google_pt','(p|q|as_p|as_q)=', +'www_google_hr','(p|q|as_p|as_q)=', +'images_google_hr','(p|q|as_p|as_q)=', +'google_hr','(p|q|as_p|as_q)=', +'www_google_co_nz','(p|q|as_p|as_q)=', +'images_google_co_nz','(p|q|as_p|as_q)=', +'google_co_nz','(p|q|as_p|as_q)=', +'www_google_pl','(p|q|as_p|as_q)=', +'images_google_pl','(p|q|as_p|as_q)=', +'google_pl','(p|q|as_p|as_q)=', +'www_google_ac','(p|q|as_p|as_q)=', +'images_google_ac','(p|q|as_p|as_q)=', +'google_ac','(p|q|as_p|as_q)=', +'www_google_ad','(p|q|as_p|as_q)=', +'images_google_ad','(p|q|as_p|as_q)=', +'google_ad','(p|q|as_p|as_q)=', +'www_google_ae','(p|q|as_p|as_q)=', +'images_google_ae','(p|q|as_p|as_q)=', +'google_ae','(p|q|as_p|as_q)=', +'www_google_al','(p|q|as_p|as_q)=', +'images_google_al','(p|q|as_p|as_q)=', +'google_al','(p|q|as_p|as_q)=', +'www_google_am','(p|q|as_p|as_q)=', +'images_google_am','(p|q|as_p|as_q)=', 
+'google_am','(p|q|as_p|as_q)=', +'www_google_as','(p|q|as_p|as_q)=', +'images_google_as','(p|q|as_p|as_q)=', +'google_as','(p|q|as_p|as_q)=', +'www_google_at','(p|q|as_p|as_q)=', +'images_google_at','(p|q|as_p|as_q)=', +'google_at','(p|q|as_p|as_q)=', +'www_google_az','(p|q|as_p|as_q)=', +'images_google_az','(p|q|as_p|as_q)=', +'google_az','(p|q|as_p|as_q)=', +'www_google_ba','(p|q|as_p|as_q)=', +'images_google_ba','(p|q|as_p|as_q)=', +'google_ba','(p|q|as_p|as_q)=', +'www_google_be','(p|q|as_p|as_q)=', +'images_google_be','(p|q|as_p|as_q)=', +'google_be','(p|q|as_p|as_q)=', +'www_google_bf','(p|q|as_p|as_q)=', +'images_google_bf','(p|q|as_p|as_q)=', +'google_bf','(p|q|as_p|as_q)=', +'www_google_bg','(p|q|as_p|as_q)=', +'images_google_bg','(p|q|as_p|as_q)=', +'google_bg','(p|q|as_p|as_q)=', +'www_google_bi','(p|q|as_p|as_q)=', +'images_google_bi','(p|q|as_p|as_q)=', +'google_bi','(p|q|as_p|as_q)=', +'www_google_bj','(p|q|as_p|as_q)=', +'images_google_bj','(p|q|as_p|as_q)=', +'google_bj','(p|q|as_p|as_q)=', +'www_google_bs','(p|q|as_p|as_q)=', +'images_google_bs','(p|q|as_p|as_q)=', +'google_bs','(p|q|as_p|as_q)=', +'www_google_bt','(p|q|as_p|as_q)=', +'images_google_bt','(p|q|as_p|as_q)=', +'google_bt','(p|q|as_p|as_q)=', +'www_google_by','(p|q|as_p|as_q)=', +'images_google_by','(p|q|as_p|as_q)=', +'google_by','(p|q|as_p|as_q)=', +'www_google_cat','(p|q|as_p|as_q)=', +'images_google_cat','(p|q|as_p|as_q)=', +'google_cat','(p|q|as_p|as_q)=', +'www_google_cc','(p|q|as_p|as_q)=', +'images_google_cc','(p|q|as_p|as_q)=', +'google_cc','(p|q|as_p|as_q)=', +'www_google_cd','(p|q|as_p|as_q)=', +'images_google_cd','(p|q|as_p|as_q)=', +'google_cd','(p|q|as_p|as_q)=', +'www_google_cf','(p|q|as_p|as_q)=', +'images_google_cf','(p|q|as_p|as_q)=', +'google_cf','(p|q|as_p|as_q)=', +'www_google_cg','(p|q|as_p|as_q)=', +'images_google_cg','(p|q|as_p|as_q)=', +'google_cg','(p|q|as_p|as_q)=', +'www_google_ch','(p|q|as_p|as_q)=', +'images_google_ch','(p|q|as_p|as_q)=', 
+'google_ch','(p|q|as_p|as_q)=', +'www_google_ci','(p|q|as_p|as_q)=', +'images_google_ci','(p|q|as_p|as_q)=', +'google_ci','(p|q|as_p|as_q)=', +'www_google_cl','(p|q|as_p|as_q)=', +'images_google_cl','(p|q|as_p|as_q)=', +'google_cl','(p|q|as_p|as_q)=', +'www_google_cm','(p|q|as_p|as_q)=', +'images_google_cm','(p|q|as_p|as_q)=', +'google_cm','(p|q|as_p|as_q)=', +'www_google_cn','(p|q|as_p|as_q)=', +'images_google_cn','(p|q|as_p|as_q)=', +'google_cn','(p|q|as_p|as_q)=', +'www_google_co_ao','(p|q|as_p|as_q)=', +'images_google_co_ao','(p|q|as_p|as_q)=', +'google_co_ao','(p|q|as_p|as_q)=', +'www_google_co_bw','(p|q|as_p|as_q)=', +'images_google_co_bw','(p|q|as_p|as_q)=', +'google_co_bw','(p|q|as_p|as_q)=', +'www_google_co_ck','(p|q|as_p|as_q)=', +'images_google_co_ck','(p|q|as_p|as_q)=', +'google_co_ck','(p|q|as_p|as_q)=', +'www_google_co_cr','(p|q|as_p|as_q)=', +'images_google_co_cr','(p|q|as_p|as_q)=', +'google_co_cr','(p|q|as_p|as_q)=', +'www_google_co_id','(p|q|as_p|as_q)=', +'images_google_co_id','(p|q|as_p|as_q)=', +'google_co_id','(p|q|as_p|as_q)=', +'www_google_co_il','(p|q|as_p|as_q)=', +'images_google_co_il','(p|q|as_p|as_q)=', +'google_co_il','(p|q|as_p|as_q)=', +'www_google_co_jp','(p|q|as_p|as_q)=', +'images_google_co_jp','(p|q|as_p|as_q)=', +'google_co_jp','(p|q|as_p|as_q)=', +'www_google_co_ke','(p|q|as_p|as_q)=', +'images_google_co_ke','(p|q|as_p|as_q)=', +'google_co_ke','(p|q|as_p|as_q)=', +'www_google_co_kr','(p|q|as_p|as_q)=', +'images_google_co_kr','(p|q|as_p|as_q)=', +'google_co_kr','(p|q|as_p|as_q)=', +'www_google_co_ls','(p|q|as_p|as_q)=', +'images_google_co_ls','(p|q|as_p|as_q)=', +'google_co_ls','(p|q|as_p|as_q)=', +'www_google_co_ma','(p|q|as_p|as_q)=', +'images_google_co_ma','(p|q|as_p|as_q)=', +'google_co_ma','(p|q|as_p|as_q)=', +'www_google_co_mz','(p|q|as_p|as_q)=', +'images_google_co_mz','(p|q|as_p|as_q)=', +'google_co_mz','(p|q|as_p|as_q)=', +'www_google_co_th','(p|q|as_p|as_q)=', +'images_google_co_th','(p|q|as_p|as_q)=', 
+'google_co_th','(p|q|as_p|as_q)=', +'www_google_co_tz','(p|q|as_p|as_q)=', +'images_google_co_tz','(p|q|as_p|as_q)=', +'google_co_tz','(p|q|as_p|as_q)=', +'www_google_co_ug','(p|q|as_p|as_q)=', +'images_google_co_ug','(p|q|as_p|as_q)=', +'google_co_ug','(p|q|as_p|as_q)=', +'www_google_co_uz','(p|q|as_p|as_q)=', +'images_google_co_uz','(p|q|as_p|as_q)=', +'google_co_uz','(p|q|as_p|as_q)=', +'www_google_co_ve','(p|q|as_p|as_q)=', +'images_google_co_ve','(p|q|as_p|as_q)=', +'google_co_ve','(p|q|as_p|as_q)=', +'www_google_co_vi','(p|q|as_p|as_q)=', +'images_google_co_vi','(p|q|as_p|as_q)=', +'google_co_vi','(p|q|as_p|as_q)=', +'www_google_co_za','(p|q|as_p|as_q)=', +'images_google_co_za','(p|q|as_p|as_q)=', +'google_co_za','(p|q|as_p|as_q)=', +'www_google_co_zm','(p|q|as_p|as_q)=', +'images_google_co_zm','(p|q|as_p|as_q)=', +'google_co_zm','(p|q|as_p|as_q)=', +'www_google_co_zw','(p|q|as_p|as_q)=', +'images_google_co_zw','(p|q|as_p|as_q)=', +'google_co_zw','(p|q|as_p|as_q)=', +'www_google_com_af','(p|q|as_p|as_q)=', +'images_google_com_af','(p|q|as_p|as_q)=', +'google_com_af','(p|q|as_p|as_q)=', +'www_google_com_ag','(p|q|as_p|as_q)=', +'images_google_com_ag','(p|q|as_p|as_q)=', +'google_com_ag','(p|q|as_p|as_q)=', +'www_google_com_ai','(p|q|as_p|as_q)=', +'images_google_com_ai','(p|q|as_p|as_q)=', +'google_com_ai','(p|q|as_p|as_q)=', +'www_google_com_ar','(p|q|as_p|as_q)=', +'images_google_com_ar','(p|q|as_p|as_q)=', +'google_com_ar','(p|q|as_p|as_q)=', +'www_google_com_bd','(p|q|as_p|as_q)=', +'images_google_com_bd','(p|q|as_p|as_q)=', +'google_com_bd','(p|q|as_p|as_q)=', +'www_google_com_bh','(p|q|as_p|as_q)=', +'images_google_com_bh','(p|q|as_p|as_q)=', +'google_com_bh','(p|q|as_p|as_q)=', +'www_google_com_bn','(p|q|as_p|as_q)=', +'images_google_com_bn','(p|q|as_p|as_q)=', +'google_com_bn','(p|q|as_p|as_q)=', +'www_google_com_bo','(p|q|as_p|as_q)=', +'images_google_com_bo','(p|q|as_p|as_q)=', +'google_com_bo','(p|q|as_p|as_q)=', 
+'www_google_com_br','(p|q|as_p|as_q)=', +'images_google_com_br','(p|q|as_p|as_q)=', +'google_com_br','(p|q|as_p|as_q)=', +'www_google_com_bz','(p|q|as_p|as_q)=', +'images_google_com_bz','(p|q|as_p|as_q)=', +'google_com_bz','(p|q|as_p|as_q)=', +'www_google_com_co','(p|q|as_p|as_q)=', +'images_google_com_co','(p|q|as_p|as_q)=', +'google_com_co','(p|q|as_p|as_q)=', +'www_google_com_cu','(p|q|as_p|as_q)=', +'images_google_com_cu','(p|q|as_p|as_q)=', +'google_com_cu','(p|q|as_p|as_q)=', +'www_google_com_cy','(p|q|as_p|as_q)=', +'images_google_com_cy','(p|q|as_p|as_q)=', +'google_com_cy','(p|q|as_p|as_q)=', +'www_google_com_do','(p|q|as_p|as_q)=', +'images_google_com_do','(p|q|as_p|as_q)=', +'google_com_do','(p|q|as_p|as_q)=', +'www_google_com_ec','(p|q|as_p|as_q)=', +'images_google_com_ec','(p|q|as_p|as_q)=', +'google_com_ec','(p|q|as_p|as_q)=', +'www_google_com_eg','(p|q|as_p|as_q)=', +'images_google_com_eg','(p|q|as_p|as_q)=', +'google_com_eg','(p|q|as_p|as_q)=', +'www_google_com_et','(p|q|as_p|as_q)=', +'images_google_com_et','(p|q|as_p|as_q)=', +'google_com_et','(p|q|as_p|as_q)=', +'www_google_com_fj','(p|q|as_p|as_q)=', +'images_google_com_fj','(p|q|as_p|as_q)=', +'google_com_fj','(p|q|as_p|as_q)=', +'www_google_com_gh','(p|q|as_p|as_q)=', +'images_google_com_gh','(p|q|as_p|as_q)=', +'google_com_gh','(p|q|as_p|as_q)=', +'www_google_com_gi','(p|q|as_p|as_q)=', +'images_google_com_gi','(p|q|as_p|as_q)=', +'google_com_gi','(p|q|as_p|as_q)=', +'www_google_com_gt','(p|q|as_p|as_q)=', +'images_google_com_gt','(p|q|as_p|as_q)=', +'google_com_gt','(p|q|as_p|as_q)=', +'www_google_com_hk','(p|q|as_p|as_q)=', +'images_google_com_hk','(p|q|as_p|as_q)=', +'google_com_hk','(p|q|as_p|as_q)=', +'www_google_com_jm','(p|q|as_p|as_q)=', +'images_google_com_jm','(p|q|as_p|as_q)=', +'google_com_jm','(p|q|as_p|as_q)=', +'www_google_com_kh','(p|q|as_p|as_q)=', +'images_google_com_kh','(p|q|as_p|as_q)=', +'google_com_kh','(p|q|as_p|as_q)=', +'www_google_com_kw','(p|q|as_p|as_q)=', 
+'images_google_com_kw','(p|q|as_p|as_q)=', +'google_com_kw','(p|q|as_p|as_q)=', +'www_google_com_lb','(p|q|as_p|as_q)=', +'images_google_com_lb','(p|q|as_p|as_q)=', +'google_com_lb','(p|q|as_p|as_q)=', +'www_google_com_lc','(p|q|as_p|as_q)=', +'images_google_com_lc','(p|q|as_p|as_q)=', +'google_com_lc','(p|q|as_p|as_q)=', +'www_google_com_ly','(p|q|as_p|as_q)=', +'images_google_com_ly','(p|q|as_p|as_q)=', +'google_com_ly','(p|q|as_p|as_q)=', +'www_google_com_mm','(p|q|as_p|as_q)=', +'images_google_com_mm','(p|q|as_p|as_q)=', +'google_com_mm','(p|q|as_p|as_q)=', +'www_google_com_mt','(p|q|as_p|as_q)=', +'images_google_com_mt','(p|q|as_p|as_q)=', +'google_com_mt','(p|q|as_p|as_q)=', +'www_google_com_mx','(p|q|as_p|as_q)=', +'images_google_com_mx','(p|q|as_p|as_q)=', +'google_com_mx','(p|q|as_p|as_q)=', +'www_google_com_my','(p|q|as_p|as_q)=', +'images_google_com_my','(p|q|as_p|as_q)=', +'google_com_my','(p|q|as_p|as_q)=', +'www_google_com_na','(p|q|as_p|as_q)=', +'images_google_com_na','(p|q|as_p|as_q)=', +'google_com_na','(p|q|as_p|as_q)=', +'www_google_com_nf','(p|q|as_p|as_q)=', +'images_google_com_nf','(p|q|as_p|as_q)=', +'google_com_nf','(p|q|as_p|as_q)=', +'www_google_com_ng','(p|q|as_p|as_q)=', +'images_google_com_ng','(p|q|as_p|as_q)=', +'google_com_ng','(p|q|as_p|as_q)=', +'www_google_com_ni','(p|q|as_p|as_q)=', +'images_google_com_ni','(p|q|as_p|as_q)=', +'google_com_ni','(p|q|as_p|as_q)=', +'www_google_com_np','(p|q|as_p|as_q)=', +'images_google_com_np','(p|q|as_p|as_q)=', +'google_com_np','(p|q|as_p|as_q)=', +'www_google_com_om','(p|q|as_p|as_q)=', +'images_google_com_om','(p|q|as_p|as_q)=', +'google_com_om','(p|q|as_p|as_q)=', +'www_google_com_pa','(p|q|as_p|as_q)=', +'images_google_com_pa','(p|q|as_p|as_q)=', +'google_com_pa','(p|q|as_p|as_q)=', +'www_google_com_pe','(p|q|as_p|as_q)=', +'images_google_com_pe','(p|q|as_p|as_q)=', +'google_com_pe','(p|q|as_p|as_q)=', +'www_google_com_pg','(p|q|as_p|as_q)=', +'images_google_com_pg','(p|q|as_p|as_q)=', 
+'google_com_pg','(p|q|as_p|as_q)=', +'www_google_com_ph','(p|q|as_p|as_q)=', +'images_google_com_ph','(p|q|as_p|as_q)=', +'google_com_ph','(p|q|as_p|as_q)=', +'www_google_com_pk','(p|q|as_p|as_q)=', +'images_google_com_pk','(p|q|as_p|as_q)=', +'google_com_pk','(p|q|as_p|as_q)=', +'www_google_com_pr','(p|q|as_p|as_q)=', +'images_google_com_pr','(p|q|as_p|as_q)=', +'google_com_pr','(p|q|as_p|as_q)=', +'www_google_com_py','(p|q|as_p|as_q)=', +'images_google_com_py','(p|q|as_p|as_q)=', +'google_com_py','(p|q|as_p|as_q)=', +'www_google_com_qa','(p|q|as_p|as_q)=', +'images_google_com_qa','(p|q|as_p|as_q)=', +'google_com_qa','(p|q|as_p|as_q)=', +'www_google_com_sa','(p|q|as_p|as_q)=', +'images_google_com_sa','(p|q|as_p|as_q)=', +'google_com_sa','(p|q|as_p|as_q)=', +'www_google_com_sb','(p|q|as_p|as_q)=', +'images_google_com_sb','(p|q|as_p|as_q)=', +'google_com_sb','(p|q|as_p|as_q)=', +'www_google_com_sg','(p|q|as_p|as_q)=', +'images_google_com_sg','(p|q|as_p|as_q)=', +'google_com_sg','(p|q|as_p|as_q)=', +'www_google_com_sl','(p|q|as_p|as_q)=', +'images_google_com_sl','(p|q|as_p|as_q)=', +'google_com_sl','(p|q|as_p|as_q)=', +'www_google_com_sv','(p|q|as_p|as_q)=', +'images_google_com_sv','(p|q|as_p|as_q)=', +'google_com_sv','(p|q|as_p|as_q)=', +'www_google_com_tj','(p|q|as_p|as_q)=', +'images_google_com_tj','(p|q|as_p|as_q)=', +'google_com_tj','(p|q|as_p|as_q)=', +'www_google_com_tw','(p|q|as_p|as_q)=', +'images_google_com_tw','(p|q|as_p|as_q)=', +'google_com_tw','(p|q|as_p|as_q)=', +'www_google_com_ua','(p|q|as_p|as_q)=', +'images_google_com_ua','(p|q|as_p|as_q)=', +'google_com_ua','(p|q|as_p|as_q)=', +'www_google_com_uy','(p|q|as_p|as_q)=', +'images_google_com_uy','(p|q|as_p|as_q)=', +'google_com_uy','(p|q|as_p|as_q)=', +'www_google_com_vc','(p|q|as_p|as_q)=', +'images_google_com_vc','(p|q|as_p|as_q)=', +'google_com_vc','(p|q|as_p|as_q)=', +'www_google_com_vn','(p|q|as_p|as_q)=', +'images_google_com_vn','(p|q|as_p|as_q)=', +'google_com_vn','(p|q|as_p|as_q)=', 
+'www_google_cv','(p|q|as_p|as_q)=', +'images_google_cv','(p|q|as_p|as_q)=', +'google_cv','(p|q|as_p|as_q)=', +'www_google_cz','(p|q|as_p|as_q)=', +'images_google_cz','(p|q|as_p|as_q)=', +'google_cz','(p|q|as_p|as_q)=', +'www_google_dj','(p|q|as_p|as_q)=', +'images_google_dj','(p|q|as_p|as_q)=', +'google_dj','(p|q|as_p|as_q)=', +'www_google_dk','(p|q|as_p|as_q)=', +'images_google_dk','(p|q|as_p|as_q)=', +'google_dk','(p|q|as_p|as_q)=', +'www_google_dm','(p|q|as_p|as_q)=', +'images_google_dm','(p|q|as_p|as_q)=', +'google_dm','(p|q|as_p|as_q)=', +'www_google_dz','(p|q|as_p|as_q)=', +'images_google_dz','(p|q|as_p|as_q)=', +'google_dz','(p|q|as_p|as_q)=', +'www_google_ee','(p|q|as_p|as_q)=', +'images_google_ee','(p|q|as_p|as_q)=', +'google_ee','(p|q|as_p|as_q)=', +'www_google_fi','(p|q|as_p|as_q)=', +'images_google_fi','(p|q|as_p|as_q)=', +'google_fi','(p|q|as_p|as_q)=', +'www_google_fm','(p|q|as_p|as_q)=', +'images_google_fm','(p|q|as_p|as_q)=', +'google_fm','(p|q|as_p|as_q)=', +'www_google_ga','(p|q|as_p|as_q)=', +'images_google_ga','(p|q|as_p|as_q)=', +'google_ga','(p|q|as_p|as_q)=', +'www_google_ge','(p|q|as_p|as_q)=', +'images_google_ge','(p|q|as_p|as_q)=', +'google_ge','(p|q|as_p|as_q)=', +'www_google_gf','(p|q|as_p|as_q)=', +'images_google_gf','(p|q|as_p|as_q)=', +'google_gf','(p|q|as_p|as_q)=', +'www_google_gg','(p|q|as_p|as_q)=', +'images_google_gg','(p|q|as_p|as_q)=', +'google_gg','(p|q|as_p|as_q)=', +'www_google_gl','(p|q|as_p|as_q)=', +'images_google_gl','(p|q|as_p|as_q)=', +'google_gl','(p|q|as_p|as_q)=', +'www_google_gm','(p|q|as_p|as_q)=', +'images_google_gm','(p|q|as_p|as_q)=', +'google_gm','(p|q|as_p|as_q)=', +'www_google_gp','(p|q|as_p|as_q)=', +'images_google_gp','(p|q|as_p|as_q)=', +'google_gp','(p|q|as_p|as_q)=', +'www_google_gy','(p|q|as_p|as_q)=', +'images_google_gy','(p|q|as_p|as_q)=', +'google_gy','(p|q|as_p|as_q)=', +'www_google_hn','(p|q|as_p|as_q)=', +'images_google_hn','(p|q|as_p|as_q)=', +'google_hn','(p|q|as_p|as_q)=', 
+'www_google_ht','(p|q|as_p|as_q)=', +'images_google_ht','(p|q|as_p|as_q)=', +'google_ht','(p|q|as_p|as_q)=', +'www_google_hu','(p|q|as_p|as_q)=', +'images_google_hu','(p|q|as_p|as_q)=', +'google_hu','(p|q|as_p|as_q)=', +'www_google_im','(p|q|as_p|as_q)=', +'images_google_im','(p|q|as_p|as_q)=', +'google_im','(p|q|as_p|as_q)=', +'www_google_io','(p|q|as_p|as_q)=', +'images_google_io','(p|q|as_p|as_q)=', +'google_io','(p|q|as_p|as_q)=', +'www_google_iq','(p|q|as_p|as_q)=', +'images_google_iq','(p|q|as_p|as_q)=', +'google_iq','(p|q|as_p|as_q)=', +'www_google_is','(p|q|as_p|as_q)=', +'images_google_is','(p|q|as_p|as_q)=', +'google_is','(p|q|as_p|as_q)=', +'www_google_je','(p|q|as_p|as_q)=', +'images_google_je','(p|q|as_p|as_q)=', +'google_je','(p|q|as_p|as_q)=', +'www_google_jo','(p|q|as_p|as_q)=', +'images_google_jo','(p|q|as_p|as_q)=', +'google_jo','(p|q|as_p|as_q)=', +'www_google_kg','(p|q|as_p|as_q)=', +'images_google_kg','(p|q|as_p|as_q)=', +'google_kg','(p|q|as_p|as_q)=', +'www_google_ki','(p|q|as_p|as_q)=', +'images_google_ki','(p|q|as_p|as_q)=', +'google_ki','(p|q|as_p|as_q)=', +'www_google_kz','(p|q|as_p|as_q)=', +'images_google_kz','(p|q|as_p|as_q)=', +'google_kz','(p|q|as_p|as_q)=', +'www_google_la','(p|q|as_p|as_q)=', +'images_google_la','(p|q|as_p|as_q)=', +'google_la','(p|q|as_p|as_q)=', +'www_google_li','(p|q|as_p|as_q)=', +'images_google_li','(p|q|as_p|as_q)=', +'google_li','(p|q|as_p|as_q)=', +'www_google_lk','(p|q|as_p|as_q)=', +'images_google_lk','(p|q|as_p|as_q)=', +'google_lk','(p|q|as_p|as_q)=', +'www_google_lt','(p|q|as_p|as_q)=', +'images_google_lt','(p|q|as_p|as_q)=', +'google_lt','(p|q|as_p|as_q)=', +'www_google_lu','(p|q|as_p|as_q)=', +'images_google_lu','(p|q|as_p|as_q)=', +'google_lu','(p|q|as_p|as_q)=', +'www_google_lv','(p|q|as_p|as_q)=', +'images_google_lv','(p|q|as_p|as_q)=', +'google_lv','(p|q|as_p|as_q)=', +'www_google_md','(p|q|as_p|as_q)=', +'images_google_md','(p|q|as_p|as_q)=', +'google_md','(p|q|as_p|as_q)=', 
+'www_google_me','(p|q|as_p|as_q)=', +'images_google_me','(p|q|as_p|as_q)=', +'google_me','(p|q|as_p|as_q)=', +'www_google_mg','(p|q|as_p|as_q)=', +'images_google_mg','(p|q|as_p|as_q)=', +'google_mg','(p|q|as_p|as_q)=', +'www_google_mk','(p|q|as_p|as_q)=', +'images_google_mk','(p|q|as_p|as_q)=', +'google_mk','(p|q|as_p|as_q)=', +'www_google_ml','(p|q|as_p|as_q)=', +'images_google_ml','(p|q|as_p|as_q)=', +'google_ml','(p|q|as_p|as_q)=', +'www_google_mn','(p|q|as_p|as_q)=', +'images_google_mn','(p|q|as_p|as_q)=', +'google_mn','(p|q|as_p|as_q)=', +'www_google_ms','(p|q|as_p|as_q)=', +'images_google_ms','(p|q|as_p|as_q)=', +'google_ms','(p|q|as_p|as_q)=', +'www_google_mu','(p|q|as_p|as_q)=', +'images_google_mu','(p|q|as_p|as_q)=', +'google_mu','(p|q|as_p|as_q)=', +'www_google_mv','(p|q|as_p|as_q)=', +'images_google_mv','(p|q|as_p|as_q)=', +'google_mv','(p|q|as_p|as_q)=', +'www_google_mw','(p|q|as_p|as_q)=', +'images_google_mw','(p|q|as_p|as_q)=', +'google_mw','(p|q|as_p|as_q)=', +'www_google_ne','(p|q|as_p|as_q)=', +'images_google_ne','(p|q|as_p|as_q)=', +'google_ne','(p|q|as_p|as_q)=', +'www_google_nr','(p|q|as_p|as_q)=', +'images_google_nr','(p|q|as_p|as_q)=', +'google_nr','(p|q|as_p|as_q)=', +'www_google_nu','(p|q|as_p|as_q)=', +'images_google_nu','(p|q|as_p|as_q)=', +'google_nu','(p|q|as_p|as_q)=', +'www_google_pn','(p|q|as_p|as_q)=', +'images_google_pn','(p|q|as_p|as_q)=', +'google_pn','(p|q|as_p|as_q)=', +'www_google_ps','(p|q|as_p|as_q)=', +'images_google_ps','(p|q|as_p|as_q)=', +'google_ps','(p|q|as_p|as_q)=', +'www_google_ro','(p|q|as_p|as_q)=', +'images_google_ro','(p|q|as_p|as_q)=', +'google_ro','(p|q|as_p|as_q)=', +'www_google_rs','(p|q|as_p|as_q)=', +'images_google_rs','(p|q|as_p|as_q)=', +'google_rs','(p|q|as_p|as_q)=', +'www_google_ru','(p|q|as_p|as_q)=', +'images_google_ru','(p|q|as_p|as_q)=', +'google_ru','(p|q|as_p|as_q)=', +'www_google_rw','(p|q|as_p|as_q)=', +'images_google_rw','(p|q|as_p|as_q)=', +'google_rw','(p|q|as_p|as_q)=', 
+'www_google_sc','(p|q|as_p|as_q)=', +'images_google_sc','(p|q|as_p|as_q)=', +'google_sc','(p|q|as_p|as_q)=', +'www_google_sh','(p|q|as_p|as_q)=', +'images_google_sh','(p|q|as_p|as_q)=', +'google_sh','(p|q|as_p|as_q)=', +'www_google_si','(p|q|as_p|as_q)=', +'images_google_si','(p|q|as_p|as_q)=', +'google_si','(p|q|as_p|as_q)=', +'www_google_sk','(p|q|as_p|as_q)=', +'images_google_sk','(p|q|as_p|as_q)=', +'google_sk','(p|q|as_p|as_q)=', +'www_google_sm','(p|q|as_p|as_q)=', +'images_google_sm','(p|q|as_p|as_q)=', +'google_sm','(p|q|as_p|as_q)=', +'www_google_sn','(p|q|as_p|as_q)=', +'images_google_sn','(p|q|as_p|as_q)=', +'google_sn','(p|q|as_p|as_q)=', +'www_google_so','(p|q|as_p|as_q)=', +'images_google_so','(p|q|as_p|as_q)=', +'google_so','(p|q|as_p|as_q)=', +'www_google_sr','(p|q|as_p|as_q)=', +'images_google_sr','(p|q|as_p|as_q)=', +'google_sr','(p|q|as_p|as_q)=', +'www_google_st','(p|q|as_p|as_q)=', +'images_google_st','(p|q|as_p|as_q)=', +'google_st','(p|q|as_p|as_q)=', +'www_google_td','(p|q|as_p|as_q)=', +'images_google_td','(p|q|as_p|as_q)=', +'google_td','(p|q|as_p|as_q)=', +'www_google_tg','(p|q|as_p|as_q)=', +'images_google_tg','(p|q|as_p|as_q)=', +'google_tg','(p|q|as_p|as_q)=', +'www_google_tk','(p|q|as_p|as_q)=', +'images_google_tk','(p|q|as_p|as_q)=', +'google_tk','(p|q|as_p|as_q)=', +'www_google_tl','(p|q|as_p|as_q)=', +'images_google_tl','(p|q|as_p|as_q)=', +'google_tl','(p|q|as_p|as_q)=', +'www_google_tm','(p|q|as_p|as_q)=', +'images_google_tm','(p|q|as_p|as_q)=', +'google_tm','(p|q|as_p|as_q)=', +'www_google_tn','(p|q|as_p|as_q)=', +'images_google_tn','(p|q|as_p|as_q)=', +'google_tn','(p|q|as_p|as_q)=', +'www_google_to','(p|q|as_p|as_q)=', +'images_google_to','(p|q|as_p|as_q)=', +'google_to','(p|q|as_p|as_q)=', +'www_google_tt','(p|q|as_p|as_q)=', +'images_google_tt','(p|q|as_p|as_q)=', +'google_tt','(p|q|as_p|as_q)=', +'www_google_us','(p|q|as_p|as_q)=', +'images_google_us','(p|q|as_p|as_q)=', +'google_us','(p|q|as_p|as_q)=', 
+'www_google_vg','(p|q|as_p|as_q)=', +'images_google_vg','(p|q|as_p|as_q)=', +'google_vg','(p|q|as_p|as_q)=', +'www_google_vu','(p|q|as_p|as_q)=', +'images_google_vu','(p|q|as_p|as_q)=', +'google_vu','(p|q|as_p|as_q)=', +'www_google_ws','(p|q|as_p|as_q)=', +'images_google_ws','(p|q|as_p|as_q)=', +'google_ws','(p|q|as_p|as_q)=', + +'google_base','(q|p|as_p|as_q)=', +'google_translate','q=', +'googleByIP','googleByIP','q=', +'google_catchall','(q|p|as_p|as_q)=', +'google_cache','(p|q|as_p|as_q)=cache:[0-9A-Za-z]{12}:', +'google_froogle','(q|p|as_p|as_q)=', +'google_groups','group\/', # does not work +'google_maps','(dq|q|p|as_p|as_q)=', +'google_products','(q|p|as_p|as_q)=', +'google4counter','(q|p|as_p|as_q)=', + +'1klik','query=', +'1search','query=', +'1und1_de','q=', +'3721','(p|name)=', +'a9','a9\.com\/', +'abacho','q=', +'accoona','qt=', +'alexa','q=', +'aliceit','qs=', +'aliceitmaster','qs=', +'allesklar','q=', +'allgameshome','s=', +'alltheweb','q(|uery)=', +'alot','q=', +'altavista','p=', +'amazon','query=', +'androidsearch','q=', +'answerbus','', +'anzwers','search=', +'aol_o2suche_de','q(|uery)=', +'aolcatchall','q(|query)=', +'aolcom','q(|query)=', +'aolde','q(|query)=', +'aolfr','q(|uery)=', +'aolpl','q(|query)=', +'aolsearchde','q(|query)=', +'aoluk','q(|query)=', +'aport','r=', +'arianna','query=', +'asevenboard','query=', +'askcatchall','q=', +'askde','q=', +'askes','q=', +'askfr','q=', +'askimages','q=', +'askit','q=', +'askjp','q=', +'asknl','q=', +'askuk','q=', +'atlanticbb','q=', +'atlas','(searchtext|q)=', +'atomz','sp-q=', +'att','qry=', +'au_search_yahoo_com','p=', +'auone','q=', +'avantfind','keywords=', +'avg','q=', +'babylon','q=', +'baidu','(word|wd)=', +'bbc','q=', +'benefind','q=', +'biglotron','question=', +'bing','q(|pvt)=', +'blekko','q=', +'blingo','q=', +'bluewin', 'qry=', +'bt','p=', +'bungeebonesdotcom','query=', +'ca_search_yahoo_com','p=', +'centraldatabase','query=', +'centrum','q=', +'centurylink','q=', 
+'certifiedtoolbarsearch','q=', +'charter','q=', +'chatzum','q=', +'checkparams','q=', +'chelloat','q1=', +'chellobe','q1=', +'chellocom','q1=', +'chellocz','q1=', +'chellofr','q1=', +'chellohu','q1=', +'chellonl','q1=', +'chellono','q1=', +'chellopl','q1=', +'chellose','q1=', +'chellosk','q1=', +'clarosearch','q=', +'clinck','q=', +'clubinternet', 'q=', +'clusty','query=', +'comcast','q=', +'comettoolbar','qry=', +'conduit','q=', +'copernic','web\/', +'crawler','q=', +'ctrouve','q=', +'dalesearch','q=', +'danielsen','q=', +'daum','q=', +'de_dolphin_com','q=', +'de_wiki_gov_cn','de\.wiki\.gov\.cn\/s_', +'dejanews','q=', +'delicious','all=', +'delta-search','q=', +'digg','s=', +'dmoz','search=', +'dodajpl','keyword=', +'dogpile', '(q=|kw=|web/)', +'duckduckgo','nokey=', +'earthlink', 'q=', +'easysearch','s=', +'ecosia','q=', +'edderkoppen','query=', +'engine','p1=', +'eniro','q=', +'enirose','q=', +'ereadingsource','q=', +'etools_ch','query=', +'euroseek','query=', +'everyclick','keyword=', +'excite','search=', +'facemoods','(q|s)=', +'fastbot_de','red=[0-9]*\+', +'fbdownloader','q=', +'fdownloadr_com','q=', +'find1friend','q=', +'findamo','q=', +'findarticles','key=', +'finddk','words=', +'fireball','q=', +'flipora','q=', +'foxstart','q=', +'fr_search_yahoo_com','p=', +'francite','name=', +'free','q=', +'freenet_de','query=', +'freeserve','q=', +'funmoods','(q|s)=', +'gazetapl','slowo=', +'genieo','q=', +'gerypl','q=', +'globososo','q=', +'gmxsuche','q=', +'gmxsuche_at','q=', +'go','qt=', +'go.mail.ru','q=', +'go2net','general=', +'godado','Keywords=', +'goliat','KERESES=', +'goodsearch','Keywords=', +'gotuneed','', # Not yet known +'govome','q=', +'haku','w=', +'handycafe','q=', +'heureka','heureka=', +'hogapl','qt=', +'holasearch','q=', +'hotbot','mt=', +'hp_my_aol','q(|uery)=', +'hubwe','p=', +'iask','(w|k)=', +'iboats','highlight=', +'icerocket','q=', +'ichiro','MT=', +'icq','q=', +'ilse','search_for=', +'image_search_yahoo_co_jp','p=', 
+'images_search_yahoo_com','p=', +'iminent','q=', +'inbox','q=', +'incredibar','q=', +'incredimail','q=', +'indexhu','q=', +'ineffabile','', +'infoseek','qt=', +'infospace','qkw=', +'infouk','qkw=', +'inspsearch','q=', +'int_search_myway_com','searchfor=', +'interiapl','q=', +'internetto','searchstr=', +'isearch_nation_com','q=', +'it_search_yahoo_com','p=', +'iune','(keywords|q)=', +'ixquick', 'query=', +'izito_catchall','(query|OVKEY|q)=', +'izito_de','(query|OVKEY|q)=', +'izito_uk','(query|OVKEY|q)=', +'jubii','soegeord=', +'jumpyit','searchWord=', +'juno','query=', +'jyxo','(s|q)=', +'kartoo','', +'katalogonetpl','qt=', +'kataweb','q=', +'keresolap_hu','q=', +'kvasir', 'q=', +'kvitters','query=', +'lapkereso_hu','q=', +'lbb','q=', +'ledix','q=', +'libero','qs=', +'libertysurf','q=', +'live','q=', +'localmoxie','keyword=', +'looksmart_catchall','key=', +'looksmart_co_uk','key=', +'lycos','query=', +'mamma','query=', +'meinestadt','q=', +'metabot','st=', +'metacrawler','general=', +'metacrawler_de','qry=', +'metager','eingabe=', +'metahannover','q=', +'metasearch','q=', +'metaspinner','qry=', +'metasuche_ch','q=', +'metaua','q=', +'miner','q=', +'mirago','(txtsearch|qry)=', +'miragobe','(txtsearch|qry)=', +'miragoch','(txtsearch|qry)=', +'miragocouk','(txtsearch|qry)=', +'miragode','(txtsearch|qry)=', +'miragodk','(txtsearch|qry)=', +'miragoes','(txtsearch|qry)=', +'miragofr','(txtsearch|qry)=', +'miragoit','(txtsearch|qry)=', +'miragonl','(txtsearch|qry)=', +'miragono','(txtsearch|qry)=', +'miragose','(txtsearch|qry)=', +'mitrasites','q=', +'mozbot','q=', +'msn','q=', +'mys_yoursearch_me','q=', +'mysearch','searchfor=', +'mysearchdial','q=', +'mysearchresults','q=', +'myway','searchfor=', +'mywebsearch','searchfor=', +'najdi','dotaz=', +'nation','q=', +'navigationshilfe_t_online','q=', +'nbci','keyword=', +'netease','q=', +'netluchs','query=', +'netscape','search=', +'netsprintpl','q=', +'netstjernen','q=', +'netzero','query=', +'no query string available', 
+'northernlight','qr=', +'nortonsavesearch','q=', +'nusearch','nusearch_terms=', +'o2pl','qt=', +'ofir','querytext=', +'oneseek_de','q=', +'onetpl','qt=', +'opasia','q=', +'orange','q=', +'orangeworld','q=', +'orbis','search_field=', +'origo','(q|search)=', +'overture','keywords=', +'passagen','q=', +'pch','q=', +'peoplecheck_de','q=', +'ph_search_yahoo_com','p=', +'picsearch','q=', +'pictures','q=', +'plusnetwork','q=', +'pogodak','q=', +'polskapl','qt=', +'polymeta_hu','', +'preciobarato_xyz','s=', +'questionanswering','', +'quick','query=', +'qwant_com','q=', +'r_search_yahoo_com','p=', +'rakuten','qt=', +'rambler','words=', +'redbox','srch=', +'rr','q=', +'safehomepage_com','q=', +'sagool','q=', +'sapo','q=', +'schoenerbrausen','q=', +'scroogle','Gw=', +'se_search_yahoo_com','p=', +'search.com','q=', +'search_1und1_de','q=', +'search_foxtab_com','q=', +'search_socialdownloadr_com','q=', +'search_yahoo_co_jp','p=', +'search_yahoo_com','p=', +'search_zonealarm_com','q=', +'searchalgo','q=', +'searchalot','q=', +'searchch', 'q=', +'searchcompletion','q=', +'searches_qone8_com','q=', +'searchesnavigator','query=', +'searchfunmoods','q=', +'searchgol','q=', +'searchlistingsite','keyword=', +'searchmobileonline','q=', +'searchresults','q=', +'search-results_mobi','q=', +'searchresultscom','q=', +'searchresultsmobi','q=', +'searchsafer','q=', +'searchy', 'search_term=', +'searchya','q=', +'segnalo','', +'semalt','u=', +'sensis','find=', +'seznam','(w|q)=', +'sg_search_yahoo_com','p=', +'sg_yhs4_search_yahoo_com','p=', +'shawca','q=', +'shinyseek\.it','KEY=', +'shoppstop','keywords=', +'sify','keyword=', +'sky','term=', +'smartsuggestor','s=', +'smde','q=', +'snapdo','q=', +'softonic','q=', +'sogou', 'query=', +'sol','q=', +'soso','q=', +'speedbit','q=', +'sphere','q=', +'splut','pattern=', +'spotjockey','Search_Keyword=', +'spray','string=', +'start','q=', +'startlap_hu','q=', +'startpage','query=', +'startsiden','q=', +'startxxl','q=', +'steadysearch','w=', 
+'stumbleupon','', +'sucheaolde','q(|query)=', +'supereva','q=', +'surfcanyon_com','q=', +'sweetim','q=', +'sweetpacks','q=', +'swik','swik\.net/', +'swisscows_ch','query=', +'sympatico', 'query=', +'szukaczpl','q=', +'t_online_catchall','q=', +'t_online_de','q=', +'talktalk_uk', 'query=', +'tango_hu','q=', +'tbask','searchfor=', +'teecnoit','q=', +'teoma','q=', +'terra','query=', +'tesco','q=', +'theallsearches','query=', +'three','q=', +'tiscali','key=', +'tixuma_de','sc=', +'toile', 'q=', +'toshiba','q=', +'turtle','q=', +'tw_images_search_yahoo_com','p=', +'tyfon','q=', +'uk_foxstart_com','q=', +'uk_search_yahoo_com','p=', +'uk_yhs4_search_yahoo_com','p=', +'ukdirectory','k=', +'ukindex', 'stext=', +'ukplus','search=', +'umfis','suchbegriff=', +'umuwa_de','umuwa\.de\/', +'us_search_yahoo_com','p=', +'vindex','in=', +'virgilio','qs=', +'virginmedia','q=', +'vi-view_com','q=', +'vivisimo','query=', +'vlips_de','q=', +'vnet','kw=', +'voila','(kw|rdata)=', +'wahoo','q=', +'webalta','q=', +'webcrawler','(q|searchText)=', +'webde','su=', +'webmania','q=', +'whorush_com','q=', +'windowssearch_com','q=', +'wisenut','query=', +'wow_utop_it','q=', +'wowpl','q=', +'wowsearch','q=', +'wowuk','q=', +'wp','szukaj=', +'www_buenosearch_com','q=', +'www_dregol_com','q=', +'www_wow_com','q=', +'wwweasel','q=', +'yahoo_catchall','p=', +'yahoo_mindset','p=', +'yandex','text=', +'yandexcom','text=', +'yandexcomtr','text=', +'yandexkz','text=', +'yandexru','text=', +'yandexua','text=', +'yell','keywords=', +'yourbestsearch','k=', +'youtube','q=', +'zapmeta_catchall','query=', +'zapmeta_ch','query=', +'zapmeta_com','query=', +'zapmeta_de','query=', +'zhongsou','(word|w)=', +'zoeken','q(|uery)=', +'zoznam','q=' +); + +# SearchEnginesKnownUrlNotFound +# Known rules to extract not found keywords from a referrer search engine URL +#------------------------------------------------------------------------------ +%SearchEnginesKnownUrlNotFound=( +# Most common search engines +'msn','origq=' 
+); + +# If no rules are known, WordsToExtractSearchUrl will be used to search for the keyword parameter. NOTE(review): 'qr=' is listed twice in WordsToExtractSearchUrl; the duplicate looks redundant - confirm and drop. +# If no rules are known and the search in WordsToExtractSearchUrl fails, WordsToCleanSearchUrl will be used to clean the URL of non-keyword parameters. +#------------------------------------------------------------------------------ +@WordsToExtractSearchUrl= ('all=','as_q=','ask=','claus=','dotaz=','find=','general=','Gw=','heureka=','highlight=','ie=','image keys=','imgurl=','in=','iu=','k=','KERESES=','key=','keyword=','keywords=','kw=','mt=','name=','nusearch_terms=','OVKEY=','p=','p1=','pattern=','q=','q1=','qkw=','qpvt=','qr=','qr=','qry=','qs=','qt=','query=','querytext=','question=','r=','rdata=','req=','s=','search=','search_field=','search_for=','Search_Keyword=','search_term=','searchfor=','searchstr=','searchtext=','searchWord=','Serbian=','slowo=','soegeord=','sp-q=','srch=','st=','stext=','string=','su=','szukaj=','term=','text=','tn=','txtsearch=','uery=','w=','wd=','word=','words='); +@WordsToCleanSearchUrl= ('act=','annuaire=','btng=','cat=','categoria=','cfg=','cof=','cou=','count=','cp=','dd=','domain=','dt=','dw=','enc=','exec=','geo=','hc=','height=','hits=','hl=','hq=','hs=','id=','kl=','lang=','loc=','lr=','matchmode=','medor=','message=','meta=','mode=','order=','page=','par=','pays=','pg=','pos=','prg=','qc=','refer=','sa=','safe=','sc=','sort=','src=','start=','style=','stype=','sum=','tag=','temp=','theme=','type=','url=','user=','width=','what=','\\.x=','\\.y=','y=','look='); + +# SearchEnginesKnownUTFCoding +# Known parameters that prove a search engine has encoded its parameters in UTF-8. NOTE(review): the key 'google' may no longer match now that the Google IDs in this file are split per country (www_google_xx, images_google_xx, google_xx) - confirm against the lookup code. +#------------------------------------------------------------------------------ +%SearchEnginesKnownUTFCoding=( +# Most common search engines +'google','ie=utf-8', +'alltheweb','cs=utf-8' +); + + +# SearchEnginesHashLib +# List of search engine names +# 'search_engine_id', 'search_engine_name', +#------------------------------------------------------------------------------ 
+%SearchEnginesHashLib=( +# Search engine output links or not as the case may be + +'www_google_co_uk','Google United Kingdom', +'images_google_co_uk','Google United Kingdom ( images )', +'google_co_uk','Google United Kingdom ( catchall )', +'www_google_com','Google .com', +'images_google_com','Google .com ( images )', +'google_com','Google .com ( catchall )', +'www_google_de','Google Germany', +'images_google_de','Google Germany ( images )', +'google_de','Google Germany ( catchall )', +'www_google_fr','Google France', +'images_google_fr','Google France ( images )', +'google_fr','Google France ( catchall )', +'www_google_ca','Google Canada', +'images_google_ca','Google Canada ( images )', +'google_ca','Google Canada ( catchall )', +'www_google_es','Google Spain', +'images_google_es','Google Spain ( images )', +'google_es','Google Spain ( catchall )', +'www_google_com_au','Google Australia', +'images_google_com_au','Google Australia ( images )', +'google_com_au','Google Australia ( catchall )', +'www_google_nl','Google Netherlands', +'images_google_nl','Google Netherlands ( images )', +'google_nl','Google Netherlands ( catchall )', +'www_google_gr','Google Greece', +'images_google_gr','Google Greece ( images )', +'google_gr','Google Greece ( catchall )', +'www_google_se','Google Sweden', +'images_google_se','Google Sweden ( images )', +'google_se','Google Sweden ( catchall )', +'www_google_ie','Google Ireland', +'images_google_ie','Google Ireland ( images )', +'google_ie','Google Ireland ( catchall )', +'www_google_it','Google Italy', +'images_google_it','Google Italy ( images )', +'google_it','Google Italy ( catchall )', +'www_google_no','Google Norway', +'images_google_no','Google Norway ( images )', +'google_no','Google Norway ( catchall )', +'www_google_com_tr','Google Turkey', +'images_google_com_tr','Google Turkey ( images )', +'google_com_tr','Google Turkey ( catchall )', +'www_google_co_in','Google India', +'images_google_co_in','Google India ( images )', 
+'google_co_in','Google India ( catchall )', +'www_google_pt','Google Portugal', +'images_google_pt','Google Portugal ( images )', +'google_pt','Google Portugal ( catchall )', +'www_google_hr','Google Croatia', +'images_google_hr','Google Croatia ( images )', +'google_hr','Google Croatia ( catchall )', +'www_google_co_nz','Google New Zealand', +'images_google_co_nz','Google New Zealand ( images )', +'google_co_nz','Google New Zealand ( catchall )', +'www_google_pl','Google Poland', +'images_google_pl','Google Poland ( images )', +'google_pl','Google Poland ( catchall )', +'www_google_ac','Google Ascension Island', +'images_google_ac','Google Ascension Island ( images )', +'google_ac','Google Ascension Island ( catchall )', +'www_google_ad','Google Andorra', +'images_google_ad','Google Andorra ( images )', +'google_ad','Google Andorra ( catchall )', +'www_google_ae','Google United Arab Emirates', +'images_google_ae','Google United Arab Emirates ( images )', +'google_ae','Google United Arab Emirates ( catchall )', +'www_google_al','Google Albania', +'images_google_al','Google Albania ( images )', +'google_al','Google Albania ( catchall )', +'www_google_am','Google Armenia', +'images_google_am','Google Armenia ( images )', +'google_am','Google Armenia ( catchall )', +'www_google_as','Google American Samoa', +'images_google_as','Google American Samoa ( images )', +'google_as','Google American Samoa ( catchall )', +'www_google_at','Google Austria', +'images_google_at','Google Austria ( images )', +'google_at','Google Austria ( catchall )', +'www_google_az','Google Azerbaijan', +'images_google_az','Google Azerbaijan ( images )', +'google_az','Google Azerbaijan ( catchall )', +'www_google_ba','Google Bosnia and Herzegovina', +'images_google_ba','Google Bosnia and Herzegovina ( images )', +'google_ba','Google Bosnia and Herzegovina ( catchall )', +'www_google_be','Google Belgium', +'images_google_be','Google Belgium ( images )', +'google_be','Google Belgium ( catchall )', 
+'www_google_bf','Google Burkina Faso', +'images_google_bf','Google Burkina Faso ( images )', +'google_bf','Google Burkina Faso ( catchall )', +'www_google_bg','Google Bulgaria', +'images_google_bg','Google Bulgaria ( images )', +'google_bg','Google Bulgaria ( catchall )', +'www_google_bi','Google Burundi', +'images_google_bi','Google Burundi ( images )', +'google_bi','Google Burundi ( catchall )', +'www_google_bj','Google Benin', +'images_google_bj','Google Benin ( images )', +'google_bj','Google Benin ( catchall )', +'www_google_bs','Google Bahamas', +'images_google_bs','Google Bahamas ( images )', +'google_bs','Google Bahamas ( catchall )', +'www_google_bt','Google Bhutan', +'images_google_bt','Google Bhutan ( images )', +'google_bt','Google Bhutan ( catchall )', +'www_google_by','Google Belarus', +'images_google_by','Google Belarus ( images )', +'google_by','Google Belarus ( catchall )', +'www_google_cat','Google Catalan Countries', +'images_google_cat','Google Catalan Countries ( images )', +'google_cat','Google Catalan Countries ( catchall )', +'www_google_cc','Google Cocos (Keeling) Islands', +'images_google_cc','Google Cocos (Keeling) Islands ( images )', +'google_cc','Google Cocos (Keeling) Islands ( catchall )', +'www_google_cd','Google Democratic Republic of the Congo', +'images_google_cd','Google Democratic Republic of the Congo ( images )', +'google_cd','Google Democratic Republic of the Congo ( catchall )', +'www_google_cf','Google Central African Republic', +'images_google_cf','Google Central African Republic ( images )', +'google_cf','Google Central African Republic ( catchall )', +'www_google_cg','Google Republic of the Congo', +'images_google_cg','Google Republic of the Congo ( images )', +'google_cg','Google Republic of the Congo ( catchall )', +'www_google_ch','Google Switzerland', +'images_google_ch','Google Switzerland ( images )', +'google_ch','Google Switzerland ( catchall )', +'www_google_ci','Google Ivory Coast', 
+'images_google_ci','Google Ivory Coast ( images )', +'google_ci','Google Ivory Coast ( catchall )', +'www_google_cl','Google Chile', +'images_google_cl','Google Chile ( images )', +'google_cl','Google Chile ( catchall )', +'www_google_cm','Google Cameroon', +'images_google_cm','Google Cameroon ( images )', +'google_cm','Google Cameroon ( catchall )', +'www_google_cn','Google China', +'images_google_cn','Google China ( images )', +'google_cn','Google China ( catchall )', +'www_google_co_ao','Google Angola', +'images_google_co_ao','Google Angola ( images )', +'google_co_ao','Google Angola ( catchall )', +'www_google_co_bw','Google Botswana', +'images_google_co_bw','Google Botswana ( images )', +'google_co_bw','Google Botswana ( catchall )', +'www_google_co_ck','Google Cook Islands', +'images_google_co_ck','Google Cook Islands ( images )', +'google_co_ck','Google Cook Islands ( catchall )', +'www_google_co_cr','Google Costa Rica', +'images_google_co_cr','Google Costa Rica ( images )', +'google_co_cr','Google Costa Rica ( catchall )', +'www_google_co_id','Google Indonesia', +'images_google_co_id','Google Indonesia ( images )', +'google_co_id','Google Indonesia ( catchall )', +'www_google_co_il','Google Israel', +'images_google_co_il','Google Israel ( images )', +'google_co_il','Google Israel ( catchall )', +'www_google_co_jp','Google Japan', +'images_google_co_jp','Google Japan ( images )', +'google_co_jp','Google Japan ( catchall )', +'www_google_co_ke','Google Kenya', +'images_google_co_ke','Google Kenya ( images )', +'google_co_ke','Google Kenya ( catchall )', +'www_google_co_kr','Google South Korea', +'images_google_co_kr','Google South Korea ( images )', +'google_co_kr','Google South Korea ( catchall )', +'www_google_co_ls','Google Lesotho', +'images_google_co_ls','Google Lesotho ( images )', +'google_co_ls','Google Lesotho ( catchall )', +'www_google_co_ma','Google Morocco', +'images_google_co_ma','Google Morocco ( images )', +'google_co_ma','Google Morocco ( 
catchall )', +'www_google_co_mz','Google Mozambique', +'images_google_co_mz','Google Mozambique ( images )', +'google_co_mz','Google Mozambique ( catchall )', +'www_google_co_th','Google Thailand', +'images_google_co_th','Google Thailand ( images )', +'google_co_th','Google Thailand ( catchall )', +'www_google_co_tz','Google Tanzania', +'images_google_co_tz','Google Tanzania ( images )', +'google_co_tz','Google Tanzania ( catchall )', +'www_google_co_ug','Google Uganda', +'images_google_co_ug','Google Uganda ( images )', +'google_co_ug','Google Uganda ( catchall )', +'www_google_co_uz','Google Uzbekistan', +'images_google_co_uz','Google Uzbekistan ( images )', +'google_co_uz','Google Uzbekistan ( catchall )', +'www_google_co_ve','Google Venezuela', +'images_google_co_ve','Google Venezuela ( images )', +'google_co_ve','Google Venezuela ( catchall )', +'www_google_co_vi','Google United States Virgin Islands', +'images_google_co_vi','Google United States Virgin Islands ( images )', +'google_co_vi','Google United States Virgin Islands ( catchall )', +'www_google_co_za','Google South Africa', +'images_google_co_za','Google South Africa ( images )', +'google_co_za','Google South Africa ( catchall )', +'www_google_co_zm','Google Zambia', +'images_google_co_zm','Google Zambia ( images )', +'google_co_zm','Google Zambia ( catchall )', +'www_google_co_zw','Google Zimbabwe', +'images_google_co_zw','Google Zimbabwe ( images )', +'google_co_zw','Google Zimbabwe ( catchall )', +'www_google_com_af','Google Afghanistan', +'images_google_com_af','Google Afghanistan ( images )', +'google_com_af','Google Afghanistan ( catchall )', +'www_google_com_ag','Google Antigua and Barbuda', +'images_google_com_ag','Google Antigua and Barbuda ( images )', +'google_com_ag','Google Antigua and Barbuda ( catchall )', +'www_google_com_ai','Google Anguilla', +'images_google_com_ai','Google Anguilla ( images )', +'google_com_ai','Google Anguilla ( catchall )', +'www_google_com_ar','Google Argentina', 
+'images_google_com_ar','Google Argentina ( images )', +'google_com_ar','Google Argentina ( catchall )', +'www_google_com_bd','Google Bangladesh', +'images_google_com_bd','Google Bangladesh ( images )', +'google_com_bd','Google Bangladesh ( catchall )', +'www_google_com_bh','Google Bahrain', +'images_google_com_bh','Google Bahrain ( images )', +'google_com_bh','Google Bahrain ( catchall )', +'www_google_com_bn','Google Brunei', +'images_google_com_bn','Google Brunei ( images )', +'google_com_bn','Google Brunei ( catchall )', +'www_google_com_bo','Google Bolivia', +'images_google_com_bo','Google Bolivia ( images )', +'google_com_bo','Google Bolivia ( catchall )', +'www_google_com_br','Google Brazil', +'images_google_com_br','Google Brazil ( images )', +'google_com_br','Google Brazil ( catchall )', +'www_google_com_bz','Google Belize', +'images_google_com_bz','Google Belize ( images )', +'google_com_bz','Google Belize ( catchall )', +'www_google_com_co','Google Colombia', +'images_google_com_co','Google Colombia ( images )', +'google_com_co','Google Colombia ( catchall )', +'www_google_com_cu','Google Cuba', +'images_google_com_cu','Google Cuba ( images )', +'google_com_cu','Google Cuba ( catchall )', +'www_google_com_cy','Google Cyprus', +'images_google_com_cy','Google Cyprus ( images )', +'google_com_cy','Google Cyprus ( catchall )', +'www_google_com_do','Google Dominican Republic', +'images_google_com_do','Google Dominican Republic ( images )', +'google_com_do','Google Dominican Republic ( catchall )', +'www_google_com_ec','Google Ecuador', +'images_google_com_ec','Google Ecuador ( images )', +'google_com_ec','Google Ecuador ( catchall )', +'www_google_com_eg','Google Egypt', +'images_google_com_eg','Google Egypt ( images )', +'google_com_eg','Google Egypt ( catchall )', +'www_google_com_et','Google Ethiopia', +'images_google_com_et','Google Ethiopia ( images )', +'google_com_et','Google Ethiopia ( catchall )', +'www_google_com_fj','Google Fiji', 
+'images_google_com_fj','Google Fiji ( images )', +'google_com_fj','Google Fiji ( catchall )', +'www_google_com_gh','Google Ghana', +'images_google_com_gh','Google Ghana ( images )', +'google_com_gh','Google Ghana ( catchall )', +'www_google_com_gi','Google Gibraltar', +'images_google_com_gi','Google Gibraltar ( images )', +'google_com_gi','Google Gibraltar ( catchall )', +'www_google_com_gt','Google Guatemala', +'images_google_com_gt','Google Guatemala ( images )', +'google_com_gt','Google Guatemala ( catchall )', +'www_google_com_hk','Google Hong Kong', +'images_google_com_hk','Google Hong Kong ( images )', +'google_com_hk','Google Hong Kong ( catchall )', +'www_google_com_jm','Google Jamaica', +'images_google_com_jm','Google Jamaica ( images )', +'google_com_jm','Google Jamaica ( catchall )', +'www_google_com_kh','Google Cambodia', +'images_google_com_kh','Google Cambodia ( images )', +'google_com_kh','Google Cambodia ( catchall )', +'www_google_com_kw','Google Kuwait', +'images_google_com_kw','Google Kuwait ( images )', +'google_com_kw','Google Kuwait ( catchall )', +'www_google_com_lb','Google Lebanon', +'images_google_com_lb','Google Lebanon ( images )', +'google_com_lb','Google Lebanon ( catchall )', +'www_google_com_lc','Google Saint Lucia', +'images_google_com_lc','Google Saint Lucia ( images )', +'google_com_lc','Google Saint Lucia ( catchall )', +'www_google_com_ly','Google Libya', +'images_google_com_ly','Google Libya ( images )', +'google_com_ly','Google Libya ( catchall )', +'www_google_com_mm','Google Myanmar', +'images_google_com_mm','Google Myanmar ( images )', +'google_com_mm','Google Myanmar ( catchall )', +'www_google_com_mt','Google Malta', +'images_google_com_mt','Google Malta ( images )', +'google_com_mt','Google Malta ( catchall )', +'www_google_com_mx','Google Mexico', +'images_google_com_mx','Google Mexico ( images )', +'google_com_mx','Google Mexico ( catchall )', +'www_google_com_my','Google Malaysia', +'images_google_com_my','Google 
Malaysia ( images )', +'google_com_my','Google Malaysia ( catchall )', +'www_google_com_na','Google Namibia', +'images_google_com_na','Google Namibia ( images )', +'google_com_na','Google Namibia ( catchall )', +'www_google_com_nf','Google Norfolk Island', +'images_google_com_nf','Google Norfolk Island ( images )', +'google_com_nf','Google Norfolk Island ( catchall )', +'www_google_com_ng','Google Nigeria', +'images_google_com_ng','Google Nigeria ( images )', +'google_com_ng','Google Nigeria ( catchall )', +'www_google_com_ni','Google Nicaragua', +'images_google_com_ni','Google Nicaragua ( images )', +'google_com_ni','Google Nicaragua ( catchall )', +'www_google_com_np','Google Nepal', +'images_google_com_np','Google Nepal ( images )', +'google_com_np','Google Nepal ( catchall )', +'www_google_com_om','Google Oman', +'images_google_com_om','Google Oman ( images )', +'google_com_om','Google Oman ( catchall )', +'www_google_com_pa','Google Panama', +'images_google_com_pa','Google Panama ( images )', +'google_com_pa','Google Panama ( catchall )', +'www_google_com_pe','Google Peru', +'images_google_com_pe','Google Peru ( images )', +'google_com_pe','Google Peru ( catchall )', +'www_google_com_pg','Google Papua New Guinea', +'images_google_com_pg','Google Papua New Guinea ( images )', +'google_com_pg','Google Papua New Guinea ( catchall )', +'www_google_com_ph','Google Philippines', +'images_google_com_ph','Google Philippines ( images )', +'google_com_ph','Google Philippines ( catchall )', +'www_google_com_pk','Google Pakistan', +'images_google_com_pk','Google Pakistan ( images )', +'google_com_pk','Google Pakistan ( catchall )', +'www_google_com_pr','Google Puerto Rico', +'images_google_com_pr','Google Puerto Rico ( images )', +'google_com_pr','Google Puerto Rico ( catchall )', +'www_google_com_py','Google Paraguay', +'images_google_com_py','Google Paraguay ( images )', +'google_com_py','Google Paraguay ( catchall )', +'www_google_com_qa','Google Qatar', 
+'images_google_com_qa','Google Qatar ( images )', +'google_com_qa','Google Qatar ( catchall )', +'www_google_com_sa','Google Saudi Arabia', +'images_google_com_sa','Google Saudi Arabia ( images )', +'google_com_sa','Google Saudi Arabia ( catchall )', +'www_google_com_sb','Google Solomon Islands', +'images_google_com_sb','Google Solomon Islands ( images )', +'google_com_sb','Google Solomon Islands ( catchall )', +'www_google_com_sg','Google Singapore', +'images_google_com_sg','Google Singapore ( images )', +'google_com_sg','Google Singapore ( catchall )', +'www_google_com_sl','Google Sierra Leone', +'images_google_com_sl','Google Sierra Leone ( images )', +'google_com_sl','Google Sierra Leone ( catchall )', +'www_google_com_sv','Google El Salvador', +'images_google_com_sv','Google El Salvador ( images )', +'google_com_sv','Google El Salvador ( catchall )', +'www_google_com_tj','Google Tajikistan', +'images_google_com_tj','Google Tajikistan ( images )', +'google_com_tj','Google Tajikistan ( catchall )', +'www_google_com_tw','Google Taiwan', +'images_google_com_tw','Google Taiwan ( images )', +'google_com_tw','Google Taiwan ( catchall )', +'www_google_com_ua','Google Ukraine', +'images_google_com_ua','Google Ukraine ( images )', +'google_com_ua','Google Ukraine ( catchall )', +'www_google_com_uy','Google Uruguay', +'images_google_com_uy','Google Uruguay ( images )', +'google_com_uy','Google Uruguay ( catchall )', +'www_google_com_vc','Google Saint Vincent and the Grenadines', +'images_google_com_vc','Google Saint Vincent and the Grenadines ( images )', +'google_com_vc','Google Saint Vincent and the Grenadines ( catchall )', +'www_google_com_vn','Google Vietnam', +'images_google_com_vn','Google Vietnam ( images )', +'google_com_vn','Google Vietnam ( catchall )', +'www_google_cv','Google Cape Verde', +'images_google_cv','Google Cape Verde ( images )', +'google_cv','Google Cape Verde ( catchall )', +'www_google_cz','Google Czech Republic', +'images_google_cz','Google 
Czech Republic ( images )', +'google_cz','Google Czech Republic ( catchall )', +'www_google_dj','Google Djibouti', +'images_google_dj','Google Djibouti ( images )', +'google_dj','Google Djibouti ( catchall )', +'www_google_dk','Google Denmark', +'images_google_dk','Google Denmark ( images )', +'google_dk','Google Denmark ( catchall )', +'www_google_dm','Google Dominica', +'images_google_dm','Google Dominica ( images )', +'google_dm','Google Dominica ( catchall )', +'www_google_dz','Google Algeria', +'images_google_dz','Google Algeria ( images )', +'google_dz','Google Algeria ( catchall )', +'www_google_ee','Google Estonia', +'images_google_ee','Google Estonia ( images )', +'google_ee','Google Estonia ( catchall )', +'www_google_fi','Google Finland', +'images_google_fi','Google Finland ( images )', +'google_fi','Google Finland ( catchall )', +'www_google_fm','Google Federated States of Micronesia', +'images_google_fm','Google Federated States of Micronesia ( images )', +'google_fm','Google Federated States of Micronesia ( catchall )', +'www_google_ga','Google Gabon', +'images_google_ga','Google Gabon ( images )', +'google_ga','Google Gabon ( catchall )', +'www_google_ge','Google Georgia', +'images_google_ge','Google Georgia ( images )', +'google_ge','Google Georgia ( catchall )', +'www_google_gf','Google French Guiana', +'images_google_gf','Google French Guiana ( images )', +'google_gf','Google French Guiana ( catchall )', +'www_google_gg','Google Guernsey', +'images_google_gg','Google Guernsey ( images )', +'google_gg','Google Guernsey ( catchall )', +'www_google_gl','Google Greenland', +'images_google_gl','Google Greenland ( images )', +'google_gl','Google Greenland ( catchall )', +'www_google_gm','Google Gambia', +'images_google_gm','Google Gambia ( images )', +'google_gm','Google Gambia ( catchall )', +'www_google_gp','Google Guadeloupe', +'images_google_gp','Google Guadeloupe ( images )', +'google_gp','Google Guadeloupe ( catchall )', +'www_google_gy','Google 
Guyana', +'images_google_gy','Google Guyana ( images )', +'google_gy','Google Guyana ( catchall )', +'www_google_hn','Google Honduras', +'images_google_hn','Google Honduras ( images )', +'google_hn','Google Honduras ( catchall )', +'www_google_ht','Google Haiti', +'images_google_ht','Google Haiti ( images )', +'google_ht','Google Haiti ( catchall )', +'www_google_hu','Google Hungary', +'images_google_hu','Google Hungary ( images )', +'google_hu','Google Hungary ( catchall )', +'www_google_im','Google Isle of Man', +'images_google_im','Google Isle of Man ( images )', +'google_im','Google Isle of Man ( catchall )', +'www_google_io','Google British Indian Ocean Territory', +'images_google_io','Google British Indian Ocean Territory ( images )', +'google_io','Google British Indian Ocean Territory ( catchall )', +'www_google_iq','Google Iraq', +'images_google_iq','Google Iraq ( images )', +'google_iq','Google Iraq ( catchall )', +'www_google_is','Google Iceland', +'images_google_is','Google Iceland ( images )', +'google_is','Google Iceland ( catchall )', +'www_google_je','Google Jersey', +'images_google_je','Google Jersey ( images )', +'google_je','Google Jersey ( catchall )', +'www_google_jo','Google Jordan', +'images_google_jo','Google Jordan ( images )', +'google_jo','Google Jordan ( catchall )', +'www_google_kg','Google Kyrgyzstan', +'images_google_kg','Google Kyrgyzstan ( images )', +'google_kg','Google Kyrgyzstan ( catchall )', +'www_google_ki','Google Kiribati', +'images_google_ki','Google Kiribati ( images )', +'google_ki','Google Kiribati ( catchall )', +'www_google_kz','Google Kazakhstan', +'images_google_kz','Google Kazakhstan ( images )', +'google_kz','Google Kazakhstan ( catchall )', +'www_google_la','Google Laos', +'images_google_la','Google Laos ( images )', +'google_la','Google Laos ( catchall )', +'www_google_li','Google Liechtenstein', +'images_google_li','Google Liechtenstein ( images )', +'google_li','Google Liechtenstein ( catchall )', 
+'www_google_lk','Google Sri Lanka', +'images_google_lk','Google Sri Lanka ( images )', +'google_lk','Google Sri Lanka ( catchall )', +'www_google_lt','Google Lithuania', +'images_google_lt','Google Lithuania ( images )', +'google_lt','Google Lithuania ( catchall )', +'www_google_lu','Google Luxembourg', +'images_google_lu','Google Luxembourg ( images )', +'google_lu','Google Luxembourg ( catchall )', +'www_google_lv','Google Latvia', +'images_google_lv','Google Latvia ( images )', +'google_lv','Google Latvia ( catchall )', +'www_google_md','Google Moldova', +'images_google_md','Google Moldova ( images )', +'google_md','Google Moldova ( catchall )', +'www_google_me','Google Montenegro', +'images_google_me','Google Montenegro ( images )', +'google_me','Google Montenegro ( catchall )', +'www_google_mg','Google Madagascar', +'images_google_mg','Google Madagascar ( images )', +'google_mg','Google Madagascar ( catchall )', +'www_google_mk','Google Macedonia', +'images_google_mk','Google Macedonia ( images )', +'google_mk','Google Macedonia ( catchall )', +'www_google_ml','Google Mali', +'images_google_ml','Google Mali ( images )', +'google_ml','Google Mali ( catchall )', +'www_google_mn','Google Mongolia', +'images_google_mn','Google Mongolia ( images )', +'google_mn','Google Mongolia ( catchall )', +'www_google_ms','Google Montserrat', +'images_google_ms','Google Montserrat ( images )', +'google_ms','Google Montserrat ( catchall )', +'www_google_mu','Google Mauritius', +'images_google_mu','Google Mauritius ( images )', +'google_mu','Google Mauritius ( catchall )', +'www_google_mv','Google Maldives', +'images_google_mv','Google Maldives ( images )', +'google_mv','Google Maldives ( catchall )', +'www_google_mw','Google Malawi', +'images_google_mw','Google Malawi ( images )', +'google_mw','Google Malawi ( catchall )', +'www_google_ne','Google Niger', +'images_google_ne','Google Niger ( images )', +'google_ne','Google Niger ( catchall )', +'www_google_nr','Google Nauru', 
+'images_google_nr','Google Nauru ( images )', +'google_nr','Google Nauru ( catchall )', +'www_google_nu','Google Niue', +'images_google_nu','Google Niue ( images )', +'google_nu','Google Niue ( catchall )', +'www_google_pn','Google Pitcairn Islands', +'images_google_pn','Google Pitcairn Islands ( images )', +'google_pn','Google Pitcairn Islands ( catchall )', +'www_google_ps','Google Palestine', +'images_google_ps','Google Palestine[4] ( images )', +'google_ps','Google Palestine[4] ( catchall )', +'www_google_ro','Google Romania', +'images_google_ro','Google Romania ( images )', +'google_ro','Google Romania ( catchall )', +'www_google_rs','Google Serbia', +'images_google_rs','Google Serbia ( images )', +'google_rs','Google Serbia ( catchall )', +'www_google_ru','Google Russia', +'images_google_ru','Google Russia ( images )', +'google_ru','Google Russia ( catchall )', +'www_google_rw','Google Rwanda', +'images_google_rw','Google Rwanda ( images )', +'google_rw','Google Rwanda ( catchall )', +'www_google_sc','Google Seychelles', +'images_google_sc','Google Seychelles ( images )', +'google_sc','Google Seychelles ( catchall )', +'www_google_sh','Google Saint Helena and Ascension and Tristan da Cunha', +'images_google_sh','Google Saint Helena and Ascension and Tristan da Cunha ( images )', +'google_sh','Google Saint Helena and Ascension and Tristan da Cunha ( catchall )', +'www_google_si','Google Slovenia', +'images_google_si','Google Slovenia ( images )', +'google_si','Google Slovenia ( catchall )', +'www_google_sk','Google Slovakia', +'images_google_sk','Google Slovakia ( images )', +'google_sk','Google Slovakia ( catchall )', +'www_google_sm','Google San Marino', +'images_google_sm','Google San Marino ( images )', +'google_sm','Google San Marino ( catchall )', +'www_google_sn','Google Senegal', +'images_google_sn','Google Senegal ( images )', +'google_sn','Google Senegal ( catchall )', +'www_google_so','Google Somalia', +'images_google_so','Google Somalia ( images 
)', +'google_so','Google Somalia ( catchall )', +'www_google_sr','Google Suriname', +'images_google_sr','Google Suriname ( images )', +'google_sr','Google Suriname ( catchall )', +'www_google_st','Google Sao Tome and Principe', +'images_google_st','Google Sao Tome and Principe ( images )', +'google_st','Google Sao Tome and Principe ( catchall )', +'www_google_td','Google Chad', +'images_google_td','Google Chad ( images )', +'google_td','Google Chad ( catchall )', +'www_google_tg','Google Togo', +'images_google_tg','Google Togo ( images )', +'google_tg','Google Togo ( catchall )', +'www_google_tk','Google Tokelau', +'images_google_tk','Google Tokelau ( images )', +'google_tk','Google Tokelau ( catchall )', +'www_google_tl','Google Timor-Leste', +'images_google_tl','Google Timor-Leste ( images )', +'google_tl','Google Timor-Leste ( catchall )', +'www_google_tm','Google Turkmenistan', +'images_google_tm','Google Turkmenistan ( images )', +'google_tm','Google Turkmenistan ( catchall )', +'www_google_tn','Google Tunisia', +'images_google_tn','Google Tunisia ( images )', +'google_tn','Google Tunisia ( catchall )', +'www_google_to','Google Tonga', +'images_google_to','Google Tonga ( images )', +'google_to','Google Tonga ( catchall )', +'www_google_tt','Google Trinidad and Tobago', +'images_google_tt','Google Trinidad and Tobago ( images )', +'google_tt','Google Trinidad and Tobago ( catchall )', +'www_google_us','Google United States', +'images_google_us','Google United States ( images )', +'google_us','Google United States ( catchall )', +'www_google_vg','Google British Virgin Islands', +'images_google_vg','Google British Virgin Islands ( images )', +'google_vg','Google British Virgin Islands ( catchall )', +'www_google_vu','Google Vanuatu', +'images_google_vu','Google Vanuatu ( images )', +'google_vu','Google Vanuatu ( catchall )', +'www_google_ws','Google Samoa', +'images_google_ws','Google Samoa ( images )', +'google_ws','Google Samoa ( catchall )', + 
+'google_cache','Google (cache)', +'google_froogle','Froogle (Google)', +'google_groups','Google (Groups)', +'google_maps','Google Maps', +'googleByIP','Google (Access by IP-Address)', +'google_catchall','Google catchall sites not specified', +'google_products','Google (Products)', +'google_translate','google translate', +'google4counter','4-counter (Google)', + +'1klik','1Klik', +'1search','1search-board.com', +'1und1_de','1&1 Suche (subdomain "suche")', +'3721','3721', +'a9', 'A9', +'abacho','Abacho', +'accoona','Accoona', +'alexa','Alexa', +'aliceit','alice.it', +'aliceitmaster','search.alice.it.master', +'allesklar','allesklar.de', +'allgameshome','AllGamesHome', +'alltheweb','AllTheWeb', +'alot','alot', +'altavista','AltaVista', +'amazon','amazon', +'androidsearch','androidsearch.com', +'answerbus','Answerbus', +'anzwers','anzwers.com.au', +'aol_o2suche_de','AOL O2Suche (de)', +'aolcatchall','AOL catchall countries', +'aolcom','AOL .com', +'aolde','AOL .de', +'aolfr','AOL (fr)', +'aolpl','AOL .pl', +'aolsearchde','AOL Search de', +'aoluk','AOL .uk', +'aport','Aport', +'arianna','Arianna', +'asevenboard','asevenboard', +'askcatchall','Ask catchall sites', +'askde','Ask Deutschland', +'askes','Ask Espana', # break out Ask country specific engines. 
+'askfr','Ask France', +'askimages','ask images', +'askit','Ask Italia', +'askjp','Ask Japan', +'asknl','Ask Nederland', +'askuk','Ask UK', +'atlanticbb','atlanticbb', +'atlas','Atlas.cz', +'atomz','Atomz', +'att','AT&T search (powered by Google)', +'au_search_yahoo_com','Yahoo au.search.yahoo.com', +'auone','auone', +'avantfind','Avantfind', +'avg','avg', +'babylon','Babylon', +'baidu','Baidu', +'bbc','BBC', +'benefind','benefind', +'biglotron','Biglotron', +'bing','Bing', +'blekko','blekko', +'blingo','Blingo', +'bluewin','bluewin', +'bt','BT', +'bungeebonesdotcom','BungeeBones', +'ca_search_yahoo_com','Yahoo ca.search.yahoo.com', +'centraldatabase','GPU p2p search', +'centrum','Centrum.cz', +'centurylink','centurylink', +'certifiedtoolbarsearch','Certified-Toolbar Search', +'charter','charter', +'chatzum','chatzum', +'checkparams','checkparams', +'chelloat','Chello Austria', +'chellobe','Chello Belgium', +'chellocom','Chello (Country not recognized)', +'chellocz','Chello Czech Republic', +'chellofr','Chello France', +'chellohu','Chello Hungary', +'chellonl','Chello Netherlands', +'chellono','Chello Norway', +'chellopl','Chello Poland', +'chellose','Chello Sweden', +'chellosk','Chello Slovakia', +'clarosearch','Claro Search', +'clinck','clinck', +'clubinternet', 'Club-internet', +'clusty','Clusty', +'comcast','comcast', +'comettoolbar','Comet toolbar search', +'conduit','conduit', +'copernic','Copernic', +'crawler','crawler.com', +'ctrouve','C\'est trouve', +'dalesearch','Dale Search', +'danielsen','Thor (danielsen.com)', +'daum','daum', +'de_dolphin_com','Dolphin Search', +'de_wiki_gov_cn','Wiki Sucher', +'dejanews','DejaNews', +'delicious','del.icio.us (Social Bookmark)', +'delta-search','delta-search', +'digg','Digg (Social Bookmark)', +'dmoz','DMOZ', +'dodajpl','Dodaj.pl', +'dogpile','Dogpile', +'duckduckgo','DuckDuckGo', +'earthlink', 'Earth Link', +'easysearch','easysearch', +'ecosia','ecosia', +'edderkoppen','Edderkoppen', +'engine','Cade', 
+'eniro','Eniro', +'enirose','Eniro Sverige', +'ereadingsource','ereadingsource', +'etools_ch','eTools.ch', +'euroseek','Euroseek', +'everyclick','everyclick', +'excite','Excite', +'facemoods','facemoods', +'fastbot_de','Fastbot.de (Does not provide search keyphrases; using found page instead)', +'fbdownloader','FBDownloader (fbdownloader)', +'fdownloadr_com','FBDownloader (fdownloadr)', +'find1friend','Find1Friend', +'findamo','findamo', +'findarticles','Find Articles', +'finddk','Find', +'fireball','fireball', +'flipora','Flipora', +'foxstart','foxstart', +'fr_search_yahoo_com','Yahoo fr.search.yahoo.com', +'francite','Francite', +'free', 'Free.fr', +'freenet_de','suche.freenet.de', +'freeserve','Freeserve', +'funmoods','funmoods', +'gazetapl','Gazeta.pl', +'genieo','Genieo', +'gerypl','Gery.pl', +'globososo','Globososo', +'gmxsuche','GMX Suche', +'gmxsuche_at','GMX Suche Oesterreich', +'go','Go.com', +'go.mail.ru','go.mail.ru', +'go2net','Go2Net (Metamoteur)', +'godado','Godado.it', +'goliat','Goliat', +'goodsearch','GoodSearch', +'gotuneed','got u need', +'govome','Govome', +'haku','Ihmemaa', +'handycafe','handycafe', +'heureka','Heureka', +'hogapl','Hoga.pl', +'holasearch','Hola Search', +'hotbot','Hotbot', +'hp_my_aol','hp my aol', +'hubwe','hubwe', +'iask','Iask', +'iboats','Iboats', +'icerocket','Icerocket (Blog)', +'ichiro','Ichiro', +'icq','icq', +'ilse','Ilse', +'image_search_yahoo_co_jp','Yahoo image.search.yahoo.co.jp', +'images_search_yahoo_com','Yahoo images.search.yahoo.com', +'iminent','Iminent', +'inbox','inbox', +'incredibar','incredibar', +'incredimail','incredimail', +'indexhu','Index', +'ineffabile','Ineffabile.it (Social Bookmark)', +'infoseek','Infoseek', +'infospace','InfoSpace', +'infouk','Info UK', +'inspsearch','airzip.inspsearch.com', +'int_search_myway_com','MyWay', +'interiapl','Interia.pl', +'internetto','Internetto Kereso', +'isearch_nation_com','Nation Search', +'it_search_yahoo_com','Yahoo it.search.yahoo.com', +'iune','i-une', 
+'ixquick','ix quick', +'izito_catchall','izito ( catchall )', +'izito_de','izito .de', +'izito_uk','izito .uk', +'jubii','Jubii', +'jumpyit','Jumpy.it', +'juno','juno', +'jyxo','Jyxo.cz', +'kartoo','Kartoo', +'katalogonetpl','Katalog.Onet.pl', +'kataweb','Kataweb', +'keresolap_hu','Tango keresolap', +'kvasir','kvasir', +'kvitters','kvitters', +'lapkereso_hu','Startlapkereso', +'lbb','LBB', +'ledix','Ledix', +'libero','Libero IT', +'libertysurf', 'Libertysurf', +'live','Microsoft Windows Live', +'localmoxie','Local Moxie', +'looksmart_catchall','looksmart ( catchall )', +'looksmart_co_uk','looksmart .co.uk', +'lycos','Lycos', +'mamma','Mamma', +'meinestadt','meinestadt.de', +'metabot', 'MetaBot', +'metacrawler','metacrawler', +'metacrawler_de','metacrawler.de', +'metager','MetaGer', +'metahannover','uni-hannover.de', +'metasearch','metasearch', +'metaspinner','metaspinner', +'metasuche_ch','Metasuche.ch', +'metaua','meta.ua', +'miner','Meta Miner', +'mirago','Mirago (country unknown)', +'miragobe','Mirago Belgium', +'miragoch','Mirago Switzerland', +'miragocouk','Mirago UK', +'miragode','Mirago Germany', +'miragodk','Mirago Denmark', +'miragoes','Mirago Spain', +'miragofr','Mirago France', +'miragoit','Mirago Italy', +'miragonl','Mirago Netherlands', +'miragono','Mirago Norway', +'miragose','Mirago Sweden', +'mitrasites','mitrasites', +'mozbot','Mozbot', +'msn','Microsoft MSN Search', +'mys_yoursearch_me','Yoursearch.me', +'mysearch','My Search', +'mysearchdial','mysearchdial', +'mysearchresults','mysearchresults', +'myway','myway', +'mywebsearch','MyWebSearch', +'najdi','Najdi.to', +'nation','nation', +'navigationshilfe_t_online','Navigationshilfe T-Online', +'nbci','NBCI', +'netease', 'NetEase', +'netluchs','Netluchs', +'netscape','Netscape', +'netsprintpl','NetSprint.pl', +'netstjernen','Netstjernen', +'netzero','netzero', +'northernlight','NorthernLight', +'nortonsavesearch','Norton Safe Search', +'nusearch','Nusearch', +'o2pl','o2.pl', +'ofir','Ofir', 
+'oneseek_de','Metasuchmaschine OneSeek.de', +'onetpl','Onet.pl', +'opasia','Opasia', +'orange','orange', +'orangeworld','orangeworld', +'orbis','Orbis', +'origo','Origo-Vizsla', +'overture','Overture', +'passagen','Evreka', +'pch','pch', +'peoplecheck_de','PeopleCheck.de', +'ph_search_yahoo_com','Yahoo ph.search.yahoo.com', +'picsearch','picsearch', +'pictures','pictures', +'plusnetwork','Search Plus Network', +'pogodak','Pogodak.com', +'polskapl','Polska', +'polymeta_hu','Polymeta', +'preciobarato_xyz','preciobarato xyz', +'questionanswering','Questionanswering', +'quick','Quick.cz', +'qwant_com','qwant.com', +'r_search_yahoo_com','Yahoo r.search.yahoo.com', +'rakuten','websearch.rakuten.co.jp', +'rambler','Rambler', +'redbox','RedBox.cz', +'rr','rr', +'safehomepage_com','safehomepage.com', +'sagool','Sagool', +'sapo','Sapo', +'schoenerbrausen','Schoenerbrausen/', +'scroogle','Scroogle', +'se_search_yahoo_com','Yahoo se.search.yahoo.com', +'search.com','Search.com', +'search_1und1_de','1&1 Suche (subdomain "search")', +'search_foxtab_com','Foxtab Search', +'search_socialdownloadr_com','Socialdownloadr', +'search_yahoo_co_jp','Yahoo search.yahoo.co.jp', +'search_yahoo_com','Yahoo search.yahoo.com', +'search_zonealarm_com','Zone Alarm Search', +'searchalgo','searchalgo', +'searchalot','Searchalot', +'searchch','search ch', +'searchcompletion','searchcompletion', +'searches_qone8_com','Omiga-Plus', +'searchesnavigator','searchesnavigator', +'searchfunmoods','Funmoods', +'searchgol','Search-Gol', +'searchlistingsite','SearchLlistingSite', +'searchmobileonline','searchmobileonline', +'searchresults','Search-results', +'search-results_mobi','search-results.mobi', +'searchresultscom','search-results.com ( all sites )', +'searchresultsmobi','search-results.mobi', +'searchsafer','searchsafer', +'searchy','searchy.co.uk', +'searchya','Searchya', +'segnalo','Segnalo (Social Bookmark)', +'semalt','semalt', +'sensis','Sensis AU', +'seznam','Seznam', 
+'sg_search_yahoo_com','Yahoo sg.search.yahoo.com', +'sg_yhs4_search_yahoo_com','Yahoo sg.yhs4.search.yahoo.com', +'shawca','Shaw.ca', +'shinyseek\.it','Shinyseek.it', +'shoppstop','ShoppStop', +'sify','Sify', +'sky','sky', +'smartsuggestor','smartsuggestor', +'smde','SM.de - Die SuchMaschine', +'snapdo','snapdo', +'softonic','softonic', +'sogou','SoGou', +'sol','SOL', +'soso','SoSo', +'speedbit','Speedbit', +'sphere','Sphere (Blog)', +'splut','Splut', +'spotjockey','Spotjockey', +'spray','Spray', +'start','start.no', +'startlap_hu','Startlab Kereso', +'startpage','startpage.com', +'startsiden','startsiden', +'startxxl','StartXXL', +'steadysearch','Avantfind', +'stumbleupon','Stumbleupon (Social Bookmark)', +'sucheaolde','SucheAOL .de', +'sumaja','Sumaja', +'supereva','Supereva', +'surfcanyon_com','SurfCanyon', +'sweetim','sweetim', +'sweetpacks','Sweetpacks', +'swik','Swik (Social Bookmark)', +'swisscows_ch','Swisscows', +'sympatico','Sympatico', +'szukaczpl','Szukacz', +'t_online_catchall','T-Online ( catchall )', +'t_online_de','T-Online .de', +'talktalk_uk','talktalk uk', +'tango_hu','Tango', +'tbask','tb.ask ?', +'teecnoit','Teecno', +'teoma','Teoma', +'terra','Terra', +'tesco','tesco', +'theallsearches','theallsearches', +'three','three', +'tiscali','Tiscali', +'tixuma_de','Tixuma Deutschland', +'toile','Toile du Quebec', +'toshiba','toshiba', +'turtle','Turtle', +'tw_images_search_yahoo_com','Yahoo tw.images.search.yahoo.com', +'tyfon','Tyfon', +'uk_foxstart_com','Foxstart.com', +'uk_search_yahoo_com','Yahoo uk.search.yahoo.com', +'uk_yhs4_search_yahoo_com','Yahoo uk.yhs4.search.yahoo.com', +'ukdirectory','UK Directory', +'ukindex','UKIndex', +'ukplus','UK Plus', +'umfis','UMFIS-Online Das Umweltfirmen-Informationssystem der IHKs in Deutschland', +'umuwa_de','Umuwa Deutschland', +'us_search_yahoo_com','Yahoo us.search.yahoo.com', +'vindex','Vindex', +'virgilio','Virgilio', +'virginmedia','virginmedia', +'vi-view_com','vi-view.com', +'vivisimo','Vivisimo', 
+'vlips_de','vlips .de', +'vnet','VNet', +'voila','Voila', +'wahoo','Wahoo', +'webalta','webalta.ru', +'webcrawler','WebCrawler', +'webde','Web.de', +'webmania','webmania.hu', +'whorush_com','whorush com', +'windowssearch_com','windowssearch.com', +'wisenut','WISENut', +'wow_utop_it','wow.utop.it', +'wowpl','Wow.pl', +'wowsearch','Wow Search', +'wowuk','uk.wow.com', +'wp','Wirtualna Polska', +'www_buenosearch_com','BuenoSearch', +'www_dregol_com','Dregol Search', +'www_wow_com','WOW.com', +'wwweasel','WWWeasel', +'yahoo_catchall','yahoo ( catchall )', +'yahoo_mindset','Yahoo! Mindset', +'yandex','yandex ( catchall )', +'yandexcom','yandex .com', +'yandexcomtr','yandex .com.tr', +'yandexkz','yandex .kz', +'yandexru','yandex .ru', +'yandexua','yandex .ua', +'yell','Yell', +'yourbestsearch','YourBest Search', +'youtube','youtube', +'zapmeta_catchall','zapmeta ( catchall )', +'zapmeta_ch','zapmeta ch', +'zapmeta_com','zapmeta com', +'zapmeta_de','zapmeta de', +'zhongsou','ZhongSou', +'zoeken','Zoeken', +'zoznam','Zoznam', + +# Generic search engines +'search','Unknown search engines' +); + +# Sanity check. +# Enable this code and run perl search_engines.pm to check file entries are ok +#----------------------------------------------------------------------------- +#foreach my $key (@SearchEnginesSearchIDOrder_list1) { +# if (! $SearchEnginesHashID{$key}) { error("Entry '$key' has been found in SearchEnginesSearchIDOrder_list1 with no value in SearchEnginesHashID"); +# foreach my $key2 (@SearchEnginesSearchIDOrder_list2) { if ($key2 eq $key) { error("$key is in 1 and 2\n"); } } +# foreach my $key2 (@SearchEnginesSearchIDOrder_listgen) { if ($key2 eq $key) { error("$key is in 1 and gen\n"); } } +#} } +#foreach my $key (@SearchEnginesSearchIDOrder_list2) { +# if (! 
$SearchEnginesHashID{$key}) { error("Entry '$key' has been found in SearchEnginesSearchIDOrder_list2 with no value in SearchEnginesHashID"); +# foreach my $key2 (@SearchEnginesSearchIDOrder_list1) { if ($key2 eq $key) { error("$key is in 2 and 1\n"); } } +# foreach my $key2 (@SearchEnginesSearchIDOrder_listgen) { if ($key2 eq $key) { error("$key is in 2 and gen\n"); } } +#} } +#foreach my $key (@SearchEnginesSearchIDOrder_listgen) { if (! $SearchEnginesHashID{$key}) { error("Entry '$key' has been found in SearchEnginesSearchIDOrder_listgen with no value in SearchEnginesHashID"); } } +#foreach my $key (keys %NotSearchEnginesKeys) { if (! $SearchEnginesHashID{$key}) { error("Entry '$key' has been found in NotSearchEnginesKeys with no value in SearchEnginesHashID"); } } +#foreach my $key (keys %SearchEnginesKnownUrl) { +# my $found=0; +# foreach my $key2 (values %SearchEnginesHashID) { +# if ($key eq $key2) { $found=1; last; } +# } +# if (! $found) { die "Entry '$key' has been found in SearchEnginesKnownUrl with no value in SearchEnginesHashID"; } +#} +#foreach my $key (keys %SearchEnginesHashLib) { +# my $found=0; +# foreach my $key2 (values %SearchEnginesHashID) { +# if ($key eq $key2) { $found=1; last; } +# } +# if (! $found) { die "Entry '$key' has been found in SearchEnginesHashLib with no value in SearchEnginesHashID"; } +#} +#print @SearchEnginesSearchIDOrder_list1." ".@SearchEnginesSearchIDOrder_list2." ".@SearchEnginesSearchIDOrder_listgen; + +1; -- 2.47.2