# AWSTATS ROBOTS DATABASE
#------------------------
-# Last update: 2001-10-20
+# Last update: 2001-12-02
+
+
+# List of active major robots (every ID is lowercase and is also a key of %RobotHashIDLib)
+@RobotArrayID_major = (
+"googlebot",
+"tcl",
+"ia_archiver",
+"scooter",
+"fast-webcrawler",	# lowercase, same spelling as the %RobotHashIDLib key
+"bjaaland",
+"echo",
+"jeeves",
+"voila",
+"voyager",
+"mercator",
+"linkwalker",	# lowercase, same spelling as the %RobotHashIDLib key
+"slurp",
+"wisenutbot",
+"gulliver",
+"myweb",
+"wget",
+"architext",	# %RobotHashIDLib key is "architext" (label "ArchitextSpider")
+"webbase",
+"muscatferret",	# listed before "ferret", which is a substring of this ID
+"lycos",
+"nomad",
+"appie",
+"weblayers",
+"moget",
+"unlost_web_crawler",
+"antibot",
+"harvest",
+"ferret",
+"jennybot",
+"peternews",
+"htdig"
+);
+
+# Generic robot
+@RobotArrayID_generic = (
+    "robot",
+);
+
# Robot name list ("os id","os clear text")
#-------------------------------------------------------
-# Main list of robots (found at http://info.webcrawler.com/mak/projects/robots/active.html)
-# This command show how to generate tab list from this file: cat robotslist.txt | sed 's/:/ /' | awk ' /robot-id/ { name=tolower($2); } /robot-name/ { print "\""name"\", \""$0"\"," } ' | sed 's/robot-name *//g' > file
+%RobotHashIDLib = (
+# Robots found at http://www.robotstxt.org/wc/active/all.txt
# Rem: To avoid bad detection, some robots id were removed from this list:
# - Robots with ID of 2 letters only
# - Robot called "webs"
# Rem: directhit is changed into direct_hit (its real id)
# Rem: calif is changed into calif[^r] to avoid confusion between tiscalifreenet browser
-%RobotHashIDLib = (
-"acme.spider", "Acme.Spider",
-"ahoythehomepagefinder", "Ahoy! The Homepage Finder",
-"alkaline", "Alkaline",
-"appie", "Walhello appie",
-"arachnophilia", "Arachnophilia",
-"architext", "ArchitextSpider",
-"aretha", "Aretha",
-"ariadne", "ARIADNE",
-"aspider", "ASpider (Associative Spider)",
-"atn.txt", "ATN Worldwide",
-"atomz", "Atomz.com Search Robot",
-"auresys", "AURESYS",
-"backrub", "BackRub",
-"bigbrother", "Big Brother",
-"bjaaland", "Bjaaland",
-"blackwidow", "BlackWidow",
-"blindekuh", "Die Blinde Kuh",
-"bloodhound", "Bloodhound",
-"brightnet", "bright.net caching robot",
-"bspider", "BSpider",
-"cactvschemistryspider", "CACTVS Chemistry Spider",
-"calif[^r]", "Calif",
-"cassandra", "Cassandra",
-"cgireader", "Digimarc Marcspider/CGI",
-"checkbot", "Checkbot",
-"churl", "churl",
-"cmc", "CMC/0.01",
-"collective", "Collective",
-"combine", "Combine System",
-"conceptbot", "Conceptbot",
-"core", "Web Core / Roots",
-"cshkust", "CS-HKUST WISE: WWW Index and Search Engine",
-"cusco", "Cusco",
-"cyberspyder", "CyberSpyder Link Test",
-"deweb", "DeWeb(c) Katalog/Index",
-"dienstspider", "DienstSpider",
-"diibot", "Digital Integrity Robot",
-"direct_hit", "Direct Hit Grabber",
-"dnabot", "DNAbot",
-"download_express", "DownLoad Express",
-"dragonbot", "DragonBot",
-"dwcp", "DWCP (Dridus' Web Cataloging Project)",
-"ebiness", "EbiNess",
-"eit", "EIT Link Verifier Robot",
-"emacs", "Emacs-w3 Search Engine",
-"emcspider", "ananzi",
-"esther", "Esther",
-"evliyacelebi", "Evliya Celebi",
-"fdse", "Fluid Dynamics Search Engine robot",
-"felix", " Felix IDE",
-"ferret", "Wild Ferret Web Hopper #1, #2, #3",
-"fetchrover", "FetchRover",
-"fido", "fido",
-"finnish", "Hämähäkki",
-"fireball", "KIT-Fireball",
-"fish", "Fish search",
-"fouineur", "Fouineur",
-"francoroute", "Robot Francoroute",
-"freecrawl", "Freecrawl",
-"funnelweb", "FunnelWeb",
-"gazz", "gazz",
-"gcreep", "GCreep",
-"getbot", "GetBot",
-"geturl", "GetURL",
-"golem", "Golem",
-"googlebot", "Googlebot",
-"grapnel", "Grapnel/0.01 Experiment",
-"griffon", "Griffon",
-"gromit", "Gromit",
-"gulliver", "Northern Light Gulliver",
-"hambot", "HamBot",
-"harvest", "Harvest",
-"havindex", "havIndex",
-"hometown", "Hometown Spider Pro",
-"wired-digital", "Wired Digital",
-"htdig", "ht://Dig",
-"htmlgobble", "HTMLgobble",
-"hyperdecontextualizer", "Hyper-Decontextualizer",
-"ibm", "IBM_Planetwide",
-"iconoclast", "Popular Iconoclast",
-"ilse", "Ingrid",
-"imagelock", "Imagelock ",
-"incywincy", "IncyWincy",
-"informant", "Informant",
-"infoseek", "InfoSeek Robot 1.0",
-"infoseeksidewinder", "Infoseek Sidewinder",
-"infospider", "InfoSpiders",
-"inspectorwww", "Inspector Web",
-"intelliagent", "IntelliAgent",
-"iron33", "Iron33",
-"israelisearch", "Israeli-search",
-"javabee", "JavaBee",
-"jcrawler", "JCrawler",
-"jeeves", "Jeeves",
-"jobot", "Jobot",
-"joebot", "JoeBot",
-"jubii", "The Jubii Indexing Robot",
-"jumpstation", "JumpStation",
-"katipo", "Katipo",
-"kdd", "KDD-Explorer",
-"kilroy", "Kilroy",
-"ko_yappo_robot", "KO_Yappo_Robot",
-"labelgrabber.txt", "LabelGrabber",
-"larbin", "larbin",
-"legs", "legs",
-"linkscan", "LinkScan",
-"linkwalker", "LinkWalker",
-"lockon", "Lockon",
-"logo_gif", "logo.gif Crawler",
-"lycos", "Lycos",
-"macworm", "Mac WWWWorm",
-"magpie", "Magpie",
-"mediafox", "MediaFox",
-"merzscope", "MerzScope",
-"meshexplorer", "NEC-MeshExplorer",
-"mindcrawler", "MindCrawler",
-"moget", "moget",
-"momspider", "MOMspider",
-"monster", "Monster",
-"motor", "Motor",
-"muscatferret", "Muscat Ferret",
-"mwdsearch", "Mwd.Search",
-"myweb", "Internet Shinchakubin",
-"netcarta", "NetCarta WebMap Engine",
-"netmechanic", "NetMechanic",
-"netscoop", "NetScoop",
-"newscan-online", "newscan-online",
-"nhse", "NHSE Web Forager",
-"nomad", "Nomad",
-"northstar", "The NorthStar Robot",
-"nzexplorer", "nzexplorer",
-"occam", "Occam",
-"octopus", "HKU WWW Octopus",
-"orb_search", "Orb Search",
-"packrat", "Pack Rat",
-"pageboy", "PageBoy",
-"parasite", "ParaSite",
-"patric", "Patric",
-"perignator", "The Peregrinator",
-"perlcrawler", "PerlCrawler 1.0",
-"phantom", "Phantom",
-"piltdownman", "PiltdownMan",
-"pioneer", "Pioneer",
-"pitkow", "html_analyzer",
-"pjspider", "Portal Juice Spider",
-"pka", "PGP Key Agent",
-"plumtreewebaccessor", "PlumtreeWebAccessor",
-"poppi", "Poppi",
-"portalb", "PortalB Spider",
-"puu", "GetterroboPlus Puu",
-"python", "The Python Robot",
-"raven", "Raven Search",
-"rbse", "RBSE Spider",
-"resumerobot", "Resume Robot",
-"rhcs", "RoadHouse Crawling System",
-"roadrunner", "Road Runner: The ImageScape Robot",
-"robbie", "Robbie the Robot",
-"robi", "ComputingSite Robi/1.0",
-"roverbot", "Roverbot",
-"safetynetrobot", "SafetyNet Robot",
-"scooter", "Scooter",
-"search_au", "Search.Aus-AU.COM",
-"searchprocess", "SearchProcess",
-"senrigan", "Senrigan",
-"sgscout", "SG-Scout",
-"shaggy", "ShagSeeker",
-"shaihulud", "Shai'Hulud",
-"sift", "Sift",
-"simbot", "Simmany Robot Ver1.0",
-"site-valet", "Site Valet",
-"sitegrabber", "Open Text Index Robot",
-"sitetech", "SiteTech-Rover",
-"slurp", "Inktomi Slurp",
-"smartspider", "Smart Spider",
-"snooper", "Snooper",
-"solbot", "Solbot",
-"spanner", "Spanner",
-"speedy", "Speedy Spider",
-"spider_monkey", "spider_monkey",
-"spiderbot", "SpiderBot",
-"spiderman", "SpiderMan",
-"spry", "Spry Wizard Robot",
-"ssearcher", "Site Searcher",
-"suke", "Suke",
-"sven", "Sven",
-"tach_bw", "TACH Black Widow",
-"tarantula", "Tarantula",
-"tarspider", "tarspider",
-"tcl", "Tcl W3 Robot",
-"techbot", "TechBOT",
-"templeton", "Templeton",
-"titin", "TitIn",
-"titan", "TITAN",
-"tkwww", "The TkWWW Robot",
-"tlspider", "TLSpider",
-"ucsd", "UCSD Crawl",
-"udmsearch", "UdmSearch",
-"urlck", "URL Check",
-"valkyrie", "Valkyrie",
-"victoria", "Victoria",
-"visionsearch", "vision-search",
-"voyager", "Voyager",
-"vwbot", "VWbot",
-"w3index", "The NWI Robot",
-"w3m2", "W3M2",
-"wanderer", "the World Wide Web Wanderer",
-"webbandit", "WebBandit Web Spider",
-"webcatcher", "WebCatcher",
-"webcopy", "WebCopy",
-"webfetcher", "webfetcher",
-"webfoot", "The Webfoot Robot",
-"weblayers", "Weblayers",
-"weblinker", "WebLinker",
-"webmirror", "WebMirror",
-"webmoose", "The Web Moose",
-"webquest", "WebQuest",
-"webreader", "Digimarc MarcSpider",
-"webreaper", "WebReaper",
-"websnarf", "Websnarf",
-"webspider", "WebSpider",
-"webvac", "WebVac",
-"webwalk", "webwalk",
-"webwalker", "WebWalker",
-"webwatch", "WebWatch",
-"wget", "Wget",
-"whowhere", "WhoWhere Robot",
-"wmir", "w3mir",
-"wolp", "WebStolperer",
-"wombat", "The Web Wombat ",
-"worm", "The World Wide Web Worm",
-"wwwc", "WWWC Ver 0.2.5",
-"wz101", "WebZinger",
-"xget", "XGET",
-"nederland.zoek", "Nederland.zoek",
-
-# Not declared robots
-"antibot", "Antibot (Not referenced robot)",
-"cscrawler","CsCrawler (Not referenced robot)",
-"daviesbot", "DaviesBot (Not referenced robot)",
-"ezresult", "Ezresult (Not referenced robot)",
-"fast-webcrawler", "Fast-Webcrawler (Not referenced robot)",
-"gnodspider","GNOD Spider (Not referenced robot)",
-"jennybot", "JennyBot (Not referenced robot)",
-"justview", "JustView (Not referenced robot)",
-"mercator", "Mercator (Not referenced robot)",
-"perman surfer", "Perman surfer (Not referenced robot)",
-"redalert", "Red Alert (Not referenced robot)",
-"shoutcast","Shoutcast Directory Service (Not referenced robot)",
-"unlost_web_crawler", "Unlost_Web_Crawler (Not referenced robot)",
-"webbase", "WebBase (Not referenced robot)",
-"wisenutbot","WISENutbot (Not referenced robot)",
-"yandex", "Yandex bot (Not referenced robot)",
-# Supposed to be robots
-"boris", "Boris (Not referenced robot)",
-"digout4u", "digout4u (Not referenced robot)",
-"echo", "EchO! (Not referenced robot)",
-"ia_archiver", "ia_archiver (Not referenced robot)",
-"ultraseek", "Ultraseek (Not referenced robot)",
-"voila", "Voila (Not referenced robot)",
-"webcompass", "webcompass (Not referenced robot)",
-# Generic ID
-"robot", "Unknown robot (Not referenced robot)"
+"acme.spider","Acme.Spider",
+"ahoythehomepagefinder","Ahoy! The Homepage Finder",
+"alkaline","Alkaline",
+"appie","Walhello appie",
+"arachnophilia","Arachnophilia",
+"architext","ArchitextSpider",
+"aretha","Aretha",
+"ariadne","ARIADNE",
+"arks","arks",
+"aspider","ASpider (Associative Spider)",
+"atn.txt","ATN Worldwide",
+"atomz","Atomz.com Search Robot",
+"auresys","AURESYS",
+"backrub","BackRub",
+"bigbrother","Big Brother",
+"bjaaland","Bjaaland",
+"blackwidow","BlackWidow",
+"blindekuh","Die Blinde Kuh",
+"bloodhound","Bloodhound",
+"brightnet","bright.net caching robot",
+"bspider","BSpider",
+"cactvschemistryspider","CACTVS Chemistry Spider",
+"calif[^r]","Calif",	# [^r] keeps "tiscalifreenet" browsers from matching
+"cassandra","Cassandra",
+"cgireader","Digimarc Marcspider/CGI",
+"checkbot","Checkbot",
+"churl","churl",
+"cmc","CMC/0.01",
+"collective","Collective",
+"combine","Combine System",
+"conceptbot","Conceptbot",
+"coolbot","CoolBot",
+"core","Web Core / Roots",
+"cosmos","XYLEME Robot",
+"cruiser","Internet Cruiser Robot",
+"cusco","Cusco",
+"cyberspyder","CyberSpyder Link Test",
+"deweb","DeWeb(c) Katalog/Index",
+"dienstspider","DienstSpider",
+"digger","Digger",
+"diibot","Digital Integrity Robot",
+"direct_hit","Direct Hit Grabber",	# real ID is direct_hit, not directhit
+"dnabot","DNAbot",
+"download_express","DownLoad Express",
+"dragonbot","DragonBot",
+"dwcp","DWCP (Dridus' Web Cataloging Project)",
+"e-collector","e-collector",
+"ebiness","EbiNess",
+"eit","EIT Link Verifier Robot",
+"elfinbot","ELFINBOT",
+"emacs","Emacs-w3 Search Engine",
+"emcspider","ananzi",
+"esther","Esther",
+"evliyacelebi","Evliya Celebi",
+"nzexplorer","nzexplorer",
+"fdse","Fluid Dynamics Search Engine robot",
+"felix","Felix IDE",
+"ferret","Wild Ferret Web Hopper #1, #2, #3",
+"fetchrover","FetchRover",
+"fido","fido",
+"finnish","Hämähäkki",
+"fireball","KIT-Fireball",
+"fish","Fish search",
+"fouineur","Fouineur",
+"francoroute","Robot Francoroute",
+"freecrawl","Freecrawl",
+"funnelweb","FunnelWeb",
+"gama","gammaSpider, FocusedCrawler",
+"gazz","gazz",
+"gcreep","GCreep",
+"getbot","GetBot",
+"geturl","GetURL",
+"golem","Golem",
+"googlebot","Googlebot",
+"grapnel","Grapnel/0.01 Experiment",
+"griffon","Griffon",
+"gromit","Gromit",
+"gulliver","Northern Light Gulliver",
+"hambot","HamBot",
+"harvest","Harvest",
+"havindex","havIndex",
+"hometown","Hometown Spider Pro",
+"wired-digital","Wired Digital",
+"htdig","ht://Dig",
+"htmlgobble","HTMLgobble",
+"hyperdecontextualizer","Hyper-Decontextualizer",
+"iajabot","iajaBot",
+"ibm","IBM_Planetwide",
+"iconoclast","Popular Iconoclast",
+"ilse","Ingrid",
+"imagelock","Imagelock",
+"incywincy","IncyWincy",
+"informant","Informant",
+"infoseek","InfoSeek Robot 1.0",
+"infoseeksidewinder","Infoseek Sidewinder",
+"infospider","InfoSpiders",
+"inspectorwww","Inspector Web",
+"intelliagent","IntelliAgent",
+"irobot","I, Robot",
+"iron33","Iron33",
+"israelisearch","Israeli-search",
+"javabee","JavaBee",
+"jbot","JBot Java Web Robot",
+"jcrawler","JCrawler",
+"jeeves","Jeeves",
+"jobo","JoBo Java Web Robot",
+"jobot","Jobot",
+"joebot","JoeBot",
+"jubii","The Jubii Indexing Robot",
+"jumpstation","JumpStation",
+"katipo","Katipo",
+"kdd","KDD-Explorer",
+"kilroy","Kilroy",
+"ko_yappo_robot","KO_Yappo_Robot",
+"labelgrabber.txt","LabelGrabber",
+"larbin","larbin",
+"legs","legs",
+"linkidator","Link Validator",
+"linkscan","LinkScan",
+"linkwalker","LinkWalker",
+"lockon","Lockon",
+"logo_gif","logo.gif Crawler",
+"lycos","Lycos",
+"macworm","Mac WWWWorm",
+"magpie","Magpie",
+"marvin","marvin/infoseek",
+"mattie","Mattie",
+"mediafox","MediaFox",
+"merzscope","MerzScope",
+"meshexplorer","NEC-MeshExplorer",
+"mindcrawler","MindCrawler",
+"moget","moget",
+"momspider","MOMspider",
+"monster","Monster",
+"motor","Motor",
+"muscatferret","Muscat Ferret",
+"mwdsearch","Mwd.Search",
+"myweb","Internet Shinchakubin",
+"netcarta","NetCarta WebMap Engine",
+"netmechanic","NetMechanic",
+"netscoop","NetScoop",
+"newscan-online","newscan-online",
+"nhse","NHSE Web Forager",
+"nomad","Nomad",
+"northstar","The NorthStar Robot",
+"occam","Occam",
+"octopus","HKU WWW Octopus",
+"openfind","Openfind data gatherer",
+"orb_search","Orb Search",
+"packrat","Pack Rat",
+"pageboy","PageBoy",
+"parasite","ParaSite",
+"patric","Patric",
+"pegasus","pegasus",
+"perignator","The Peregrinator",
+"perlcrawler","PerlCrawler 1.0",
+"phantom","Phantom",
+"piltdownman","PiltdownMan",
+"pimptrain","Pimptrain.com's robot",
+"pioneer","Pioneer",
+"pitkow","html_analyzer",
+"pjspider","Portal Juice Spider",
+"pka","PGP Key Agent",
+"plumtreewebaccessor","PlumtreeWebAccessor",
+"poppi","Poppi",
+"portalb","PortalB Spider",
+"puu","GetterroboPlus Puu",
+"python","The Python Robot",
+"raven","Raven Search",
+"rbse","RBSE Spider",
+"resumerobot","Resume Robot",
+"rhcs","RoadHouse Crawling System",
+"roadrunner","Road Runner: The ImageScape Robot",
+"robbie","Robbie the Robot",
+"robi","ComputingSite Robi/1.0",
+"robofox","RoboFox",
+"robozilla","Robozilla",
+"roverbot","Roverbot",
+"rules","RuLeS",
+"safetynetrobot","SafetyNet Robot",
+"scooter","Scooter",
+"search_au","Search.Aus-AU.COM",
+"searchprocess","SearchProcess",
+"senrigan","Senrigan",
+"sgscout","SG-Scout",
+"shaggy","ShagSeeker",
+"shaihulud","Shai'Hulud",
+"sift","Sift",
+"simbot","Simmany Robot Ver1.0",
+"site-valet","Site Valet",
+"sitegrabber","Open Text Index Robot",
+"sitetech","SiteTech-Rover",
+"slcrawler","SLCrawler",
+"slurp","Inktomi Slurp",
+"smartspider","Smart Spider",
+"snooper","Snooper",
+"solbot","Solbot",
+"spanner","Spanner",
+"speedy","Speedy Spider",
+"spider_monkey","spider_monkey",
+"spiderbot","SpiderBot",
+"spiderline","Spiderline Crawler",
+"spiderman","SpiderMan",
+"spiderview","SpiderView(tm)",
+"spry","Spry Wizard Robot",
+"ssearcher","Site Searcher",
+"suke","Suke",
+"suntek","suntek search engine",
+"sven","Sven",
+"tach_bw","TACH Black Widow",
+"tarantula","Tarantula",
+"tarspider","tarspider",
+"tcl","Tcl W3 Robot",
+"techbot","TechBOT",
+"templeton","Templeton",
+"teoma_agent1","TeomaTechnologies",
+"titin","TitIn",
+"titan","TITAN",
+"tkwww","The TkWWW Robot",
+"tlspider","TLSpider",
+"ucsd","UCSD Crawl",
+"udmsearch","UdmSearch",
+"urlck","URL Check",
+"valkyrie","Valkyrie",
+"victoria","Victoria",
+"visionsearch","vision-search",
+"voyager","Voyager",
+"vwbot","VWbot",
+"w3index","The NWI Robot",
+"w3m2","W3M2",
+"wallpaper","WallPaper",
+"wanderer","the World Wide Web Wanderer",
+"wapspider","w@pSpider by wap4.com",
+"webbandit","WebBandit Web Spider",
+"webcatcher","WebCatcher",
+"webcopy","WebCopy",
+"webfetcher","webfetcher",
+"webfoot","The Webfoot Robot",
+"weblayers","weblayers",
+"weblinker","WebLinker",
+"webmirror","WebMirror",
+"webmoose","The Web Moose",
+"webquest","WebQuest",
+"webreader","Digimarc MarcSpider",
+"webreaper","WebReaper",
+"websnarf","Websnarf",
+"webspider","WebSpider",
+"webvac","WebVac",
+"webwalk","webwalk",
+"webwalker","WebWalker",
+"webwatch","WebWatch",
+"wget","Wget",
+"whatuseek","whatUseek Winona",
+"whowhere","WhoWhere Robot",
+"wmir","w3mir",
+"wolp","WebStolperer",
+"wombat","The Web Wombat",
+"worm","The World Wide Web Worm",
+"wwwc","WWWC Ver 0.2.5",
+"wz101","WebZinger",
+"xget","XGET",
+"nederland.zoek","Nederland.zoek",
+# Other robots reported by users (not in the robotstxt.org list)
+"antibot","Antibot",
+"boris","Boris",
+"cscrawler","CsCrawler",
+"daviesbot","DaviesBot",
+"digout4u","digout4u",
+"echo","EchO!",
+"ezresult","Ezresult",
+"fast-webcrawler","Fast-Webcrawler",
+"gnodspider","GNOD Spider",
+"ia_archiver","ia_archiver",
+"jennybot","JennyBot",
+"justview","JustView",
+"mercator","Mercator",
+"perman","Perman surfer",
+"peternews","Peternews",
+"redalert","Red Alert",
+"shoutcast","Shoutcast Directory Service",
+"ultraseek","Ultraseek",
+"unlost_web_crawler","Unlost_Web_Crawler",
+"voila","Voila",
+"webbase","WebBase",
+"webcompass","webcompass",
+"wisenutbot","WISENutbot",
+"yandex","Yandex bot",
+# Other robots can be found at http://www.jafsoft.com/searchengines/webbots.html
+# Generic root ID (the same ID is the only entry of @RobotArrayID_generic)
+"robot","Unknown robot"
);
+
1;