# use strict and use vars are commented to make AWStats working with old perl.
use strict;no strict "refs";
-use vars qw(%DomainsHashIDLib @RobotsSearchIDOrder_list1 @RobotsSearchIDOrder_list2 @RobotsSearchIDOrder_list3 @BrowsersSearchIDOrder @OSSearchIDOrder @WordsToCleanSearchUrl %BrowsersHereAreGrabbers %BrowsersHashIcon %BrowsersHashIDLib %OSHashID %OSHashLib %RobotsHashIDLib %SearchEnginesHashIDLib %SearchEnginesKnownUrl %DomainsHashIDLib);
+use vars qw(%DomainsHashIDLib @RobotsSearchIDOrder_list1 @RobotsSearchIDOrder_list2 @RobotsSearchIDOrder_list3 @BrowsersSearchIDOrder @OSSearchIDOrder @WordsToCleanSearchUrl %BrowsersHereAreGrabbers %BrowsersHashIcon %BrowsersHashIDLib %OSHashID %OSHashLib %RobotsHashIDLib @SearchEnginesSearchIDOrder %SearchEnginesHashIDLib %SearchEnginesKnownUrl %DomainsHashIDLib);
#use warnings; # Must be used in test mode only. This reduce a little process speed
#use diagnostics; # Must be used in test mode only. This reduce a lot of process speed
use Socket;
my @DOWIndex=();
my @RobotArrayList = my @RobotsSearchIDOrder = ();
#my @RobotsSearchIDOrder_list1=(); my @RobotsSearchIDOrder_list2=(); my @RobotsSearchIDOrder_list3=();
-#my @BrowsersSearchIDOrder = my @OSSearchIDOrder = ();
+#my @BrowsersSearchIDOrder = my @OSSearchIDOrder = my @SearchEnginesSearchIDOrder = ();
#my @WordsToCleanSearchUrl=();
my @_msiever_h = my @_nsver_h = ();
my @_from_p = my @_from_h = ();
# Sanity check.
if (@OSSearchIDOrder != scalar keys %OSHashID) { error("Error: Not same number of records of OSSearchIDOrder (".(@OSSearchIDOrder)." entries) and OSHashID (".(scalar keys %OSHashID)." entries) in OS database. Check your file ".$FilePath{"operating_systems.pl"}); }
if (@BrowsersSearchIDOrder != scalar keys %BrowsersHashIDLib) { error("Error: Not same number of records of BrowsersSearchIDOrder (".(@BrowsersSearchIDOrder)." entries) and BrowsersHashIDLib (".(scalar keys %BrowsersHashIDLib)." entries) in Browsers database. Check your file ".$FilePath{"browsers.pl"}); }
+ if (@SearchEnginesSearchIDOrder != scalar keys %SearchEnginesHashIDLib) { error("Error: Not same number of records of SearchEnginesSearchIDOrder (".(@SearchEnginesSearchIDOrder)." entries) and SearchEnginesHashIDLib (".(scalar keys %SearchEnginesHashIDLib)." entries) in Search Engines database. Check your file ".$FilePath{"search_engines.pl"}); }
if ((@RobotsSearchIDOrder_list1+@RobotsSearchIDOrder_list2+@RobotsSearchIDOrder_list3) != scalar keys %RobotsHashIDLib) { error("Error: Not same number of records of RobotsSearchIDOrder_listx (total is ".(@RobotsSearchIDOrder_list1+@RobotsSearchIDOrder_list2+@RobotsSearchIDOrder_list3)." entries) and RobotsHashIDLib (".(scalar keys %RobotsHashIDLib)." entries) in Robots database. Check your file ".$FilePath{"robots.pl"}); }
}
# Other ?
if (!$found) {
- foreach my $key (@BrowsersSearchIDOrder) {
+ foreach my $key (@BrowsersSearchIDOrder) { # Search ID in order of BrowsersSearchIDOrder
if ($UserAgent =~ /$key/) {
$_browser_h{$key}++;
$found=1;
if (! $TmpHashOS{$UserAgent}) {
my $found=0;
# in OSHashID list ?
- foreach my $key (@OSSearchIDOrder) { # Searchin ID in order of OSSearchIDOrder
+ foreach my $key (@OSSearchIDOrder) { # Search ID in order of OSSearchIDOrder
if ($UserAgent =~ /$key/) {
$_os_h{$OSHashID{$key}}++;
$found=1;
if ($LevelForRefererAnalyze && $field[$pos_referer]) {
# Direct ?
- if ($field[$pos_referer] eq "-" || $field[$pos_referer] eq "bookmarks") { # "bookmarks" is sent by Netscape
+ if ($field[$pos_referer] eq "-" || $field[$pos_referer] eq "bookmarks") { # "bookmarks" is sent by Netscape, "-" by all others browsers
if ($PageBool) { $_from_p[0]++; }
$_from_h[0]++;
$found=1;
if ($LevelForSearchEnginesDetection) {
- # If made on each record -> -1700 rows/seconds (should be made on 10% of records only)
- foreach my $key (keys %SearchEnginesHashIDLib) {
- # This hit came from the search engine $key
- if ($refererserver =~ /$key/i) {
- if ($Debug) { debug("Server $refererserver is added to TmpHashRefererServer with value '$key'",2); }
- $TmpHashRefererServer{$refererserver}="$key";
- $found=1;
- last;
+ # If made on each record -> -1700 rows/seconds (should be made on 10% of records only)
+ foreach my $key (@SearchEnginesSearchIDOrder) { # Search ID in order of SearchEnginesSearchIDOrder
+ if ($refererserver =~ /$key/i) {
+ # This hit came from the search engine $key
+ if ($Debug) { debug("Server $refererserver is added to TmpHashRefererServer with value '$key'",2); }
+ $TmpHashRefererServer{$refererserver}="$key";
+ $found=1;
+ last;
+ }
}
}
-
- }
-
}
}
# If you want to add a Browser to extend AWStats database detection capabilities,
# you must add an entry in BrowsersSearchIDOrder and in BrowsersHashIDLib.
#-------------------------------------------------------
-# Last change $Revision$ - $Author$ - $Date$
+# $Revision$ - $Author$ - $Date$
"libwww" # Must be at end because some browser have both "browser id" and "libwww"
);
+# BrowsersHashIDLib
+# Browser names ("browser id in lower case", "browser text"); one entry is needed for each ID of BrowsersSearchIDOrder
+#---------------------------------------------------------------
+%BrowsersHashIDLib = (
+# Common web browsers text (IE and Netscape must not be in this list)
+"icab","iCab",
+"go!zilla","Go!Zilla",
+"konqueror","Konqueror",
+"links","Links",
+"lynx","Lynx",
+"omniweb","OmniWeb",
+"opera","Opera",
+"wget","Wget",
+"22acidownload","22AciDownload",
+"aol\\-iweng","AOL-Iweng",
+"amaya","Amaya",
+"amigavoyager","AmigaVoyager",
+"antfresco","ANT Fresco",
+"bpftp","BPFTP",
+"cyberdog","Cyberdog",
+"dreamcast","Dreamcast",
+"downloadagent","DownloadAgent",
+"ecatch", "eCatch",
+"emailsiphon","EmailSiphon",
+"encompass","Encompass",
+"friendlyspider","FriendlySpider",
+"getright","GetRight",
+"headdump","HeadDump",
+"hotjava","Sun HotJava",
+"ibrowse","IBrowse",
+"intergo","InterGO",
+"linemodebrowser","W3C Line Mode Browser",
+"lotus-notes","Lotus Notes web client",
+"macweb","MacWeb",
+"ncsa_mosaic","NCSA Mosaic",
+"netpositive","NetPositive",
+"nutscrape", "Nutscrape",
+"msfrontpageexpress","MS FrontPage Express",
+"tzgeturl","TZGETURL",
+"viking","Viking",
+"webfetcher","WebFetcher",
+"webexplorer","IBM-WebExplorer",
+"webmirror","WebMirror",
+"webvcr","WebVCR",
+# Site grabbers
+"teleport","TelePort Pro (site grabber)",
+"webcapture","Acrobat (site grabber)",
+"webcopier", "WebCopier (site grabber)",
+# Media players
+"real","RealAudio or compatible (media player)",
+"winamp","WinAmp (media player)", # Works for winampmpeg and winamp3httprdr
+"windows-media-player","Windows Media Player (media player)",
+"audion","Audion (media player)",
+"freeamp","FreeAmp (media player)",
+"itunes","Apple iTunes (media player)",
+"jetaudio","JetAudio (media player)",
+"mint_audio","Mint Audio (media player)",
+"mpg123","mpg123 (media player)",
+"nsplayer","NetShow Player (media player)",
+"sonique","Sonique (media player)",
+"uplayer","Ultra Player (media player)",
+"xmms","XMMS (media player)",
+"xaudio","Some XAudio Engine based MPEG player (media player)",
+# PDA/cell phone browsers
+"mmef","Microsoft Mobile Explorer (PDA/Phone browser)",
+"mspie","MS Pocket Internet Explorer (PDA/Phone browser)",
+"up\.","UP.Browser (PDA/Phone browser)", # Works for UP.Browser and UP.Link
+"wapalizer","WAPalizer (PDA/Phone browser)",
+"wapsilon","WAPsilon (PDA/Phone browser)",
+"webcollage","WebCollage (PDA/Phone browser)",
+"alcatel","Alcatel Browser (PDA/Phone browser)",
+"nokia","Nokia Browser (PDA/Phone browser)",
+# Others (TV)
+"webtv","WebTV browser",
+# Other kind of browsers
+"csscheck","WDG CSS Validator",
+"w3m","w3m",
+"w3c_css_validator","W3C CSS Validator",
+"w3c_validator","W3C HTML Validator",
+"wdg_validator","WDG HTML Validator",
+"webzip","WebZIP",
+"staroffice","StarOffice",
+"libwww","LibWWW"
+);
+
+
# BrowsersHashAreGrabber
# Put here an entry for each browser in BrowsersSearchIDOrder that are grabber
# browsers.
"webcopier","1",,
);
+
# BrowsersHashIcon
# Each Browsers Search ID is associated to a string that is the name of icon
# file for this OS.
);
-# Browser name list ("browser id in lower case", "browser text")
-#---------------------------------------------------------------
-%BrowsersHashIDLib = (
-# Common web browsers text (IE and Netscape must not be in this list)
-"icab","iCab",
-"go!zilla","Go!Zilla",
-"konqueror","Konqueror",
-"links","Links",
-"lynx","Lynx",
-"omniweb","OmniWeb",
-"opera","Opera",
-"wget","Wget",
-"22acidownload","22AciDownload",
-"aol\\-iweng","AOL-Iweng",
-"amaya","Amaya",
-"amigavoyager","AmigaVoyager",
-"antfresco","ANT Fresco",
-"bpftp","BPFTP",
-"cyberdog","Cyberdog",
-"dreamcast","Dreamcast",
-"downloadagent","DownloadAgent",
-"ecatch", "eCatch",
-"emailsiphon","EmailSiphon",
-"encompass","Encompass",
-"friendlyspider","FriendlySpider",
-"getright","GetRight",
-"headdump","HeadDump",
-"hotjava","Sun HotJava",
-"ibrowse","IBrowse",
-"intergo","InterGO",
-"linemodebrowser","W3C Line Mode Browser",
-"lotus-notes","Lotus Notes web client",
-"macweb","MacWeb",
-"ncsa_mosaic","NCSA Mosaic",
-"netpositive","NetPositive",
-"nutscrape", "Nutscrape",
-"msfrontpageexpress","MS FrontPage Express",
-"tzgeturl","TZGETURL",
-"viking","Viking",
-"webfetcher","WebFetcher",
-"webexplorer","IBM-WebExplorer",
-"webmirror","WebMirror",
-"webvcr","WebVCR",
-# Site grabbers
-"teleport","TelePort Pro (site grabber)",
-"webcapture","Acrobat (site grabber)",
-"webcopier", "WebCopier (site grabber)",
-# Music only browsers
-"real","RealAudio or compatible (media player)",
-"winamp","WinAmp (media player)", # Works for winampmpeg and winamp3httprdr
-"windows-media-player","Windows Media Player (media player)",
-"audion","Audion (media player)",
-"freeamp","FreeAmp (media player)",
-"itunes","Apple iTunes (media player)",
-"jetaudio","JetAudio (media player)",
-"mint_audio","Mint Audio (media player)",
-"mpg123","mpg123 (media player)",
-"nsplayer","NetShow Player (media player)",
-"sonique","Sonique (media player)",
-"uplayer","Ultra Player (media player)",
-"xmms","XMMS (media player)",
-"xaudio","Some XAudio Engine based MPEG player (media player)",
-# PDA/Phonecell browsers
-"mmef","Microsoft Mobile Explorer (PDA/Phone browser)",
-"mspie","MS Pocket Internet Explorer (PDA/Phone browser)",
-"up\.","UP.Browser (PDA/Phone browser)", # Works for UP.Browser and UP.Link
-"wapalizer","WAPalizer (PDA/Phone browser)",
-"wapsilon","WAPsilon (PDA/Phone browser)",
-"webcollage","WebCollage (PDA/Phone browser)",
-"alcatel","Alcatel Browser (PDA/Phone browser)",
-"nokia","Nokia Browser (PDA/Phone browser)",
-# Others (TV)
-"webtv","WebTV browser",
-# Other kind of browsers
-"csscheck","WDG CSS Validator",
-"w3m","w3m",
-"w3c_css_validator","W3C CSS Validator",
-"w3c_validator","W3C HTML Validator",
-"wdg_validator","WDG HTML Validator",
-"webzip","WebZIP",
-"staroffice","StarOffice",
-"libwww","LibWWW"
-);
-
1;
# AWSTATS SEARCH ENGINES DATABASE
-#--------------------------------
-# Last change $Revision$ - $Author$ - $Date$
+#-------------------------------------------------------
+# If you want to add a Search Engine to extend AWStats database detection capabilities,
+# you must add an entry in SearchEnginesSearchIDOrder and in SearchEnginesHashIDLib.
+# If the URL parameter holding the keywords is known, an entry in SearchEnginesKnownUrl is also welcome.
+#-------------------------------------------------------
+# $Revision$ - $Author$ - $Date$
-# Search engines names database
-# To add a search engine, add a new line:
+# SearchEnginesSearchIDOrder
+# Order in which search engine IDs are tried: the first match wins, so the most
+# frequent engines come first (faster detection) and the catch-all entry must stay last.
+# Note: IDs are matched as case-insensitive regexes; each needs a SearchEnginesHashIDLib entry.
+#-----------------------------------------------------------------
+@SearchEnginesSearchIDOrder=(
+# Major international search engines
+"google\.",
+"msn\.",
+"voila\.",
+"yahoo\.",
+"lycos\.",
+"altavista\.",
+"search\.terra\.",
+"alltheweb\.com",
+"netscape\.",
+"dmoz\.org",
+"search\.aol\.co",
+"www\.search\.com",
+"overture\.com", # Replace "goto\.com","Goto.com",
+# Minor international search engines
+"northernlight\.",
+"hotbot\.",
+"kvasir\.",
+"webcrawler\.",
+"metacrawler\.",
+"go2net\.com",
+"go\.com",
+"euroseek\.",
+"excite\.",
+"lokace\.",
+"spray\.",
+"netfind\.aol\.com",
+"recherche\.aol\.fr",
+"nbci\.com/search",
+"askjeeves\.",
+"mamma\.",
+"dejanews\.",
+"search\.dogpile\.com",
+"wisenut\.com",
+# Minor brazilian search engines
+"engine\.exe", "miner\.bol\.com\.br",
+# Minor danish search-engines
+"opasia\.dk", "danielsen\.com",
+# Minor dutch search engines
+"ilse\.","vindex\.",
+# Minor english search engines
+"splut\.","ukplus\.","mirago\.","ukindex\.co\.uk","ukdirectory\.",
+# Minor finnish search engines
+"haku\.www\.fi",
+# Minor french search engines
+"nomade\.fr/","ctrouve\.","francite\.","\.lbb\.org","rechercher\.libertysurf\.fr",
+# Minor german search engines
+"fireball\.de","infoseek\.de","suche\.web\.de","meta\.ger",
+# Minor italian search engines
+"virgilio\.it",
+# Minor norwegian search engines
+"sok\.start\.no",
+# Minor swedish search engines
+"evreka\.passagen\.se",
+# Minor czech search engines
+"atlas\.cz","seznam\.cz","quick\.cz","centrum\.cz","najdi\.to","redbox\.cz",
+# Other (catch-all)
+"search\..*com"
+);
+
+
+# SearchEnginesHashIDLib
+# List of search engines names
# "match_string_in_url_that_identify_engine", "search_engine_name",
#-----------------------------------------------------------------
%SearchEnginesHashIDLib=(
-# Most common search engines
-"yahoo\.","Yahoo",
-"altavista\.","AltaVista",
+# Major international search engines
+"google\.","Google",
"msn\.","MSN",
"voila\.", "Voila",
+"yahoo\.","Yahoo",
"lycos\.","Lycos",
+"altavista\.","AltaVista",
"search\.terra\.","Terra",
-"google\.","Google",
"alltheweb\.com","AllTheWeb",
"netscape\.","Netscape",
-"northernlight\.","NorthernLight",
"dmoz\.org","DMOZ",
"search\.aol\.co","AOL",
"www\.search\.com","Search.com",
-"kvasir\.","Kvasir",
-# Others
+"overture\.com","Overture", # Replace "goto\.com","Goto.com",
+# Minor international search engines
+"northernlight\.","NorthernLight",
"hotbot\.","Hotbot",
+"kvasir\.","Kvasir",
"webcrawler\.","WebCrawler",
"metacrawler\.","MetaCrawler (Metamoteur)",
"go2net\.com","Go2Net (Metamoteur)",
"go\.com","Go.com",
-"overture\.com","Overture", # Replace "goto\.com","Goto.com",
"euroseek\.","Euroseek",
"excite\.","Excite",
"lokace\.", "Lokace",
"dejanews\.","DejaNews",
"search\.dogpile\.com","Dogpile",
"wisenut\.com","WISENut",
-"engine\.exe","Cade", "miner\.bol\.com\.br","Meta Miner", # Minor brazilian search engines
-"opasia\.dk","Opasia", "danielsen\.com","Thor (danielsen.com)", # Minor danish search-engines
-"ilse\.","Ilse","vindex\.","Vindex\.nl", # Minor dutch search engines
-"splut\.","Splut", "ukplus\.", "UKPlus", "mirago\.", "Mirago", "ukindex\.co\.uk", "UKIndex", "ukdirectory\.","UK Directory", # Minor english search engines
-"haku\.www\.fi","Ihmemaa", # Minor finnish search engines
-"nomade\.fr/","Nomade", "ctrouve\.","C'est trouvé", "francite\.","Francité", "\.lbb\.org", "LBB", "rechercher\.libertysurf\.fr", "Libertysurf", # Minor french search engines
-"fireball\.de","Fireball", "infoseek\.de","Infoseek", "suche\.web\.de","Web.de", "meta\.ger","MetaGer", # Minor german search engines
-"virgilio\.it","Virgilio", # Minor italian search engines
-"sok\.start\.no","start.no", # Minor norvegian search engines
-"evreka\.passagen\.se","Evreka", # Minor swedish search engines
-"atlas\.cz","Atlas.cz", "seznam\.cz","Seznam.cz", "quick\.cz","Quick.cz", "centrum\.cz","Centrum.cz", #Minor czech search engines
+# Minor brazilian search engines
+"engine\.exe","Cade", "miner\.bol\.com\.br","Meta Miner",
+# Minor danish search-engines
+"opasia\.dk","Opasia", "danielsen\.com","Thor (danielsen.com)",
+# Minor dutch search engines
+"ilse\.","Ilse","vindex\.","Vindex\.nl",
+# Minor english search engines
+"splut\.","Splut", "ukplus\.", "UKPlus", "mirago\.", "Mirago", "ukindex\.co\.uk", "UKIndex", "ukdirectory\.","UK Directory",
+# Minor finnish search engines
+"haku\.www\.fi","Ihmemaa",
+# Minor french search engines
+"nomade\.fr/","Nomade", "ctrouve\.","C'est trouvé", "francite\.","Francité", "\.lbb\.org", "LBB", "rechercher\.libertysurf\.fr", "Libertysurf",
+# Minor german search engines
+"fireball\.de","Fireball", "infoseek\.de","Infoseek", "suche\.web\.de","Web.de", "meta\.ger","MetaGer",
+# Minor italian search engines
+"virgilio\.it","Virgilio",
+# Minor norwegian search engines
+"sok\.start\.no","start.no",
+# Minor swedish search engines
+"evreka\.passagen\.se","Evreka",
+# Minor czech search engines
+"atlas\.cz","Atlas.cz", "seznam\.cz","Seznam.cz", "quick\.cz","Quick.cz", "centrum\.cz","Centrum.cz","najdi\.to","Najdi.to","redbox\.cz","RedBox.cz",
+# Other
"search\..*com","Other search engines"
);
-# Search engines known URLs rules to find keywords
+
+# SearchEnginesKnownUrl
+# Search engines known rules to extract keywords from a referrer URL
#-------------------------------------------------
%SearchEnginesKnownUrl=(
# Most common search engines
"fireball\.de","q=", "infoseek\.de","qt=", "suche\.web\.de","su=", # Minor german search engines
"sok\.start\.no", "q=", # Minor norvegian search engines
"evreka\.passagen\.se","q=", # Minor swedish search engines
-"atlas\.cz","searchtext=", "seznam\.cz","w=", "ftxt\.quick\.cz","query=", "centrum\.cz","q=" # Minor czech search engines
+"atlas\.cz","searchtext=", "seznam\.cz","w=", "ftxt\.quick\.cz","query=", "centrum\.cz","q=", "najdi\.to","dotaz=", "redbox.cz","srch=" # Minor czech search engines
);
+
# If no rules are known, this will be used to clean URL of not keyword parameters.
@WordsToCleanSearchUrl= ("act=","annuaire=","btng=","categoria=","cfg=","cof=","cou=","cp=","dd=","domain=","dt=","dw=","exec=","geo=","hc=","height=","hl=","hq=","hs=","id=","kl=","lang=","loc=","lr=","matchmode=","medor=","message=","meta=","mode=","order=","page=","par=","pays=","pg=","pos=","prg=","qc=","refer=","sa=","safe=","sc=","sort=","src=","start=","style=","stype=","sum=","tag=","temp=","theme=","url=","user=","width=","what=","\\.x=","\\.y=","y=","look=");
# Never put the following exclusion ("ask=","claus=","general=","kw=","keyword=","keywords=","MT","p=","q=","qr=","qt=","query=","s=","search=","searchText=","string=","su=","w=") because they are strings that contain keywords we're looking for.