git.ipfire.org Git - thirdparty/sarg.git/blobdiff - log.c
Alias host names in URL and group identical names
[thirdparty/sarg.git] / log.c
diff --git a/log.c b/log.c
index f25a850a6f6a0390177506a7d4b64469feba20eb..4e5d7a1d8e91082c6f14e0a2f6efdf0c7b7ce919 100644 (file)
--- a/log.c
+++ b/log.c
@@ -1,6 +1,6 @@
 /*
  * SARG Squid Analysis Report Generator      http://sarg.sourceforge.net
- *                                                            1998, 2010
+ *                                                            1998, 2011
  *
  * SARG donations:
  *      please look at http://sarg.sourceforge.net/donations.php
@@ -44,8 +44,7 @@ struct userfilestruct
 /*@null@*/static char *userfile=NULL;
 
 numlist weekdays = { { 0, 1, 2, 3, 4, 5, 6 }, 7 };
-numlist hours = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
-                               13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 }, 24 };
+numlist hours = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 }, 24 };
 
 static void getusers(const char *pwdfile, int debug);
 
@@ -111,10 +110,11 @@ int main(int argc,char *argv[])
        char start_hour[128];
        char end_hour[128];
        char *linebuf;
-       char hostname[512];
-       char *url;
+       const char *url;
+       char *full_url;
        char *urly;
        char user[MAX_USER_LEN];
+       char splitprefix[MAXLEN];
        enum InputLogFormat ilf;
        int ilf_count[ILF_Last];
        int  ch;
@@ -168,6 +168,7 @@ int main(int argc,char *argv[])
                {"lastlog",required_argument,NULL,2},
                {"keeplogs",no_argument,NULL,3},
                {"split",no_argument,&split,1},
+               {"splitprefix",required_argument,NULL,'P'},
                {0,0,0,0}
        };
 
@@ -224,6 +225,7 @@ int main(int argc,char *argv[])
        strcpy(FontSize,"9px");
        strcpy(TempDir,"/tmp");
        strcpy(OutputDir,"/var/www/html/squid-reports");
+       AnonymousOutputFiles=false;
        Ip2Name=false;
        strcpy(DateFormat,"u");
        OverwriteReport=false;
@@ -248,20 +250,20 @@ int main(int argc,char *argv[])
        strcpy(PrivacyStringColor,"blue");
        SuccessfulMsg=true;
        TopUserFields=TOPUSERFIELDS_NUM | TOPUSERFIELDS_DATE_TIME | TOPUSERFIELDS_USERID | TOPUSERFIELDS_CONNECT |
-                                       TOPUSERFIELDS_BYTES | TOPUSERFIELDS_SETYB | TOPUSERFIELDS_IN_CACHE_OUT |
-                                       TOPUSERFIELDS_USED_TIME | TOPUSERFIELDS_MILISEC | TOPUSERFIELDS_PTIME |
-                                       TOPUSERFIELDS_TOTAL | TOPUSERFIELDS_AVERAGE;
+             TOPUSERFIELDS_BYTES | TOPUSERFIELDS_SETYB | TOPUSERFIELDS_IN_CACHE_OUT |
+             TOPUSERFIELDS_USED_TIME | TOPUSERFIELDS_MILISEC | TOPUSERFIELDS_PTIME |
+             TOPUSERFIELDS_TOTAL | TOPUSERFIELDS_AVERAGE;
        UserReportFields=USERREPORTFIELDS_CONNECT | USERREPORTFIELDS_BYTES | USERREPORTFIELDS_SETYB |
-                                               USERREPORTFIELDS_IN_CACHE_OUT | USERREPORTFIELDS_USED_TIME | USERREPORTFIELDS_MILISEC |
-                                               USERREPORTFIELDS_PTIME | USERREPORTFIELDS_TOTAL | USERREPORTFIELDS_AVERAGE;
+             USERREPORTFIELDS_IN_CACHE_OUT | USERREPORTFIELDS_USED_TIME | USERREPORTFIELDS_MILISEC |
+             USERREPORTFIELDS_PTIME | USERREPORTFIELDS_TOTAL | USERREPORTFIELDS_AVERAGE;
        strcpy(DataFileDelimiter,";");
        DataFileFields=DATA_FIELD_USER | DATA_FIELD_DATE | DATA_FIELD_TIME | DATA_FIELD_URL | DATA_FIELD_CONNECT |
-                                               DATA_FIELD_BYTES | DATA_FIELD_IN_CACHE | DATA_FIELD_OUT_CACHE | DATA_FIELD_ELAPSED;
+             DATA_FIELD_BYTES | DATA_FIELD_IN_CACHE | DATA_FIELD_OUT_CACHE | DATA_FIELD_ELAPSED;
        ShowReadStatistics=true;
        strcpy(IndexSortOrder,"D");
        ShowSargInfo=true;
        ShowSargLogo=true;
-       strcpy(ParsedOutputLog,"no");
+       ParsedOutputLog[0]='\0';
        strcpy(ParsedOutputLogCompress,"/bin/gzip -f");
        DisplayedValues=DISPLAY_ABBREV;
        strcpy(HeaderFontSize,"9px");
@@ -313,6 +315,7 @@ int main(int argc,char *argv[])
        hmf=-1;
        site[0]='\0';
        outdir[0]='\0';
+       splitprefix[0]='\0';
        elap[0]='\0';
        email[0]='\0';
        zip[0]='\0';
@@ -323,6 +326,7 @@ int main(int argc,char *argv[])
        start_hour[0]='\0';
        end_hour[0]='\0';
        hm_str[0]='\0';
+       HostAliasFile[0]='\0';
 
        denied_count=0;
        download_count=0;
@@ -364,7 +368,7 @@ int main(int argc,char *argv[])
 
        strcpy(Title,_("Squid User Access Report"));
 
-       while((ch = getopt_long_only(argc, argv, "a:b:c:d:e:f:g:u:l:L:o:s:t:w:hijmnprvxyz",long_options,&option_index)) != -1){
+       while((ch = getopt_long_only(argc, argv, "a:b:c:d:e:f:g:u:l:L:o:s:t:w:P:hijmnprvxyz",long_options,&option_index)) != -1){
                switch(ch)
                {
                        case 0:
@@ -442,6 +446,9 @@ int main(int argc,char *argv[])
                        case 'p':
                                userip=true;
                                break;
+                       case 'P':
+                               strcpy(splitprefix,optarg);
+                               break;
                        case 'r':
                                realt=true;
                                break;
@@ -489,10 +496,9 @@ int main(int argc,char *argv[])
                        case 'z':
                                debugz++;
                                break;
-                       /*case ':':
-                               debuga(_("Option -%c require an argument\n"),optopt);
-                               errflg++;
-                               break;*/
+                       case ':':
+                               debuga(_("Option -%c requires an argument\n"),optopt);
+                               exit(EXIT_FAILURE);
                        case '?':
                                usage(argv[0]);
                                exit(EXIT_FAILURE);
@@ -539,6 +545,9 @@ int main(int argc,char *argv[])
 
        if (lastlog>=0) LastLog=lastlog;
 
+       if(outdir[0] == '\0') strcpy(outdir,OutputDir);
+       if(outdir[0] != '\0') strcat(outdir,"/");
+
        if(realt) {
                realtime();
                exit(EXIT_SUCCESS);
@@ -553,6 +562,16 @@ int main(int argc,char *argv[])
        if(DataFile[0] != '\0')
                dataonly++;
 
+       if(df[0] == '\0') strcpy(df,DateFormat);
+       else strcpy(DateFormat,df);
+
+       if(df[0] == '\0') {
+               strcpy(df,"u");
+               strcpy(DateFormat,"u");
+       }
+       if (df[0]=='w')
+               IndexTree=INDEX_TREE_FILE;
+
        if(NAccessLog == 0) {
                strcpy(AccessLog[0],"/var/log/squid/access.log");
                NAccessLog++;
@@ -564,7 +583,7 @@ int main(int argc,char *argv[])
        }
        if(split) {
                for (iarq=0 ; iarq<NAccessLog ; iarq++)
-                       splitlog(AccessLog[iarq], df, dfrom, duntil, convert);
+                       splitlog(AccessLog[iarq], df, dfrom, duntil, convert, splitprefix);
                exit(EXIT_SUCCESS);
        }
        if(convert) {
@@ -589,28 +608,27 @@ int main(int argc,char *argv[])
 
        if(ReportType == 0) {
                ReportType=REPORT_TYPE_TOPUSERS | REPORT_TYPE_TOPSITES | REPORT_TYPE_USERS_SITES |
-                                       REPORT_TYPE_SITES_USERS | REPORT_TYPE_DATE_TIME | REPORT_TYPE_DENIED |
-                                       REPORT_TYPE_AUTH_FAILURES | REPORT_TYPE_SITE_USER_TIME_DATE | REPORT_TYPE_DOWNLOADS;
+                          REPORT_TYPE_SITES_USERS | REPORT_TYPE_DATE_TIME | REPORT_TYPE_DENIED |
+                          REPORT_TYPE_AUTH_FAILURES | REPORT_TYPE_SITE_USER_TIME_DATE | REPORT_TYPE_DOWNLOADS;
        }
 
        if(access(ExcludeUsers, R_OK) == 0) {
                getuexclude(ExcludeUsers,debug);
                fuser=true;
        }
+       if (HostAliasFile[0] != '\0')
+               read_hostalias(HostAliasFile);
 
-       indexonly=0;
+       indexonly=false;
        if(fuser) {
                if(is_indexonly())
-                       indexonly++;
+                       indexonly=true;
        }
-       if(strcmp(ExcludeUsers,"indexonly") == 0) indexonly++;
-       if(Index == INDEX_ONLY) indexonly++;
+       if(strcmp(ExcludeUsers,"indexonly") == 0) indexonly=true;
+       if(Index == INDEX_ONLY) indexonly=true;
 
        if(MaxElapsed[0] != '\0') max_elapsed=atol(MaxElapsed);
 
-       if(outdir[0] == '\0') strcpy(outdir,OutputDir);
-       strcat(outdir,"/");
-
        if(uagent[0] == '\0') strcpy(uagent,UserAgentLog);
 
        if(tmp[0] == '\0') strcpy(tmp,TempDir);
@@ -626,16 +644,6 @@ int main(int argc,char *argv[])
                exit(EXIT_FAILURE);
        }
 
-       if(df[0] == '\0') strcpy(df,DateFormat);
-       else strcpy(DateFormat,df);
-
-       if(df[0] == '\0') {
-               strcpy(df,"u");
-               strcpy(DateFormat,"u");
-       }
-       if (df[0]=='w')
-               IndexTree=INDEX_TREE_FILE;
-
        if(email[0] == '\0' && OutputEmail[0] != '\0') strcpy(email,OutputEmail);
 
        if(email[0] != '\0') {
@@ -750,7 +758,7 @@ int main(int argc,char *argv[])
 #warning "No rlimit resource for the number of open files"
 #endif
                if(rc == -1) {
-                               debuga(_("setrlimit error - %s\n"),strerror(errno));
+                       debuga(_("setrlimit error - %s\n"),strerror(errno));
                }
 
                if(debug)
@@ -869,7 +877,7 @@ int main(int argc,char *argv[])
                                }
                        }
 
-                       if(!fp_log && strcmp(ParsedOutputLog, "no") != 0 && ilf!=ILF_Sarg) {
+                       if(!fp_log && ParsedOutputLog[0] && ilf!=ILF_Sarg) {
                                if(access(ParsedOutputLog,R_OK) != 0) {
                                        my_mkdir(ParsedOutputLog);
                                }
@@ -883,11 +891,11 @@ int main(int argc,char *argv[])
 
                        recs2++;
                        if( ShowReadStatistics && !from_stdin && !from_pipe && --OutputNonZero<=0) {
-                       double perc = recs2 * 100. / recs1 ;
-                       printf(_("SARG: Records in file: %lu, reading: %3.2lf%%"),recs2,perc);
-                       putchar('\r');
-                       fflush (stdout);
-                       OutputNonZero = REPORT_EVERY_X_LINES ;
+                               double perc = recs2 * 100. / recs1 ;
+                               printf(_("SARG: Records in file: %lu, reading: %3.2lf%%"),recs2,perc);
+                               putchar('\r');
+                               fflush (stdout);
+                               OutputNonZero = REPORT_EVERY_X_LINES ;
                        }
                        if(blen < 58) continue;
                        if(strstr(linebuf,"HTTP/0.0") != 0) continue;
@@ -909,7 +917,7 @@ int main(int argc,char *argv[])
                                        }
                                }
                                if(!exstring && (str=(char *) strstr(linebuf,gwarea.current)) != (char *) NULL )
-                                               exstring=true;
+                                       exstring=true;
                                if(exstring) continue;
                        }
 
@@ -939,11 +947,11 @@ int main(int argc,char *argv[])
                                                }
                                        }
                                        if (getword(data,sizeof(data),&gwarea,']')<0 || getword_skip(MAXLEN,&gwarea,'"')<0 ||
-                                                       getword(fun,sizeof(fun),&gwarea,' ')<0) {
+                                           getword(fun,sizeof(fun),&gwarea,' ')<0) {
                                                debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
                                                exit(EXIT_FAILURE);
                                        }
-                                       if (getword_ptr(linebuf,&url,&gwarea,' ')<0) {
+                                       if (getword_ptr(linebuf,&full_url,&gwarea,' ')<0) {
                                                debuga(_("Maybe you have a broken url in your %s file\n"),arq);
                                                exit(EXIT_FAILURE);
                                        }
@@ -1006,6 +1014,11 @@ int main(int argc,char *argv[])
                                        imonth=month2num(mes)+1;
                                        idata=builddia(iday,imonth,iyear);
                                        computedate(iyear,imonth,iday,&tt);
+                                       if (sscanf(hora,"%d:%d:%d",&tt.tm_hour,&tt.tm_min,&tt.tm_sec)!=3 || tt.tm_hour<0 || tt.tm_hour>=24 ||
+                                                       tt.tm_min<0 || tt.tm_min>=60 || tt.tm_sec<0 || tt.tm_sec>=60) {
+                                               debuga(_("Invalid time found in %s\n"),arq);
+                                               exit(EXIT_FAILURE);
+                                       }
                                        t=&tt;
                                }
 
@@ -1036,7 +1049,7 @@ int main(int argc,char *argv[])
                                                debuga(_("Maybe you have a broken request method in your %s file\n"),arq);
                                                exit(EXIT_FAILURE);
                                        }
-                                       if (getword_ptr(linebuf,&url,&gwarea,' ')<0){
+                                       if (getword_ptr(linebuf,&full_url,&gwarea,' ')<0){
                                                debuga(_("Maybe you have a broken url in your %s file\n"),arq);
                                                exit(EXIT_FAILURE);
                                        }
@@ -1077,7 +1090,7 @@ int main(int argc,char *argv[])
                                        debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
                                        exit(EXIT_FAILURE);
                                }
-                               if (getword_ptr(linebuf,&url,&gwarea,'\t')<0){
+                               if (getword_ptr(linebuf,&full_url,&gwarea,'\t')<0){
                                        debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
                                        exit(EXIT_FAILURE);
                                }
@@ -1112,6 +1125,11 @@ int main(int argc,char *argv[])
                                }
                                idata=builddia(iday,imonth,iyear);
                                computedate(iyear,imonth,iday,&tt);
+                               if (sscanf(hora,"%d:%d:%d",&tt.tm_hour,&tt.tm_min,&tt.tm_sec)!=3 || tt.tm_hour<0 || tt.tm_hour>=24 ||
+                                               tt.tm_min<0 || tt.tm_min>=60 || tt.tm_sec<0 || tt.tm_sec>=60) {
+                                       debuga(_("Invalid time found in %s\n"),arq);
+                                       exit(EXIT_FAILURE);
+                               }
                                t=&tt;
                        }
                        if (ilf==ILF_Isa) {
@@ -1193,7 +1211,7 @@ int main(int argc,char *argv[])
                                                }
                                                strcpy(tam,str);
                                        } else if (x==isa_cols[ISACOL_Uri]) {
-                                               url=str;
+                                               full_url=str;
                                        } else if (x==isa_cols[ISACOL_Status]) {
                                                if (strlen(str)>=sizeof(code)) {
                                                        debuga(_("Maybe you have a broken access code in your %s file\n"),arq);
@@ -1223,6 +1241,13 @@ int main(int argc,char *argv[])
 
                                idata=builddia(iday,imonth,iyear);
                                computedate(iyear,imonth,iday,&tt);
+                               if (isa_cols[ISACOL_Time]>=0) {
+                                       if (sscanf(hora,"%d:%d:%d",&tt.tm_hour,&tt.tm_min,&tt.tm_sec)!=3 || tt.tm_hour<0 || tt.tm_hour>=24 ||
+                                                       tt.tm_min<0 || tt.tm_min>=60 || tt.tm_sec<0 || tt.tm_sec>=60) {
+                                               debuga(_("Invalid time found in %s\n"),arq);
+                                               exit(EXIT_FAILURE);
+                                       }
+                               }
                                t=&tt;
                        }
                        if (t==NULL) {
@@ -1242,12 +1267,10 @@ int main(int argc,char *argv[])
 
                        // Record only hours usage which is required
                        if (t) {
-                               if( bsearch( &( t -> tm_wday ), weekdays.list, weekdays.len,
-                                                                                                                       sizeof( int ), compar ) == NULL )
+                               if( bsearch( &( t -> tm_wday ), weekdays.list, weekdays.len, sizeof( int ), compar ) == NULL )
                                        continue;
 
-                               if( bsearch( &( t -> tm_hour ), hours.list, hours.len,
-                                                                                                                       sizeof( int ), compar ) == NULL )
+                               if( bsearch( &( t -> tm_hour ), hours.list, hours.len, sizeof( int ), compar ) == NULL )
                                        continue;
                        }
 
@@ -1300,37 +1323,23 @@ int main(int argc,char *argv[])
                        }
 #endif
 
-                       urly=url;
+                       urly=full_url;
 
                        if(ilf!=ILF_Sarg) {
                                /*
                                The full URL is not saved in sarg log. There is no point in testing the URL to detect
                                a downloaded file.
                                */
-                               download_flag=is_download_suffix(url);
+                               download_flag=is_download_suffix(full_url);
                                if (download_flag) {
-                                       download_url=url;
+                                       download_url=full_url;
                                        download_count++;
                                }
                        } else
                                download_flag=false;
 
-                       // remove any protocol:// at the beginning of the URL
-                       if ((str = strchr(url,'/')) != NULL && str[1] == '/') {
-                               int i;
-
-                               str+=2;
-                               for (i=0 ; str[i] ; i++)
-                                       url[i]=str[i];
-                               url[i]='\0';
-                       }
-
-                       if(!LongUrl) {
-                               url_hostname(url,hostname,sizeof(hostname));
-                               url=hostname;
-                       }
-
-                       if(url[0] == '\0') continue;
+                       url=process_url(full_url);
+                       if (!url || url[0] == '\0') continue;
 
                        if(addr[0] != '\0'){
                                if(strcmp(addr,ip)!=0) continue;
@@ -1551,7 +1560,7 @@ int main(int argc,char *argv[])
 
        longline_destroy(&line);
        if ( fp_Download_Unsort )
-       fclose (fp_Download_Unsort);
+               fclose (fp_Download_Unsort);
 
        for (ufile=first_user_file ; ufile ; ufile=ufile1) {
                ufile1=ufile->next;
@@ -1562,6 +1571,7 @@ int main(int argc,char *argv[])
        free_download();
        free_excludecodes();
        free_exclude();
+       free_hostalias();
 
        if(debug) {
                int totalcount=0;
@@ -1616,7 +1626,6 @@ int main(int argc,char *argv[])
        if(debug)
                debuga(_("Period: %s\n"),period.text);
 
-//   fclose(fp_ou);
        if(fp_denied)
                fclose(fp_denied);
        if(fp_authfail)
@@ -1659,7 +1668,10 @@ int main(int argc,char *argv[])
                        debuga(_("sort command: %s\n"),csort);
                        exit(EXIT_FAILURE);
                }
-               unlink(denied_unsort);
+               if (unlink(denied_unsort)) {
+                       debuga(_("Cannot delete %s - %s\n"),denied_unsort,strerror(errno));
+                       exit(EXIT_FAILURE);
+               }
        }
 
        sort_users_log(tmp, debug);
@@ -1669,8 +1681,9 @@ int main(int argc,char *argv[])
        else
                gerarel();
 
-       if((ReportType & REPORT_TYPE_DENIED) != 0)
+       if((ReportType & REPORT_TYPE_DENIED) != 0) {
                unlink(denied_sort);
+       }
 
        if(strcmp(tmp,"/tmp") != 0) {
                unlinkdir(tmp,0);